// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
// reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
16 17 // magma functions specific to ceed 18 #ifndef _ceed_magma_h 19 #define _ceed_magma_h 20 21 #include <ceed/ceed.h> 22 #include <ceed/backend.h> 23 #include <magma_v2.h> 24 25 typedef enum { 26 MAGMA_KERNEL_DIM_GENERIC=101, 27 MAGMA_KERNEL_DIM_SPECIFIC=102 28 } magma_kernel_mode_t; 29 30 typedef struct { 31 magma_kernel_mode_t basis_kernel_mode; 32 magma_int_t maxthreads[3]; 33 magma_device_t device; 34 magma_queue_t queue; 35 } Ceed_Magma; 36 37 typedef struct { 38 CeedScalar *dqref1d; 39 CeedScalar *dinterp1d; 40 CeedScalar *dgrad1d; 41 CeedScalar *dqweight1d; 42 } CeedBasis_Magma; 43 44 typedef struct { 45 CeedScalar *dqref; 46 CeedScalar *dinterp; 47 CeedScalar *dgrad; 48 CeedScalar *dqweight; 49 } CeedBasisNonTensor_Magma; 50 51 typedef struct { 52 CeedInt *offsets; 53 CeedInt *doffsets; 54 int own_; 55 int down_; // cover a case where we own Device memory 56 } CeedElemRestriction_Magma; 57 58 typedef struct { 59 const CeedScalar **inputs; 60 CeedScalar **outputs; 61 bool setupdone; 62 } CeedQFunction_Magma; 63 64 #define USE_MAGMA_BATCH 65 #define USE_MAGMA_BATCH2 66 #define USE_MAGMA_BATCH3 67 #define USE_MAGMA_BATCH4 68 69 #ifdef __cplusplus 70 CEED_INTERN { 71 #endif 72 73 magma_int_t magma_interp_1d( 74 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 75 const CeedScalar *dT, CeedTransposeMode tmode, 76 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 77 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 78 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 79 80 magma_int_t magma_interp_2d( 81 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 82 const CeedScalar *dT, CeedTransposeMode tmode, 83 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 84 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 85 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 86 87 magma_int_t magma_interp_3d( 88 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 89 const CeedScalar *dT, CeedTransposeMode tmode, 90 
const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 91 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 92 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 93 94 magma_int_t magma_interp_generic(magma_int_t P, magma_int_t Q, 95 magma_int_t dim, magma_int_t ncomp, 96 const double *dT, CeedTransposeMode tmode, 97 const double *dU, magma_int_t u_elemstride, 98 magma_int_t cstrdU, 99 double *dV, magma_int_t v_elemstride, 100 magma_int_t cstrdV, 101 magma_int_t nelem, magma_queue_t queue); 102 103 magma_int_t magma_interp( 104 magma_int_t P, magma_int_t Q, 105 magma_int_t dim, magma_int_t ncomp, 106 const double *dT, CeedTransposeMode tmode, 107 const double *dU, magma_int_t estrdU, magma_int_t cstrdU, 108 double *dV, magma_int_t estrdV, magma_int_t cstrdV, 109 magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue); 110 111 magma_int_t magma_grad_1d( 112 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 113 const CeedScalar *dTinterp, const CeedScalar *dTgrad, CeedTransposeMode tmode, 114 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 115 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 116 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 117 118 magma_int_t magma_gradn_2d( 119 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 120 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 121 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 122 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 123 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 124 125 magma_int_t magma_gradt_2d( 126 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 127 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 128 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 129 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 
magma_int_t dstrdV, 130 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 131 132 magma_int_t magma_gradn_3d( 133 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 134 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 135 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 136 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 137 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 138 139 magma_int_t magma_gradt_3d( 140 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 141 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 142 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 143 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 144 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 145 146 magma_int_t magma_grad_generic( 147 magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp, 148 const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 149 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 150 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 151 magma_int_t nelem, magma_queue_t queue); 152 153 magma_int_t magma_grad( 154 magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp, 155 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 156 const CeedScalar *dU, magma_int_t u_elemstride, magma_int_t cstrdU, magma_int_t dstrdU, 157 CeedScalar *dV, magma_int_t v_elemstride, magma_int_t cstrdV, magma_int_t dstrdV, 158 magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue); 159 160 magma_int_t magma_weight_1d( 161 magma_int_t Q, const CeedScalar *dqweight1d, 162 CeedScalar *dV, magma_int_t v_stride, 163 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 164 165 magma_int_t 
magma_weight_2d( 166 magma_int_t Q, const CeedScalar *dqweight1d, 167 CeedScalar *dV, magma_int_t v_stride, 168 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 169 170 magma_int_t magma_weight_3d( 171 magma_int_t Q, const CeedScalar *dqweight1d, 172 CeedScalar *dV, magma_int_t v_stride, 173 magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue); 174 175 magma_int_t magma_weight_generic( 176 magma_int_t Q, magma_int_t dim, 177 const CeedScalar *dqweight1d, 178 CeedScalar *dV, magma_int_t vstride, 179 magma_int_t nelem, magma_queue_t queue); 180 181 magma_int_t magma_weight( 182 magma_int_t Q, magma_int_t dim, 183 const CeedScalar *dqweight1d, 184 CeedScalar *dV, magma_int_t v_stride, 185 magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue); 186 187 void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t nelem, 188 magma_int_t Q, 189 double *dqweight, double *dv, magma_queue_t queue); 190 191 void magma_readDofsOffset(const magma_int_t NCOMP, 192 const magma_int_t compstride, 193 const magma_int_t esize, const magma_int_t nelem, 194 magma_int_t *offsets, const double *du, double *dv, 195 magma_queue_t queue); 196 197 void magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize, 198 const magma_int_t nelem, magma_int_t *strides, 199 const double *du, double *dv, 200 magma_queue_t queue); 201 202 void magma_writeDofsOffset(const magma_int_t NCOMP, 203 const magma_int_t compstride, 204 const magma_int_t esize, const magma_int_t nelem, 205 magma_int_t *offsets,const double *du, double *dv, 206 magma_queue_t queue); 207 208 void magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize, 209 const magma_int_t nelem, magma_int_t *strides, 210 const double *du, double *dv, 211 magma_queue_t queue); 212 213 int magma_dgemm_nontensor( 214 magma_trans_t transA, magma_trans_t transB, 215 magma_int_t m, magma_int_t n, magma_int_t k, 216 double alpha, const 
double *dA, magma_int_t ldda, 217 const double *dB, magma_int_t lddb, 218 double beta, double *dC, magma_int_t lddc, 219 magma_queue_t queue ); 220 221 magma_int_t 222 magma_isdevptr(const void *A); 223 224 int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, 225 CeedInt Q1d, 226 const CeedScalar *interp1d, 227 const CeedScalar *grad1d, 228 const CeedScalar *qref1d, 229 const CeedScalar *qweight1d, 230 CeedBasis basis); 231 232 int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, 233 CeedInt ndof, CeedInt nqpts, 234 const CeedScalar *interp, 235 const CeedScalar *grad, 236 const CeedScalar *qref, 237 const CeedScalar *qweight, 238 CeedBasis basis); 239 240 int CeedElemRestrictionCreate_Magma(CeedMemType mtype, 241 CeedCopyMode cmode, 242 const CeedInt *offsets, 243 CeedElemRestriction r); 244 245 int CeedElemRestrictionCreateBlocked_Magma(const CeedMemType mtype, 246 const CeedCopyMode cmode, 247 const CeedInt *offsets, 248 const CeedElemRestriction res); 249 250 int CeedOperatorCreate_Magma(CeedOperator op); 251 252 #ifdef __cplusplus 253 } 254 #endif 255 256 // comment the line below to use the default magma_is_devptr function 257 #define magma_is_devptr magma_isdevptr 258 259 // if magma and cuda/ref are using the null stream, then ceed_magma_queue_sync 260 // should do nothing 261 #define ceed_magma_queue_sync(...) 262 263 // batch stride, override using -DMAGMA_BATCH_STRIDE=<desired-value> 264 #ifndef MAGMA_BATCH_STRIDE 265 #define MAGMA_BATCH_STRIDE (1000) 266 #endif 267 268 #endif // _ceed_magma_h 269