// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
// reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
// magma functions specific to ceed
//
// Backend-internal header for the libCEED MAGMA backend: backend data
// structures plus prototypes for the device kernels and object constructors.
// NOTE(review): the guard name `_ceed_magma_h` is a reserved identifier in
// standard C (leading underscore at file scope) — consider renaming.
#ifndef _ceed_magma_h
#define _ceed_magma_h

#include <ceed/ceed.h>
#include <ceed/backend.h>
#include <magma_v2.h>

// Selects how basis kernels are launched: one dimension-generic kernel, or a
// kernel specialized for the basis dimension (1D/2D/3D).
typedef enum {
  MAGMA_KERNEL_DIM_GENERIC=101,
  MAGMA_KERNEL_DIM_SPECIFIC=102
} magma_kernel_mode_t;

// Per-Ceed backend data for the MAGMA backend.
typedef struct {
  magma_kernel_mode_t basis_kernel_mode; // which basis-kernel flavor to launch
  magma_int_t maxthreads[3];             // thread limits passed to the 1D/2D/3D kernels
  magma_device_t device;                 // MAGMA device handle
  magma_queue_t queue;                   // MAGMA queue used for all launches
} Ceed_Magma;

// Tensor-product basis data; the `d` prefix marks device-resident arrays
// (consistent with the `dU`/`dV` kernel arguments below).
typedef struct {
  CeedScalar *dqref1d;    // 1D quadrature point coordinates
  CeedScalar *dinterp1d;  // 1D interpolation matrix
  CeedScalar *dgrad1d;    // 1D gradient matrix
  CeedScalar *dqweight1d; // 1D quadrature weights
} CeedBasis_Magma;

// Non-tensor (H1) basis data; same layout conventions as CeedBasis_Magma but
// for full (non-tensor) operators.
typedef struct {
  CeedScalar *dqref;
  CeedScalar *dinterp;
  CeedScalar *dgrad;
  CeedScalar *dqweight;
} CeedBasisNonTensor_Magma;

// Ownership of the restriction's host offsets array.
typedef enum {
  OWNED_NONE = 0,  // borrowed; backend must not free
  OWNED_UNPINNED,  // owned; pageable host memory
  OWNED_PINNED,    // owned; pinned host memory
} OwnershipMode;

typedef struct {
  CeedInt *offsets;   // host offsets array
  CeedInt *doffsets;  // device copy of the offsets
  OwnershipMode own_; // ownership of the host array
  int down_;          // cover a case where we own Device memory
} CeedElemRestriction_Magma;

typedef struct {
  const CeedScalar **inputs; // pointers to Q-function input field data
  CeedScalar **outputs;      // pointers to Q-function output field data
  bool setupdone;            // true once setup has been performed
} CeedQFunction_Magma;

#define USE_MAGMA_BATCH
#define USE_MAGMA_BATCH2
#define USE_MAGMA_BATCH3
#define USE_MAGMA_BATCH4

#ifdef __cplusplus
// NOTE(review): CEED_INTERN is assumed to expand to an extern "C"-compatible
// linkage specifier when used as a block opener here — confirm its definition
// in ceed/backend.h.
CEED_INTERN {
#endif

// ---------------------------------------------------------------------------
// Tensor interpolation kernels.
// Common arguments: P/Q basis and quadrature sizes, ncomp field components,
// dT the (device) 1D basis matrix, tmode whether its transpose is applied,
// estrd*/cstrd* the element/component strides of the device arrays dU (input)
// and dV (output), nelem the number of elements.
// ---------------------------------------------------------------------------

magma_int_t magma_interp_1d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

magma_int_t magma_interp_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

magma_int_t magma_interp_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

// Dimension-generic interpolation (any dim), used for
// MAGMA_KERNEL_DIM_GENERIC.
magma_int_t magma_interp_generic(magma_int_t P, magma_int_t Q,
                                 magma_int_t dim, magma_int_t ncomp,
                                 const CeedScalar *dT, CeedTransposeMode tmode,
                                 const CeedScalar *dU, magma_int_t u_elemstride,
                                 magma_int_t cstrdU,
                                 CeedScalar *dV, magma_int_t v_elemstride,
                                 magma_int_t cstrdV,
                                 magma_int_t nelem, magma_queue_t queue);

// Interpolation dispatcher: selects the generic or dimension-specific kernel
// according to kernel_mode; maxthreads points to the per-dimension limits
// (cf. Ceed_Magma.maxthreads).
magma_int_t magma_interp(
    magma_int_t P, magma_int_t Q,
    magma_int_t dim, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);

// ---------------------------------------------------------------------------
// Tensor gradient kernels. dstrd* is the additional dimension stride.
// The 2D/3D `gradn`/`gradt` pairs are presumably the non-transpose/transpose
// variants — confirm against the kernel sources.
// ---------------------------------------------------------------------------

magma_int_t magma_grad_1d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dTinterp, const CeedScalar *dTgrad, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

magma_int_t magma_gradn_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

magma_int_t magma_gradt_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

magma_int_t magma_gradn_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

magma_int_t magma_gradt_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

// Dimension-generic gradient kernel.
magma_int_t magma_grad_generic(
    magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
    const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_queue_t queue);

// Gradient dispatcher; see magma_interp for the kernel_mode/maxthreads
// conventions.
magma_int_t magma_grad(
    magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t u_elemstride, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t v_elemstride, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);

// ---------------------------------------------------------------------------
// Quadrature-weight kernels: write the tensor quadrature weights built from
// dqweight1d into dV (stride v_stride between elements).
// ---------------------------------------------------------------------------

magma_int_t magma_weight_1d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

magma_int_t magma_weight_2d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

magma_int_t magma_weight_3d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

magma_int_t magma_weight_generic(
    magma_int_t Q, magma_int_t dim,
    const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t vstride,
    magma_int_t nelem, magma_queue_t queue);

// Weight dispatcher; see magma_interp for the kernel_mode/maxthreads
// conventions.
magma_int_t magma_weight(
    magma_int_t Q, magma_int_t dim,
    const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);

// Non-tensor weight kernel; caller supplies the launch geometry directly.
void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t nelem,
                            magma_int_t Q,
                            CeedScalar *dqweight, CeedScalar *dv, magma_queue_t queue);

// ---------------------------------------------------------------------------
// Element-restriction gather/scatter kernels (du -> dv). "read" gathers
// L-vector values into element form; "write" scatters them back. Offset
// variants use an explicit offsets array; strided variants use the given
// strides.
// ---------------------------------------------------------------------------

void magma_readDofsOffset(const magma_int_t NCOMP,
                          const magma_int_t compstride,
                          const magma_int_t esize, const magma_int_t nelem,
                          magma_int_t *offsets, const CeedScalar *du, CeedScalar *dv,
                          magma_queue_t queue);

void magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
                           const magma_int_t nelem, magma_int_t *strides,
                           const CeedScalar *du, CeedScalar *dv,
                           magma_queue_t queue);

void magma_writeDofsOffset(const magma_int_t NCOMP,
                           const magma_int_t compstride,
                           const magma_int_t esize, const magma_int_t nelem,
                           magma_int_t *offsets, const CeedScalar *du, CeedScalar *dv,
                           magma_queue_t queue);

void magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
                            const magma_int_t nelem, magma_int_t *strides,
                            const CeedScalar *du, CeedScalar *dv,
                            magma_queue_t queue);

// GEMM wrappers tuned for the small, skinny matrices of non-tensor bases
// (double and single precision).
int magma_dgemm_nontensor(
    magma_trans_t transA, magma_trans_t transB,
    magma_int_t m, magma_int_t n, magma_int_t k,
    double alpha, const double *dA, magma_int_t ldda,
    const double *dB, magma_int_t lddb,
    double beta, double *dC, magma_int_t lddc,
    magma_queue_t queue );

int magma_sgemm_nontensor(
    magma_trans_t transA, magma_trans_t transB,
    magma_int_t m, magma_int_t n, magma_int_t k,
    float alpha, const float *dA, magma_int_t ldda,
    const float *dB, magma_int_t lddb,
    float beta, float *dC, magma_int_t lddc,
    magma_queue_t queue );

// Backend replacement for magma_is_devptr (see the #define below); tests
// whether A is a device pointer.
magma_int_t
magma_isdevptr(const void *A);

// ---------------------------------------------------------------------------
// libCEED object constructors implemented by this backend.
// ---------------------------------------------------------------------------

int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d,
                                  CeedInt Q1d,
                                  const CeedScalar *interp1d,
                                  const CeedScalar *grad1d,
                                  const CeedScalar *qref1d,
                                  const CeedScalar *qweight1d,
                                  CeedBasis basis);

int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim,
                            CeedInt ndof, CeedInt nqpts,
                            const CeedScalar *interp,
                            const CeedScalar *grad,
                            const CeedScalar *qref,
                            const CeedScalar *qweight,
                            CeedBasis basis);

int CeedElemRestrictionCreate_Magma(CeedMemType mtype,
                                    CeedCopyMode cmode,
                                    const CeedInt *offsets,
                                    CeedElemRestriction r);

int CeedElemRestrictionCreateBlocked_Magma(const CeedMemType mtype,
                                           const CeedCopyMode cmode,
                                           const CeedInt *offsets,
                                           const CeedElemRestriction res);

int CeedOperatorCreate_Magma(CeedOperator op);

#ifdef __cplusplus
}
#endif

// comment the line below to use the default magma_is_devptr function
#define magma_is_devptr magma_isdevptr

// if magma and cuda/ref are using the null stream, then ceed_magma_queue_sync
// should do nothing
#define ceed_magma_queue_sync(...)
// batch stride, override using -DMAGMA_BATCH_STRIDE=<desired-value>
// (presumably the number of elements processed per kernel batch — see the
// kernel sources).
#ifndef MAGMA_BATCH_STRIDE
#define MAGMA_BATCH_STRIDE (1000)
#endif

#endif // _ceed_magma_h