1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 // magma functions specific to ceed 9 #ifndef _ceed_magma_h 10 #define _ceed_magma_h 11 12 #include <ceed/ceed.h> 13 #include <ceed/backend.h> 14 #include <magma_v2.h> 15 16 typedef enum { 17 MAGMA_KERNEL_DIM_GENERIC=101, 18 MAGMA_KERNEL_DIM_SPECIFIC=102 19 } magma_kernel_mode_t; 20 21 typedef struct { 22 magma_kernel_mode_t basis_kernel_mode; 23 magma_device_t device; 24 magma_queue_t queue; 25 } Ceed_Magma; 26 27 typedef struct { 28 CeedScalar *dqref1d; 29 CeedScalar *dinterp1d; 30 CeedScalar *dgrad1d; 31 CeedScalar *dqweight1d; 32 } CeedBasis_Magma; 33 34 typedef struct { 35 CeedScalar *dqref; 36 CeedScalar *dinterp; 37 CeedScalar *dgrad; 38 CeedScalar *dqweight; 39 } CeedBasisNonTensor_Magma; 40 41 typedef enum { 42 OWNED_NONE = 0, 43 OWNED_UNPINNED, 44 OWNED_PINNED, 45 } OwnershipMode; 46 47 typedef struct { 48 CeedInt *offsets; 49 CeedInt *doffsets; 50 OwnershipMode own_; 51 int down_; // cover a case where we own Device memory 52 } CeedElemRestriction_Magma; 53 54 typedef struct { 55 const CeedScalar **inputs; 56 CeedScalar **outputs; 57 bool setupdone; 58 } CeedQFunction_Magma; 59 60 #define USE_MAGMA_BATCH 61 #define USE_MAGMA_BATCH2 62 #define USE_MAGMA_BATCH3 63 #define USE_MAGMA_BATCH4 64 65 #ifdef __cplusplus 66 CEED_INTERN { 67 #endif 68 69 magma_int_t magma_interp_1d( 70 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 71 const CeedScalar *dT, CeedTransposeMode tmode, 72 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 73 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 74 magma_int_t nelem, magma_queue_t queue); 75 76 magma_int_t magma_interp_2d( 77 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 78 const CeedScalar *dT, CeedTransposeMode tmode, 79 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 80 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 81 magma_int_t nelem, magma_queue_t queue); 82 83 magma_int_t magma_interp_3d( 84 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 85 const CeedScalar *dT, CeedTransposeMode tmode, 86 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 87 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 88 magma_int_t nelem, magma_queue_t queue); 89 90 magma_int_t magma_interp_generic(magma_int_t P, magma_int_t Q, 91 magma_int_t dim, magma_int_t ncomp, 92 const CeedScalar *dT, CeedTransposeMode tmode, 93 const CeedScalar *dU, magma_int_t u_elemstride, 94 magma_int_t cstrdU, 95 CeedScalar *dV, magma_int_t v_elemstride, 96 magma_int_t cstrdV, 97 magma_int_t nelem, magma_queue_t queue); 98 99 magma_int_t magma_interp( 100 magma_int_t P, magma_int_t Q, 101 magma_int_t dim, magma_int_t ncomp, 102 const CeedScalar *dT, CeedTransposeMode tmode, 103 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 104 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 105 magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue); 106 107 magma_int_t magma_grad_1d( 108 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 109 const CeedScalar *dTinterp, const CeedScalar *dTgrad, CeedTransposeMode tmode, 110 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, 111 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, 112 magma_int_t nelem, magma_queue_t queue); 113 114 magma_int_t magma_gradn_2d( 115 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 116 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 117 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 118 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 119 magma_int_t nelem, magma_queue_t queue); 120 121 magma_int_t magma_gradt_2d( 122 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 123 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 124 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 125 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 126 magma_int_t nelem, magma_queue_t queue); 127 128 magma_int_t magma_gradn_3d( 129 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 130 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 131 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 132 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 133 magma_int_t nelem, magma_queue_t queue); 134 135 magma_int_t magma_gradt_3d( 136 magma_int_t P, magma_int_t Q, magma_int_t ncomp, 137 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 138 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 139 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 140 magma_int_t nelem, magma_queue_t queue); 141 142 magma_int_t magma_grad_generic( 143 magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp, 144 const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 145 const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU, 146 CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV, 147 magma_int_t nelem, magma_queue_t queue); 148 149 magma_int_t magma_grad( 150 magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp, 151 const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode, 152 const CeedScalar *dU, magma_int_t u_elemstride, magma_int_t cstrdU, magma_int_t dstrdU, 153 CeedScalar *dV, magma_int_t v_elemstride, magma_int_t cstrdV, magma_int_t dstrdV, 154 magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue); 155 156 magma_int_t magma_weight_1d( 157 magma_int_t Q, const CeedScalar *dqweight1d, 158 CeedScalar *dV, magma_int_t v_stride, 159 magma_int_t nelem, magma_queue_t queue); 160 161 magma_int_t magma_weight_2d( 162 magma_int_t Q, const CeedScalar *dqweight1d, 163 CeedScalar *dV, magma_int_t v_stride, 164 magma_int_t nelem, magma_queue_t queue); 165 166 magma_int_t magma_weight_3d( 167 magma_int_t Q, const CeedScalar *dqweight1d, 168 CeedScalar *dV, magma_int_t v_stride, 169 magma_int_t nelem, magma_queue_t queue); 170 171 magma_int_t magma_weight_generic( 172 magma_int_t Q, magma_int_t dim, 173 const CeedScalar *dqweight1d, 174 CeedScalar *dV, magma_int_t vstride, 175 magma_int_t nelem, magma_queue_t queue); 176 177 magma_int_t magma_weight( 178 magma_int_t Q, magma_int_t dim, 179 const CeedScalar *dqweight1d, 180 CeedScalar *dV, magma_int_t v_stride, 181 magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue); 182 183 void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t nelem, 184 magma_int_t Q, 185 CeedScalar *dqweight, CeedScalar *dv, magma_queue_t queue); 186 187 void magma_readDofsOffset(const magma_int_t NCOMP, 188 const magma_int_t compstride, 189 const magma_int_t esize, const magma_int_t nelem, 190 magma_int_t *offsets, const CeedScalar *du, CeedScalar *dv, 191 magma_queue_t queue); 192 193 void magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize, 194 const magma_int_t nelem, magma_int_t *strides, 195 const CeedScalar *du, CeedScalar *dv, 196 magma_queue_t queue); 197 198 void magma_writeDofsOffset(const magma_int_t NCOMP, 199 const magma_int_t compstride, 200 const magma_int_t esize, const magma_int_t nelem, 201 magma_int_t *offsets,const CeedScalar *du, CeedScalar *dv, 202 magma_queue_t queue); 203 204 void magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize, 205 const magma_int_t nelem, magma_int_t *strides, 206 const CeedScalar *du, CeedScalar *dv, 207 magma_queue_t queue); 208 209 int magma_dgemm_nontensor( 210 magma_trans_t transA, magma_trans_t transB, 211 magma_int_t m, magma_int_t n, magma_int_t k, 212 double alpha, const double *dA, magma_int_t ldda, 213 const double *dB, magma_int_t lddb, 214 double beta, double *dC, magma_int_t lddc, 215 magma_queue_t queue ); 216 217 int magma_sgemm_nontensor( 218 magma_trans_t transA, magma_trans_t transB, 219 magma_int_t m, magma_int_t n, magma_int_t k, 220 float alpha, const float *dA, magma_int_t ldda, 221 const float *dB, magma_int_t lddb, 222 float beta, float *dC, magma_int_t lddc, 223 magma_queue_t queue ); 224 225 magma_int_t 226 magma_isdevptr(const void *A); 227 228 int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, 229 CeedInt Q1d, 230 const CeedScalar *interp1d, 231 const CeedScalar *grad1d, 232 const CeedScalar *qref1d, 233 const CeedScalar *qweight1d, 234 CeedBasis basis); 235 236 int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, 237 CeedInt ndof, CeedInt nqpts, 238 const CeedScalar *interp, 239 const CeedScalar *grad, 240 const CeedScalar *qref, 241 const CeedScalar *qweight, 242 CeedBasis basis); 243 244 int CeedElemRestrictionCreate_Magma(CeedMemType mtype, 245 CeedCopyMode cmode, 246 const CeedInt *offsets, 247 CeedElemRestriction r); 248 249 int CeedElemRestrictionCreateBlocked_Magma(const CeedMemType mtype, 250 const CeedCopyMode cmode, 251 const CeedInt *offsets, 252 const CeedElemRestriction res); 253 254 int CeedOperatorCreate_Magma(CeedOperator op); 255 256 #ifdef __cplusplus 257 } 258 #endif 259 260 // comment the line below to use the default magma_is_devptr function 261 #define magma_is_devptr magma_isdevptr 262 263 // if magma and cuda/ref are using the null stream, then ceed_magma_queue_sync 264 // should do nothing 265 #define ceed_magma_queue_sync(...) 266 267 // batch stride, override using -DMAGMA_BATCH_STRIDE=<desired-value> 268 #ifndef MAGMA_BATCH_STRIDE 269 #define MAGMA_BATCH_STRIDE (1000) 270 #endif 271 272 #endif // _ceed_magma_h 273