1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 // magma functions specific to ceed 9 #ifndef _ceed_magma_h 10 #define _ceed_magma_h 11 12 #include <ceed/ceed.h> 13 #include <ceed/backend.h> 14 #include <magma_v2.h> 15 16 #define MAGMA_MAXTHREADS_1D 128 17 #define MAGMA_MAXTHREADS_2D 128 18 #define MAGMA_MAXTHREADS_3D 64 19 // Define macro for determining number of threads in y-direction 20 // for basis kernels 21 #define MAGMA_BASIS_NTCOL(x, maxt) (((maxt) < (x)) ? 1 : ((maxt) / (x))) 22 // Define macro for computing the total threads in a block 23 // for use with __launch_bounds__() 24 #define MAGMA_BASIS_BOUNDS(x, maxt) (x * MAGMA_BASIS_NTCOL(x, maxt)) 25 26 #ifdef CEED_MAGMA_USE_HIP 27 typedef hipModule_t CeedMagmaModule; 28 typedef hipFunction_t CeedMagmaFunction; 29 #define CeedCompileMagma CeedCompileHip 30 #define CeedGetKernelMagma CeedGetKernelHip 31 #define CeedRunKernelMagma CeedRunKernelHip 32 #define CeedRunKernelDimMagma CeedRunKernelDimHip 33 #define CeedRunKernelDimSharedMagma CeedRunKernelDimSharedHip 34 #else 35 typedef CUmodule CeedMagmaModule; 36 typedef CUfunction CeedMagmaFunction; 37 #define CeedCompileMagma CeedCompileCuda 38 #define CeedGetKernelMagma CeedGetKernelCuda 39 #define CeedRunKernelMagma CeedRunKernelCuda 40 #define CeedRunKernelDimMagma CeedRunKernelDimCuda 41 #define CeedRunKernelDimSharedMagma CeedRunKernelDimSharedCuda 42 #endif 43 44 typedef enum { 45 MAGMA_KERNEL_DIM_GENERIC=101, 46 MAGMA_KERNEL_DIM_SPECIFIC=102 47 } magma_kernel_mode_t; 48 49 typedef struct { 50 magma_kernel_mode_t basis_kernel_mode; 51 magma_device_t device; 52 magma_queue_t queue; 53 } Ceed_Magma; 54 55 typedef struct { 56 CeedMagmaModule module; 57 CeedMagmaFunction magma_interp; 58 CeedMagmaFunction magma_interp_tr; 59 CeedMagmaFunction magma_grad; 60 CeedMagmaFunction magma_grad_tr; 61 CeedMagmaFunction magma_weight; 62 CeedScalar *dqref1d; 63 CeedScalar *dinterp1d; 64 CeedScalar *dgrad1d; 65 CeedScalar *dqweight1d; 66 } CeedBasis_Magma; 67 68 typedef struct { 69 CeedScalar *dqref; 70 CeedScalar *dinterp; 71 CeedScalar *dgrad; 72 CeedScalar *dqweight; 73 } CeedBasisNonTensor_Magma; 74 75 typedef enum { 76 OWNED_NONE = 0, 77 OWNED_UNPINNED, 78 OWNED_PINNED, 79 } OwnershipMode; 80 81 typedef struct { 82 CeedMagmaModule module; 83 CeedMagmaFunction StridedTranspose; 84 CeedMagmaFunction StridedNoTranspose; 85 CeedMagmaFunction OffsetTranspose; 86 CeedMagmaFunction OffsetNoTranspose; 87 CeedInt *offsets; 88 CeedInt *doffsets; 89 OwnershipMode own_; 90 int down_; // cover a case where we own Device memory 91 } CeedElemRestriction_Magma; 92 93 typedef struct { 94 const CeedScalar **inputs; 95 CeedScalar **outputs; 96 bool setupdone; 97 } CeedQFunction_Magma; 98 99 #define USE_MAGMA_BATCH 100 #define USE_MAGMA_BATCH2 101 #define USE_MAGMA_BATCH3 102 #define USE_MAGMA_BATCH4 103 104 CEED_INTERN void 105 magma_weight_nontensor( 106 magma_int_t grid, magma_int_t threads, magma_int_t nelem, 107 magma_int_t Q, 108 CeedScalar *dqweight, CeedScalar *dv, magma_queue_t queue); 109 110 CEED_INTERN int 111 magma_dgemm_nontensor( 112 magma_trans_t transA, magma_trans_t transB, 113 magma_int_t m, magma_int_t n, magma_int_t k, 114 double alpha, const double *dA, magma_int_t ldda, 115 const double *dB, magma_int_t lddb, 116 double beta, double *dC, magma_int_t lddc, 117 magma_queue_t queue ); 118 119 CEED_INTERN int 120 magma_sgemm_nontensor( 121 magma_trans_t transA, magma_trans_t transB, 122 magma_int_t m, magma_int_t n, magma_int_t k, 123 float alpha, const float *dA, magma_int_t ldda, 124 const float *dB, magma_int_t lddb, 125 float beta, float *dC, magma_int_t lddc, 126 magma_queue_t queue ); 127 128 CEED_INTERN void 129 gemm_selector( 130 int gpu_arch, 131 char precision, char transA, 132 int m, int n, int k, 133 int *nbatch, int *use_magma ); 134 135 CEED_INTERN magma_int_t 136 magma_isdevptr(const void *A); 137 138 CEED_INTERN int 139 CeedBasisCreateTensorH1_Magma( 140 CeedInt dim, CeedInt P1d, 141 CeedInt Q1d, 142 const CeedScalar *interp1d, 143 const CeedScalar *grad1d, 144 const CeedScalar *qref1d, 145 const CeedScalar *qweight1d, 146 CeedBasis basis); 147 148 CEED_INTERN int 149 CeedBasisCreateH1_Magma( 150 CeedElemTopology topo, CeedInt dim, 151 CeedInt ndof, CeedInt nqpts, 152 const CeedScalar *interp, 153 const CeedScalar *grad, 154 const CeedScalar *qref, 155 const CeedScalar *qweight, 156 CeedBasis basis); 157 158 CEED_INTERN int 159 CeedElemRestrictionCreate_Magma( 160 CeedMemType mtype, 161 CeedCopyMode cmode, 162 const CeedInt *offsets, 163 CeedElemRestriction r); 164 165 CEED_INTERN int 166 CeedElemRestrictionCreateBlocked_Magma( 167 const CeedMemType mtype, 168 const CeedCopyMode cmode, 169 const CeedInt *offsets, 170 const CeedElemRestriction res); 171 172 CEED_INTERN int CeedOperatorCreate_Magma(CeedOperator op); 173 174 // comment the line below to use the default magma_is_devptr function 175 #define magma_is_devptr magma_isdevptr 176 177 // if magma and cuda/ref are using the null stream, then ceed_magma_queue_sync 178 // should do nothing 179 #define ceed_magma_queue_sync(...) 180 181 // batch stride, override using -DMAGMA_BATCH_STRIDE=<desired-value> 182 #ifndef MAGMA_BATCH_STRIDE 183 #define MAGMA_BATCH_STRIDE (1000) 184 #endif 185 186 #endif // _ceed_magma_h 187