xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.h (revision e2cfdb03597769a59e92151c2ed08fee92f7b662)
1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3 // reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 // magma functions specific to ceed
18 #ifndef _ceed_magma_h
19 #define _ceed_magma_h
20 
21 #include <ceed/ceed.h>
22 #include <ceed/backend.h>
23 #include <magma_v2.h>
24 
// Selects how basis-apply kernels are launched: one generic kernel that
// handles any dimension, or specialized per-dimension (1d/2d/3d) kernels.
typedef enum {
  MAGMA_KERNEL_DIM_GENERIC=101,   // dimension-generic kernels (magma_*_generic)
  MAGMA_KERNEL_DIM_SPECIFIC=102   // dimension-specific kernels (magma_*_1d/2d/3d)
} magma_kernel_mode_t;
29 
// Per-Ceed backend context for the MAGMA backend.
typedef struct {
  magma_kernel_mode_t basis_kernel_mode;  // generic vs dimension-specific basis kernels
  magma_int_t maxthreads[3];              // thread-count limits passed to basis kernels
                                          // (presumably indexed by dim-1 — TODO confirm)
  magma_device_t device;                  // MAGMA device this Ceed context runs on
  magma_queue_t queue;                    // MAGMA queue used for all kernel launches
} Ceed_Magma;
36 
// Backend data for a tensor-product CeedBasis.
// NOTE(review): the "d" prefix appears to denote device-resident copies of
// the 1D basis arrays (they are passed as kernel arguments below) — confirm.
typedef struct {
  CeedScalar *dqref1d;    // 1D quadrature point locations
  CeedScalar *dinterp1d;  // 1D interpolation matrix (Q x P)
  CeedScalar *dgrad1d;    // 1D gradient matrix
  CeedScalar *dqweight1d; // 1D quadrature weights
} CeedBasis_Magma;
43 
// Backend data for a non-tensor CeedBasis; full (not 1D-factored) operator
// matrices, applied via the *gemm_nontensor wrappers declared below.
// NOTE(review): "d" prefix presumed to mean device pointers — confirm.
typedef struct {
  CeedScalar *dqref;     // quadrature point locations
  CeedScalar *dinterp;   // full interpolation matrix
  CeedScalar *dgrad;     // full gradient matrix
  CeedScalar *dqweight;  // quadrature weights
} CeedBasisNonTensor_Magma;
50 
// Who owns (and therefore must free) a host allocation, and how it was
// allocated.
typedef enum {
  OWNED_NONE = 0,   // not owned by this backend object
  OWNED_UNPINNED,   // owned, ordinary pageable host memory
  OWNED_PINNED,     // owned, pinned (page-locked) host memory
} OwnershipMode;
56 
// Backend data for a CeedElemRestriction.
typedef struct {
  CeedInt *offsets;     // element-to-dof offsets, host copy
  CeedInt *doffsets;    // element-to-dof offsets, device copy
                        // (passed to magma_{read,write}DofsOffset below)
  OwnershipMode own_;   // ownership/allocation mode of the host offsets
  int down_;            // cover a case where we own Device memory
} CeedElemRestriction_Magma;
63 
// Backend data for a CeedQFunction.
typedef struct {
  const CeedScalar **inputs;  // per-field input pointers handed to the QFunction
  CeedScalar **outputs;       // per-field output pointers
  bool setupdone;             // true once the one-time setup has been performed
} CeedQFunction_Magma;
69 
70 #define USE_MAGMA_BATCH
71 #define USE_MAGMA_BATCH2
72 #define USE_MAGMA_BATCH3
73 #define USE_MAGMA_BATCH4
74 
75 #ifdef __cplusplus
76 CEED_INTERN {
77 #endif
78 
  // ---------------------------------------------------------------------------
  // Basis interpolation kernels.
  // NOTE(review): parameter semantics inferred from names — confirm against the
  // kernel sources: P/Q = number of 1D basis nodes / quadrature points,
  // ncomp = field components, dT = 1D interpolation matrix (applied transposed
  // or not per tmode), dU/dV = input/output arrays with element strides
  // (estrdU/estrdV) and component strides (cstrdU/cstrdV), nelem = elements in
  // the batch, maxthreads = thread limit for the launch, queue = MAGMA queue.
  // ---------------------------------------------------------------------------

  // Dimension-specific interpolation, 1D elements.
  magma_int_t magma_interp_1d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // Dimension-specific interpolation, 2D tensor-product elements.
  magma_int_t magma_interp_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // Dimension-specific interpolation, 3D tensor-product elements.
  magma_int_t magma_interp_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // Dimension-generic interpolation (takes dim explicitly; no maxthreads).
  magma_int_t magma_interp_generic(magma_int_t P, magma_int_t Q,
                                   magma_int_t dim, magma_int_t ncomp,
                                   const CeedScalar *dT, CeedTransposeMode tmode,
                                   const CeedScalar *dU, magma_int_t u_elemstride,
                                   magma_int_t cstrdU,
                                   CeedScalar *dV, magma_int_t v_elemstride,
                                   magma_int_t cstrdV,
                                   magma_int_t nelem, magma_queue_t queue);

  // Dispatcher: routes to the generic or dimension-specific kernel according
  // to kernel_mode; maxthreads points at the per-dimension thread limits
  // (cf. Ceed_Magma.maxthreads).
  magma_int_t magma_interp(
    magma_int_t P, magma_int_t Q,
    magma_int_t dim, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);
116 
  // ---------------------------------------------------------------------------
  // Basis gradient kernels. Compared to the interp kernels these take both the
  // 1D interpolation and 1D gradient matrices, plus an extra derivative-
  // dimension stride (dstrdU/dstrdV) on input/output.
  // NOTE(review): the "gradn"/"gradt" pair presumably implements the
  // non-transposed vs transposed application split for 2D/3D — confirm.
  // ---------------------------------------------------------------------------

  // Gradient, 1D elements (single kernel handles both transpose modes).
  magma_int_t magma_grad_1d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dTinterp, const CeedScalar *dTgrad, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // Gradient, 2D elements, non-transposed variant.
  magma_int_t magma_gradn_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // Gradient, 2D elements, transposed variant.
  magma_int_t magma_gradt_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // Gradient, 3D elements, non-transposed variant.
  magma_int_t magma_gradn_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // Gradient, 3D elements, transposed variant.
  magma_int_t magma_gradt_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // Dimension-generic gradient (takes dim explicitly; no maxthreads).
  magma_int_t magma_grad_generic(
    magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
    const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_queue_t queue);

  // Dispatcher: routes to the generic or dimension-specific gradient kernels
  // according to kernel_mode (cf. magma_interp).
  magma_int_t magma_grad(
    magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t u_elemstride, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t v_elemstride, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);
165 
  // ---------------------------------------------------------------------------
  // Quadrature-weight kernels: expand the 1D weights dqweight1d into the
  // per-element output dV; v_stride/vstride is the per-element stride of dV.
  // ---------------------------------------------------------------------------

  // Weights, 1D elements.
  magma_int_t magma_weight_1d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // Weights, 2D tensor-product elements.
  magma_int_t magma_weight_2d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // Weights, 3D tensor-product elements.
  magma_int_t magma_weight_3d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // Dimension-generic weights (takes dim explicitly; no maxthreads).
  magma_int_t magma_weight_generic(
    magma_int_t Q, magma_int_t dim,
    const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t vstride,
    magma_int_t nelem, magma_queue_t queue);

  // Dispatcher: routes to the generic or dimension-specific weight kernels
  // according to kernel_mode (cf. magma_interp).
  magma_int_t magma_weight(
    magma_int_t Q, magma_int_t dim,
    const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);

  // Weights for non-tensor bases; caller supplies the launch geometry
  // (grid/threads) directly and there is no return status.
  void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t nelem,
                              magma_int_t Q,
                              CeedScalar *dqweight, CeedScalar *dv, magma_queue_t queue);
196 
  // ---------------------------------------------------------------------------
  // Element-restriction gather/scatter kernels: du is the source, dv the
  // destination. "read" presumably gathers L-vector -> E-vector and "write"
  // scatters E-vector -> L-vector (NOTE(review): confirm against the kernels).
  // NCOMP = components, compstride = component stride, esize = per-element
  // size, nelem = number of elements.
  // ---------------------------------------------------------------------------

  // Gather using an explicit element-to-dof offsets array.
  void magma_readDofsOffset(const magma_int_t NCOMP,
                            const magma_int_t compstride,
                            const magma_int_t esize, const magma_int_t nelem,
                            magma_int_t *offsets, const CeedScalar *du, CeedScalar *dv,
                            magma_queue_t queue);

  // Gather using constant strides instead of an offsets array.
  void magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
                             const magma_int_t nelem, magma_int_t *strides,
                             const CeedScalar *du, CeedScalar *dv,
                             magma_queue_t queue);

  // Scatter using an explicit element-to-dof offsets array.
  void magma_writeDofsOffset(const magma_int_t NCOMP,
                             const magma_int_t compstride,
                             const magma_int_t esize, const magma_int_t nelem,
                             magma_int_t *offsets,const CeedScalar *du, CeedScalar *dv,
                             magma_queue_t queue);

  // Scatter using constant strides instead of an offsets array.
  void magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
                              const magma_int_t nelem, magma_int_t *strides,
                              const CeedScalar *du, CeedScalar *dv,
                              magma_queue_t queue);
218 
  // GEMM wrapper used for non-tensor basis application, double precision:
  // C = alpha*op(A)*op(B) + beta*C with leading dimensions ldda/lddb/lddc.
  int magma_dgemm_nontensor(
    magma_trans_t transA, magma_trans_t transB,
    magma_int_t m, magma_int_t n, magma_int_t k,
    double alpha, const double *dA, magma_int_t ldda,
    const double *dB, magma_int_t lddb,
    double beta,  double *dC, magma_int_t lddc,
    magma_queue_t queue );

  // Single-precision counterpart of magma_dgemm_nontensor.
  int magma_sgemm_nontensor(
    magma_trans_t transA, magma_trans_t transB,
    magma_int_t m, magma_int_t n, magma_int_t k,
    float alpha, const float *dA, magma_int_t ldda,
    const float *dB, magma_int_t lddb,
    float beta,  float *dC, magma_int_t lddc,
    magma_queue_t queue );

  // Tests whether A is a device pointer (NOTE(review): exact return
  // convention — 1/0 vs error code — not visible here; check the definition).
  magma_int_t
  magma_isdevptr(const void *A);
237 
  // ---------------------------------------------------------------------------
  // libCEED backend entry points: populate the given Ceed objects with the
  // MAGMA implementations declared above. Return a CeedError status (0 = ok).
  // ---------------------------------------------------------------------------

  // Create a tensor-product H1 basis from the 1D interp/grad/qref/qweight data.
  int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d,
                                    CeedInt Q1d,
                                    const CeedScalar *interp1d,
                                    const CeedScalar *grad1d,
                                    const CeedScalar *qref1d,
                                    const CeedScalar *qweight1d,
                                    CeedBasis basis);

  // Create a non-tensor H1 basis from full interp/grad matrices.
  int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim,
                              CeedInt ndof, CeedInt nqpts,
                              const CeedScalar *interp,
                              const CeedScalar *grad,
                              const CeedScalar *qref,
                              const CeedScalar *qweight,
                              CeedBasis basis);

  // Create an element restriction; mtype/cmode control where the offsets live
  // and whether they are copied or taken over (cf. OwnershipMode).
  int CeedElemRestrictionCreate_Magma(CeedMemType mtype,
                                      CeedCopyMode cmode,
                                      const CeedInt *offsets,
                                      CeedElemRestriction r);

  // Blocked-layout variant of CeedElemRestrictionCreate_Magma.
  int CeedElemRestrictionCreateBlocked_Magma(const CeedMemType mtype,
      const CeedCopyMode cmode,
      const CeedInt *offsets,
      const CeedElemRestriction res);

  // Create the MAGMA implementation of a CeedOperator.
  int CeedOperatorCreate_Magma(CeedOperator op);
265 
266   #ifdef __cplusplus
267 }
268   #endif
269 
270 // comment the line below to use the default magma_is_devptr function
271 #define magma_is_devptr magma_isdevptr
272 
273 // if magma and cuda/ref are using the null stream, then ceed_magma_queue_sync
274 // should do nothing
275 #define ceed_magma_queue_sync(...)
276 
277 // batch stride, override using -DMAGMA_BATCH_STRIDE=<desired-value>
278 #ifndef MAGMA_BATCH_STRIDE
279 #define MAGMA_BATCH_STRIDE (1000)
280 #endif
281 
282 #endif  // _ceed_magma_h
283