xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.h (revision 80a9ef0545a39c00cdcaab1ca26f8053604f3120)
1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3 // reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 // magma functions specific to ceed
18 #ifndef _ceed_magma_h
19 #define _ceed_magma_h
20 
21 #include <ceed/ceed.h>
22 #include <ceed/backend.h>
23 #include <magma_v2.h>
24 
// Selects how basis kernels are dispatched: a dimension-generic fallback
// kernel, or a kernel specialized for the problem dimension (1D/2D/3D).
// The distinct numeric values (101/102) make the mode easy to spot in a
// debugger and guard against accidental 0/1 truthiness use.
typedef enum {
  MAGMA_KERNEL_DIM_GENERIC=101,
  MAGMA_KERNEL_DIM_SPECIFIC=102
} magma_kernel_mode_t;
29 
// Backend context stored per Ceed object for the MAGMA backend.
typedef struct {
  magma_kernel_mode_t basis_kernel_mode;  // dispatch mode for basis kernels (generic vs dim-specific)
  magma_int_t maxthreads[3];              // thread-count limits passed to kernels; presumably indexed by dim-1 (1D/2D/3D) — TODO confirm against backend init
  magma_device_t device;                  // MAGMA device handle this Ceed is bound to
  magma_queue_t queue;                    // MAGMA queue (stream) on which all backend kernels are launched
} Ceed_Magma;
36 
// Tensor-product basis data for the MAGMA backend.
// The "d" prefix denotes device-resident arrays (cf. magma_isdevptr below);
// the "1d" suffix denotes one-dimensional factors of the tensor-product basis.
typedef struct {
  CeedScalar *dqref1d;    // 1D quadrature point coordinates
  CeedScalar *dinterp1d;  // 1D interpolation matrix
  CeedScalar *dgrad1d;    // 1D gradient (derivative) matrix
  CeedScalar *dqweight1d; // 1D quadrature weights
} CeedBasis_Magma;
43 
// Non-tensor (full-dimension) basis data for the MAGMA backend.
// Same roles as CeedBasis_Magma, but the matrices are full nqpts x ndof
// operators rather than 1D tensor factors; applied via *gemm_nontensor below.
typedef struct {
  CeedScalar *dqref;    // quadrature point coordinates (device)
  CeedScalar *dinterp;  // interpolation matrix (device)
  CeedScalar *dgrad;    // gradient matrix (device)
  CeedScalar *dqweight; // quadrature weights (device)
} CeedBasisNonTensor_Magma;
50 
// Element-restriction data for the MAGMA backend: the element-to-DoF offset
// map is kept in both host and device copies, with separate ownership flags
// so each copy can be borrowed or owned independently.
typedef struct {
  CeedInt *offsets;     // host copy of the offsets array
  CeedInt *doffsets;    // device copy of the offsets array
  int  own_;            // nonzero if this struct owns (must free) the host offsets
  int down_;            // cover a case where we own Device memory
} CeedElemRestriction_Magma;
57 
// QFunction data for the MAGMA backend: arrays of per-field pointers passed
// to the user QFunction, plus a lazy-initialization flag.
typedef struct {
  const CeedScalar **inputs;   // per-input-field data pointers
  CeedScalar **outputs;        // per-output-field data pointers
  bool setupdone;              // true once inputs/outputs arrays are allocated
} CeedQFunction_Magma;
63 
64 #define USE_MAGMA_BATCH
65 #define USE_MAGMA_BATCH2
66 #define USE_MAGMA_BATCH3
67 #define USE_MAGMA_BATCH4
68 
69 #ifdef __cplusplus
70 CEED_INTERN {
71 #endif
72 
  // Dimension-specific tensor interpolation kernels. Apply the 1D basis
  // matrix dT to ncomp components of nelem elements of dU, writing dV.
  // estrd*/cstrd* are the element and component strides of U/V; tmode
  // selects forward vs transpose application. maxthreads bounds the
  // threads-per-block used by the launch. All kernels run on `queue`.
  magma_int_t magma_interp_1d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  magma_int_t magma_interp_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  magma_int_t magma_interp_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // Dimension-generic interpolation: takes dim explicitly instead of being
  // specialized; no maxthreads tuning parameter.
  magma_int_t magma_interp_generic(magma_int_t P, magma_int_t Q,
                                   magma_int_t dim, magma_int_t ncomp,
                                   const CeedScalar *dT, CeedTransposeMode tmode,
                                   const CeedScalar *dU, magma_int_t u_elemstride,
                                   magma_int_t cstrdU,
                                   CeedScalar *dV, magma_int_t v_elemstride,
                                   magma_int_t cstrdV,
                                   magma_int_t nelem, magma_queue_t queue);

  // Top-level interpolation dispatcher: routes to the generic or the
  // dim-specific kernel according to kernel_mode (see magma_kernel_mode_t).
  // maxthreads is a pointer here — presumably the per-dimension limits array
  // from Ceed_Magma.maxthreads; TODO confirm against the implementation.
  magma_int_t magma_interp(
    magma_int_t P, magma_int_t Q,
    magma_int_t dim, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);
110 
  // Dimension-specific tensor gradient kernels. Like the interp kernels but
  // apply both the 1D interpolation and 1D gradient matrices; 2D/3D variants
  // add a dimension stride (dstrd*) for the extra derivative-direction axis
  // and come in notranspose (gradn_*) / transpose (gradt_*) pairs, while 1D
  // takes tmode directly.
  magma_int_t magma_grad_1d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dTinterp, const CeedScalar *dTgrad, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  magma_int_t magma_gradn_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  magma_int_t magma_gradt_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  magma_int_t magma_gradn_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  magma_int_t magma_gradt_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // Dimension-generic gradient kernel (dim passed explicitly, no maxthreads).
  magma_int_t magma_grad_generic(
    magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
    const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_queue_t queue);

  // Top-level gradient dispatcher; kernel_mode/maxthreads as in magma_interp.
  magma_int_t magma_grad(
    magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t u_elemstride, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t v_elemstride, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);
159 
  // Quadrature-weight kernels: expand the 1D weights dqweight1d into the
  // per-element weight output dV (stride v_stride between elements) for
  // nelem elements. Dim-specific, generic, and dispatching variants mirror
  // the interp/grad families above.
  magma_int_t magma_weight_1d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  magma_int_t magma_weight_2d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  magma_int_t magma_weight_3d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  magma_int_t magma_weight_generic(
    magma_int_t Q, magma_int_t dim,
    const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t vstride,
    magma_int_t nelem, magma_queue_t queue);

  // Dispatcher; kernel_mode/maxthreads as in magma_interp.
  magma_int_t magma_weight(
    magma_int_t Q, magma_int_t dim,
    const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);

  // Non-tensor weight kernel; caller supplies the launch geometry
  // (grid/threads) directly. Returns nothing — errors are not reported.
  void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t nelem,
                              magma_int_t Q,
                              CeedScalar *dqweight, CeedScalar *dv, magma_queue_t queue);
190 
  // Element-restriction data movement kernels. NOTE(review): semantics
  // inferred from names — read* gathers L-vector values (du) into an
  // E-vector (dv) and write* scatters back, using either an explicit
  // device offsets map (with component stride compstride) or a strided
  // layout described by `strides`; confirm against the kernel sources.
  void magma_readDofsOffset(const magma_int_t NCOMP,
                            const magma_int_t compstride,
                            const magma_int_t esize, const magma_int_t nelem,
                            magma_int_t *offsets, const CeedScalar *du, CeedScalar *dv,
                            magma_queue_t queue);

  void magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
                             const magma_int_t nelem, magma_int_t *strides,
                             const CeedScalar *du, CeedScalar *dv,
                             magma_queue_t queue);

  void magma_writeDofsOffset(const magma_int_t NCOMP,
                             const magma_int_t compstride,
                             const magma_int_t esize, const magma_int_t nelem,
                             magma_int_t *offsets,const CeedScalar *du, CeedScalar *dv,
                             magma_queue_t queue);

  void magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
                              const magma_int_t nelem, magma_int_t *strides,
                              const CeedScalar *du, CeedScalar *dv,
                              magma_queue_t queue);
212 
  // GEMM variants (double / single precision) used to apply non-tensor
  // bases: C = alpha*op(A)*op(B) + beta*C with BLAS-style leading
  // dimensions, executed on `queue`.
  int magma_dgemm_nontensor(
    magma_trans_t transA, magma_trans_t transB,
    magma_int_t m, magma_int_t n, magma_int_t k,
    double alpha, const double *dA, magma_int_t ldda,
    const double *dB, magma_int_t lddb,
    double beta,  double *dC, magma_int_t lddc,
    magma_queue_t queue );

  int magma_sgemm_nontensor(
    magma_trans_t transA, magma_trans_t transB,
    magma_int_t m, magma_int_t n, magma_int_t k,
    float alpha, const float *dA, magma_int_t ldda,
    const float *dB, magma_int_t lddb,
    float beta,  float *dC, magma_int_t lddc,
    magma_queue_t queue );

  // Tests whether A is a device pointer (aliased to magma_is_devptr via the
  // #define below). Presumably nonzero for device memory — confirm the
  // host/error return convention against the implementation.
  magma_int_t
  magma_isdevptr(const void *A);
231 
  // libCEED backend constructors: called by the core library to populate
  // the backend data (the *_Magma structs above) and install backend
  // function pointers on the given object. All return CEED error codes
  // (0 on success).

  // Tensor-product H1 basis: copies the 1D interp/grad/qref/qweight arrays
  // to the device (see CeedBasis_Magma).
  int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d,
                                    CeedInt Q1d,
                                    const CeedScalar *interp1d,
                                    const CeedScalar *grad1d,
                                    const CeedScalar *qref1d,
                                    const CeedScalar *qweight1d,
                                    CeedBasis basis);

  // Non-tensor H1 basis (see CeedBasisNonTensor_Magma).
  int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim,
                              CeedInt ndof, CeedInt nqpts,
                              const CeedScalar *interp,
                              const CeedScalar *grad,
                              const CeedScalar *qref,
                              const CeedScalar *qweight,
                              CeedBasis basis);

  // Element restriction from an offsets array; mtype says whether `offsets`
  // lives on host or device, cmode controls copy/ownership semantics.
  int CeedElemRestrictionCreate_Magma(CeedMemType mtype,
                                      CeedCopyMode cmode,
                                      const CeedInt *offsets,
                                      CeedElemRestriction r);

  int CeedElemRestrictionCreateBlocked_Magma(const CeedMemType mtype,
      const CeedCopyMode cmode,
      const CeedInt *offsets,
      const CeedElemRestriction res);

  int CeedOperatorCreate_Magma(CeedOperator op);
259 
260   #ifdef __cplusplus
261 }
262   #endif
263 
264 // comment the line below to use the default magma_is_devptr function
265 #define magma_is_devptr magma_isdevptr
266 
267 // if magma and cuda/ref are using the null stream, then ceed_magma_queue_sync
268 // should do nothing
269 #define ceed_magma_queue_sync(...)
270 
271 // batch stride, override using -DMAGMA_BATCH_STRIDE=<desired-value>
272 #ifndef MAGMA_BATCH_STRIDE
273 #define MAGMA_BATCH_STRIDE (1000)
274 #endif
275 
276 #endif  // _ceed_magma_h
277