xref: /libCEED/backends/magma/ceed-magma.h (revision dcefb99bf4e43737baea662a94271a0fc9c30aca)
1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3 // reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
// MAGMA functions specific to libCEED
18 #ifndef _ceed_magma_h
19 #define _ceed_magma_h
20 
21 #include <ceed/ceed.h>
22 #include <ceed/backend.h>
23 #include <magma_v2.h>
24 
// Selects which flavor of basis kernel the backend launches.
typedef enum {
  MAGMA_KERNEL_DIM_GENERIC=101,   // one kernel parameterized by dim (see *_generic prototypes below)
  MAGMA_KERNEL_DIM_SPECIFIC=102   // kernels specialized per dimension (the *_1d/_2d/_3d prototypes)
} magma_kernel_mode_t;
29 
// Backend-wide context attached to a Ceed object by the MAGMA backend.
typedef struct {
  magma_kernel_mode_t basis_kernel_mode;  // generic vs. dim-specific basis kernels (see enum above)
  magma_int_t maxthreads[3];              // thread-count limits passed to the 1d/2d/3d kernels
                                          // (presumably indexed by dim-1 — confirm where it is filled in)
  magma_device_t device;                  // MAGMA device this Ceed instance runs on
  magma_queue_t queue;                    // MAGMA queue used for all kernel launches below
} Ceed_Magma;
36 
// Tensor-product basis data; "d"-prefixed pointers are device memory
// (cf. the host/device split in CeedElemRestriction_Magma below).
typedef struct {
  CeedScalar *dqref1d;     // 1D quadrature point coordinates
  CeedScalar *dinterp1d;   // 1D interpolation matrix
  CeedScalar *dgrad1d;     // 1D gradient matrix
  CeedScalar *dqweight1d;  // 1D quadrature weights
} CeedBasis_Magma;
43 
// Non-tensor (full-matrix) basis data; same device-pointer convention as
// CeedBasis_Magma, but without the 1D tensor factorization.
typedef struct {
  CeedScalar *dqref;     // quadrature point coordinates
  CeedScalar *dinterp;   // interpolation matrix
  CeedScalar *dgrad;     // gradient matrix
  CeedScalar *dqweight;  // quadrature weights
} CeedBasisNonTensor_Magma;
50 
// Element restriction data: the element-to-L-vector index map, kept both on
// host and device, with separate ownership flags for each copy.
typedef struct {
  CeedInt *offsets;     // host copy of the offsets array
  CeedInt *doffsets;    // device copy of the offsets array
  int  own_;            // nonzero if we own (must free) the host array
  int down_;            // nonzero if we own the device copy — covers the case
                        // where only device memory is owned
} CeedElemRestriction_Magma;
57 
// Per-QFunction state: cached field pointer arrays and a one-time setup flag.
typedef struct {
  const CeedScalar **inputs;   // array of input field pointers passed to the user QFunction
  CeedScalar **outputs;        // array of output field pointers
  bool setupdone;              // true once the arrays above have been allocated/initialized
} CeedQFunction_Magma;
63 
// Compile-time switches enabling the MAGMA batched code paths.
// NOTE(review): defined unconditionally here; presumably the implementation
// files check these to select batched kernels — confirm before removing any.
#define USE_MAGMA_BATCH
#define USE_MAGMA_BATCH2
#define USE_MAGMA_BATCH3
#define USE_MAGMA_BATCH4
68 
69 #ifdef __cplusplus
70 CEED_INTERN {
71 #endif
72 
  // ---- Basis interpolation kernels ---------------------------------------
  // Apply the interpolation matrix dT to nelem elements of dU, writing dV.
  // tmode selects normal vs. transpose application; estrd*/cstrd* are the
  // element and component strides of U and V (names by convention — the
  // kernels' definitions live in the .cu files).

  // Dimension-specialized interpolation, 1D elements.
  magma_int_t magma_interp_1d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // Dimension-specialized interpolation, 2D elements.
  magma_int_t magma_interp_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // Dimension-specialized interpolation, 3D elements.
  magma_int_t magma_interp_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // Dimension-generic interpolation (dim is a runtime parameter).
  // NOTE(review): takes double* where the specialized variants take
  // CeedScalar* — assumes CeedScalar == double; confirm against the build.
  magma_int_t magma_interp_generic(magma_int_t P, magma_int_t Q,
                                   magma_int_t dim, magma_int_t ncomp,
                                   const double *dT, CeedTransposeMode tmode,
                                   const double *dU, magma_int_t u_elemstride,
                                   magma_int_t cstrdU,
                                   double *dV, magma_int_t v_elemstride,
                                   magma_int_t cstrdV,
                                   magma_int_t nelem, magma_queue_t queue);

  // Dispatcher: kernel_mode chooses between the generic kernel and the
  // dim-specific ones; maxthreads supplies the per-dimension thread limits.
  magma_int_t magma_interp(
    magma_int_t P, magma_int_t Q,
    magma_int_t dim, magma_int_t ncomp,
    const double *dT, CeedTransposeMode tmode,
    const double *dU, magma_int_t estrdU, magma_int_t cstrdU,
    double *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);
110 
  // ---- Basis gradient kernels --------------------------------------------
  // Apply the gradient (using the 1D interp and grad matrices) to nelem
  // elements of dU, writing dV.  For 2D/3D there are separate no-transpose
  // (gradn_*) and transpose (gradt_*) entry points; dstrd* is the stride
  // between dimension components of the gradient.

  // 1D gradient (handles both transpose modes via tmode).
  magma_int_t magma_grad_1d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dTinterp, const CeedScalar *dTgrad, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // 2D gradient, no-transpose variant.
  magma_int_t magma_gradn_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // 2D gradient, transpose variant.
  magma_int_t magma_gradt_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // 3D gradient, no-transpose variant.
  magma_int_t magma_gradn_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // 3D gradient, transpose variant.
  magma_int_t magma_gradt_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // Dimension-generic gradient (dim is a runtime parameter).
  magma_int_t magma_grad_generic(
    magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
    const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_queue_t queue);

  // Dispatcher: kernel_mode chooses between the generic kernel and the
  // dim-specific ones; maxthreads supplies the per-dimension thread limits.
  magma_int_t magma_grad(
    magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t u_elemstride, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t v_elemstride, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);
159 
  // ---- Quadrature-weight kernels -----------------------------------------
  // Write the (tensor-product of the) 1D quadrature weights dqweight1d into
  // dV for nelem elements; v_stride is the per-element stride of dV.

  // Dimension-specialized weights, 1D elements.
  magma_int_t magma_weight_1d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // Dimension-specialized weights, 2D elements.
  magma_int_t magma_weight_2d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // Dimension-specialized weights, 3D elements.
  magma_int_t magma_weight_3d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_int_t maxthreads, magma_queue_t queue);

  // Dimension-generic weights (dim is a runtime parameter).
  magma_int_t magma_weight_generic(
    magma_int_t Q, magma_int_t dim,
    const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t vstride,
    magma_int_t nelem, magma_queue_t queue);

  // Dispatcher: kernel_mode chooses between the generic kernel and the
  // dim-specific ones; maxthreads supplies the per-dimension thread limits.
  magma_int_t magma_weight(
    magma_int_t Q, magma_int_t dim,
    const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_int_t *maxthreads, magma_queue_t queue);

  // Non-tensor weights: caller supplies the launch geometry (grid/threads)
  // explicitly instead of a kernel_mode/maxthreads pair.
  void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t nelem,
                              magma_int_t Q,
                              double *dqweight, double *dv, magma_queue_t queue);
190 
  // ---- Element restriction kernels ---------------------------------------
  // Gather (read*) / scatter (write*) element data between the L-vector du
  // and the E-vector dv.  "Offset" variants use an explicit offsets index
  // array; "Strided" variants use a strides triple instead.

  // Gather via an offsets array; compstride is the component stride of du.
  void magma_readDofsOffset(const magma_int_t NCOMP,
                            const magma_int_t compstride,
                            const magma_int_t esize, const magma_int_t nelem,
                            magma_int_t *offsets, const double *du, double *dv,
                            magma_queue_t queue);

  // Gather with strided (no-index) access.
  void magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
                             const magma_int_t nelem, magma_int_t *strides,
                             const double *du, double *dv,
                             magma_queue_t queue);

  // Scatter via an offsets array (inverse of magma_readDofsOffset).
  void magma_writeDofsOffset(const magma_int_t NCOMP,
                             const magma_int_t compstride,
                             const magma_int_t esize, const magma_int_t nelem,
                             magma_int_t *offsets,const double *du, double *dv,
                             magma_queue_t queue);

  // Scatter with strided (no-index) access (inverse of magma_readDofsStrided).
  void magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
                              const magma_int_t nelem, magma_int_t *strides,
                              const double *du, double *dv,
                              magma_queue_t queue);
212 
  // GEMM used by the non-tensor basis path:
  // dC = alpha*op(dA)*op(dB) + beta*dC (standard BLAS dgemm semantics).
  int magma_dgemm_nontensor(
    magma_trans_t transA, magma_trans_t transB,
    magma_int_t m, magma_int_t n, magma_int_t k,
    double alpha, const double *dA, magma_int_t ldda,
    const double *dB, magma_int_t lddb,
    double beta,  double *dC, magma_int_t lddc,
    magma_queue_t queue );

  // Returns whether A is a device pointer (aliased to magma_is_devptr below).
  magma_int_t
  magma_isdevptr(const void *A);
223 
  // ---- Backend object constructors ---------------------------------------
  // Entry points registered with the libCEED core; each populates the given
  // Ceed object with this backend's implementation.  Return CeedError codes
  // (0 on success), per the libCEED backend convention.

  // Create a tensor-product H1 basis from the 1D matrices/points/weights.
  int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d,
                                    CeedInt Q1d,
                                    const CeedScalar *interp1d,
                                    const CeedScalar *grad1d,
                                    const CeedScalar *qref1d,
                                    const CeedScalar *qweight1d,
                                    CeedBasis basis);

  // Create a non-tensor H1 basis from full interpolation/gradient matrices.
  int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim,
                              CeedInt ndof, CeedInt nqpts,
                              const CeedScalar *interp,
                              const CeedScalar *grad,
                              const CeedScalar *qref,
                              const CeedScalar *qweight,
                              CeedBasis basis);

  // Create an element restriction from an offsets array in mtype memory,
  // copied/owned per cmode.
  int CeedElemRestrictionCreate_Magma(CeedMemType mtype,
                                      CeedCopyMode cmode,
                                      const CeedInt *offsets,
                                      CeedElemRestriction r);

  // Blocked variant of the element restriction constructor.
  int CeedElemRestrictionCreateBlocked_Magma(const CeedMemType mtype,
      const CeedCopyMode cmode,
      const CeedInt *offsets,
      const CeedElemRestriction res);

  // Create the operator implementation for this backend.
  int CeedOperatorCreate_Magma(CeedOperator op);
251 
252   #ifdef __cplusplus
253 }
254   #endif
255 
256 // comment the line below to use the default magma_is_devptr function
257 #define magma_is_devptr magma_isdevptr
258 
259 // if magma and cuda/ref are using the null stream, then ceed_magma_queue_sync
260 // should do nothing
261 #define ceed_magma_queue_sync(...)
262 
263 // batch stride, override using -DMAGMA_BATCH_STRIDE=<desired-value>
264 #ifndef MAGMA_BATCH_STRIDE
265 #define MAGMA_BATCH_STRIDE (1000)
266 #endif
267 
268 #endif  // _ceed_magma_h
269