xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma.h (revision ce18bed930e8f3bfebcf709a18844aba97fe4630)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 // magma functions specific to ceed
9 #ifndef _ceed_magma_h
10 #define _ceed_magma_h
11 
12 #include <ceed/ceed.h>
13 #include <ceed/backend.h>
14 #include <magma_v2.h>
15 
// Selects how basis kernels are dispatched: a generic kernel that takes the
// dimension at runtime, or a kernel specialized for a specific dimension.
// NOTE(review): 101/102 look like arbitrary tags chosen to avoid accidental
// collisions — confirm nothing depends on the exact numeric values.
typedef enum {
  MAGMA_KERNEL_DIM_GENERIC=101,   // one kernel handles any dim at runtime
  MAGMA_KERNEL_DIM_SPECIFIC=102   // kernel compiled for a fixed dim (1/2/3)
} magma_kernel_mode_t;
20 
// Per-Ceed backend data: device/queue context and the kernel dispatch mode.
typedef struct {
  magma_kernel_mode_t basis_kernel_mode;  // generic vs dim-specific basis kernels
  magma_device_t device;                  // MAGMA device handle
  magma_queue_t queue;                    // MAGMA queue used for kernel launches
} Ceed_Magma;
26 
// Per-CeedBasis backend data for tensor-product bases: copies of the 1D
// basis arrays.  The d-prefix suggests device-resident pointers (MAGMA
// convention) — allocation is not visible in this header; confirm in the
// corresponding .c file.
typedef struct {
  CeedScalar *dqref1d;     // 1D quadrature point locations
  CeedScalar *dinterp1d;   // 1D interpolation matrix
  CeedScalar *dgrad1d;     // 1D gradient matrix
  CeedScalar *dqweight1d;  // 1D quadrature weights
} CeedBasis_Magma;
33 
// Per-CeedBasis backend data for non-tensor (H1) bases: full-dimensional
// basis arrays rather than 1D factors.  Same d-prefix convention as
// CeedBasis_Magma (presumably device pointers — confirm against the .c).
typedef struct {
  CeedScalar *dqref;     // quadrature point locations
  CeedScalar *dinterp;   // interpolation matrix
  CeedScalar *dgrad;     // gradient matrix
  CeedScalar *dqweight;  // quadrature weights
} CeedBasisNonTensor_Magma;
40 
// Who owns (and therefore must free) a host buffer held by a backend object,
// and whether that buffer is pinned (page-locked).
typedef enum {
  OWNED_NONE = 0,   // not owned by us — caller retains ownership
  OWNED_UNPINNED,   // we own pageable host memory
  OWNED_PINNED,     // we own pinned host memory
} OwnershipMode;
46 
// Per-CeedElemRestriction backend data: element offset arrays and ownership
// tracking for the host/device copies.
typedef struct {
  CeedInt *offsets;     // host copy of the element offsets
  CeedInt *doffsets;    // device copy (d-prefix) of the element offsets
  OwnershipMode own_;   // ownership of the host offsets array
  int down_;            // cover a case where we own Device memory
} CeedElemRestriction_Magma;
53 
// Per-CeedQFunction backend data: cached field pointer arrays plus a
// one-time setup flag.
typedef struct {
  const CeedScalar **inputs;   // array of input field pointers (presumably one per field)
  CeedScalar **outputs;        // array of output field pointers
  bool setupdone;              // true once setup has run, so it is not repeated
} CeedQFunction_Magma;
59 
// Feature toggles enabling batched variants of the MAGMA kernels.
// NOTE(review): always defined here, unconditionally — if these are meant to
// be user-configurable they should be wrapped in #ifndef guards.
#define USE_MAGMA_BATCH
#define USE_MAGMA_BATCH2
#define USE_MAGMA_BATCH3
#define USE_MAGMA_BATCH4
64 
65 #ifdef __cplusplus
66 CEED_INTERN {
67 #endif
68 
  // ---- Interpolation kernels ---------------------------------------------
  // Apply the basis interpolation matrix dT (P x Q per 1D factor) to nelem
  // elements of dU, writing dV.  tmode selects normal vs transpose
  // application; estrd*/cstrd* are the element and component strides of
  // U/V.  All return a MAGMA status code.
  // NOTE(review): the per-element data layout (ordering of node/component
  // indices) is not visible here — confirm against the kernel sources.

  // Dimension-specific kernel, dim == 1.
  magma_int_t magma_interp_1d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_queue_t queue);

  // Dimension-specific kernel, dim == 2.
  magma_int_t magma_interp_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_queue_t queue);

  // Dimension-specific kernel, dim == 3.
  magma_int_t magma_interp_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_queue_t queue);

  // Generic kernel: dimension supplied at runtime.
  magma_int_t magma_interp_generic(magma_int_t P, magma_int_t Q,
                                   magma_int_t dim, magma_int_t ncomp,
                                   const CeedScalar *dT, CeedTransposeMode tmode,
                                   const CeedScalar *dU, magma_int_t u_elemstride,
                                   magma_int_t cstrdU,
                                   CeedScalar *dV, magma_int_t v_elemstride,
                                   magma_int_t cstrdV,
                                   magma_int_t nelem, magma_queue_t queue);

  // Dispatcher: selects the generic or the dim-specific kernel according to
  // kernel_mode (see magma_kernel_mode_t).
  magma_int_t magma_interp(
    magma_int_t P, magma_int_t Q,
    magma_int_t dim, magma_int_t ncomp,
    const CeedScalar *dT, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue);
106 
  // ---- Gradient kernels --------------------------------------------------
  // Apply the basis gradient to nelem elements of dU, writing dV.  The 2D/3D
  // variants take both the 1D interpolation and 1D gradient matrices (tensor
  // contraction mixes the two across directions) and an extra dimension
  // stride dstrd*.  All return a MAGMA status code.
  // NOTE(review): the gradn_*/gradt_* pairs presumably split the
  // non-transpose and transpose cases, yet both still take tmode — confirm
  // how tmode interacts with the n/t split in the kernel sources.

  // 1D gradient kernel.
  magma_int_t magma_grad_1d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dTinterp, const CeedScalar *dTgrad, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
    magma_int_t nelem, magma_queue_t queue);

  // 2D gradient kernel ("n" variant).
  magma_int_t magma_gradn_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_queue_t queue);

  // 2D gradient kernel ("t" variant).
  magma_int_t magma_gradt_2d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_queue_t queue);

  // 3D gradient kernel ("n" variant).
  magma_int_t magma_gradn_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_queue_t queue);

  // 3D gradient kernel ("t" variant).
  magma_int_t magma_gradt_3d(
    magma_int_t P, magma_int_t Q, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_queue_t queue);

  // Generic gradient kernel: dimension supplied at runtime.
  magma_int_t magma_grad_generic(
    magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
    const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_queue_t queue);

  // Dispatcher: selects the generic or the dim-specific gradient kernel
  // according to kernel_mode (see magma_kernel_mode_t).
  magma_int_t magma_grad(
    magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
    const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
    const CeedScalar *dU, magma_int_t u_elemstride, magma_int_t cstrdU, magma_int_t dstrdU,
    CeedScalar *dV, magma_int_t v_elemstride, magma_int_t cstrdV, magma_int_t dstrdV,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue);
155 
  // ---- Quadrature-weight kernels -----------------------------------------
  // Write the tensor-product quadrature weights (built from the 1D weights
  // dqweight1d) into dV for nelem elements, v_stride apart.  Return a MAGMA
  // status code.

  // Dimension-specific kernel, dim == 1.
  magma_int_t magma_weight_1d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_queue_t queue);

  // Dimension-specific kernel, dim == 2.
  magma_int_t magma_weight_2d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_queue_t queue);

  // Dimension-specific kernel, dim == 3.
  magma_int_t magma_weight_3d(
    magma_int_t Q, const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_queue_t queue);

  // Generic kernel: dimension supplied at runtime.
  magma_int_t magma_weight_generic(
    magma_int_t Q, magma_int_t dim,
    const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t vstride,
    magma_int_t nelem, magma_queue_t queue);

  // Dispatcher: selects the generic or the dim-specific weight kernel
  // according to kernel_mode (see magma_kernel_mode_t).
  magma_int_t magma_weight(
    magma_int_t Q, magma_int_t dim,
    const CeedScalar *dqweight1d,
    CeedScalar *dV, magma_int_t v_stride,
    magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue);
186 
187   void magma_readDofsOffset(const magma_int_t NCOMP,
188                             const magma_int_t compstride,
189                             const magma_int_t esize, const magma_int_t nelem,
190                             magma_int_t *offsets, const CeedScalar *du, CeedScalar *dv,
191                             magma_queue_t queue);
192 
193   void magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
194                              const magma_int_t nelem, magma_int_t *strides,
195                              const CeedScalar *du, CeedScalar *dv,
196                              magma_queue_t queue);
197 
198   void magma_writeDofsOffset(const magma_int_t NCOMP,
199                              const magma_int_t compstride,
200                              const magma_int_t esize, const magma_int_t nelem,
201                              magma_int_t *offsets,const CeedScalar *du, CeedScalar *dv,
202                              magma_queue_t queue);
203 
204   void magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
205                               const magma_int_t nelem, magma_int_t *strides,
206                               const CeedScalar *du, CeedScalar *dv,
207                               magma_queue_t queue);
208 
  // ---- GEMM helpers for non-tensor bases ---------------------------------
  // BLAS-style C = alpha*op(A)*op(B) + beta*C wrappers used by the
  // non-tensor basis path, in double and single precision.

  // Double-precision GEMM.
  int magma_dgemm_nontensor(
    magma_trans_t transA, magma_trans_t transB,
    magma_int_t m, magma_int_t n, magma_int_t k,
    double alpha, const double *dA, magma_int_t ldda,
    const double *dB, magma_int_t lddb,
    double beta,  double *dC, magma_int_t lddc,
    magma_queue_t queue );

  // Single-precision GEMM.
  int magma_sgemm_nontensor(
    magma_trans_t transA, magma_trans_t transB,
    magma_int_t m, magma_int_t n, magma_int_t k,
    float alpha, const float *dA, magma_int_t ldda,
    const float *dB, magma_int_t lddb,
    float beta,  float *dC, magma_int_t lddc,
    magma_queue_t queue );

  // Query whether A is a device pointer (presumably nonzero iff device
  // resident — confirm the exact return convention in the implementation).
  magma_int_t
  magma_isdevptr(const void *A);
227 
  // ---- Backend constructors registered with the CEED interface -----------
  // Each populates the backend data of an already-created CEED object and
  // returns a CEED error code.

  // Set up a tensor-product H1 basis from its 1D factors.
  int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d,
                                    CeedInt Q1d,
                                    const CeedScalar *interp1d,
                                    const CeedScalar *grad1d,
                                    const CeedScalar *qref1d,
                                    const CeedScalar *qweight1d,
                                    CeedBasis basis);

  // Set up a general (non-tensor) H1 basis.
  int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim,
                              CeedInt ndof, CeedInt nqpts,
                              const CeedScalar *interp,
                              const CeedScalar *grad,
                              const CeedScalar *qref,
                              const CeedScalar *qweight,
                              CeedBasis basis);

  // Set up an element restriction from an offsets array.
  int CeedElemRestrictionCreate_Magma(CeedMemType mtype,
                                      CeedCopyMode cmode,
                                      const CeedInt *offsets,
                                      CeedElemRestriction r);

  // Set up a blocked element restriction from an offsets array.
  int CeedElemRestrictionCreateBlocked_Magma(const CeedMemType mtype,
      const CeedCopyMode cmode,
      const CeedInt *offsets,
      const CeedElemRestriction res);

  // Set up an operator's backend data.
  int CeedOperatorCreate_Magma(CeedOperator op);
255 
256   #ifdef __cplusplus
257 }
258   #endif
259 
// Route device-pointer checks through our magma_isdevptr implementation;
// comment this define out to fall back to MAGMA's default magma_is_devptr.
#define magma_is_devptr magma_isdevptr

// MAGMA and the CUDA/ref backends are assumed to share the NULL stream, so
// queue synchronization can be a no-op; the macro expands to nothing.
#define ceed_magma_queue_sync(...)

// Batch stride; override at compile time with -DMAGMA_BATCH_STRIDE=<value>.
// NOTE(review): the meaning of the stride (elements per batch?) is not
// visible from this header — confirm where it is consumed.
#ifndef MAGMA_BATCH_STRIDE
#define MAGMA_BATCH_STRIDE (1000)
#endif
271 
272 #endif  // _ceed_magma_h
273