// Source: libCEED rust/libceed-sys/c-src/backends/magma/ceed-magma.h (revision 3d8e882215d238700cdceb37404f76ca7fa24eaa)
// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED:  http://github.com/ceed

// magma functions specific to ceed
9972b3d9dSNatalie Beams #ifndef _ceed_magma_h
103d576824SJeremy L Thompson #define _ceed_magma_h
1190104f39SStan Tomov 
12ec3da8bcSJed Brown #include <ceed/ceed.h>
13ec3da8bcSJed Brown #include <ceed/backend.h>
14e0582403Sabdelfattah83 #include <magma_v2.h>
15e0582403Sabdelfattah83 
// Kernel-selection mode for the MAGMA basis kernels.
// Stored in Ceed_Magma.basis_kernel_mode and passed as the `kernel_mode`
// argument of magma_interp(), magma_grad() and magma_weight().
// NOTE(review): presumably GENERIC selects the dimension-agnostic
// `*_generic` kernels and SPECIFIC the `*_1d/_2d/_3d` ones — confirm against
// the dispatcher sources.
typedef enum {
  MAGMA_KERNEL_DIM_GENERIC  = 101,
  MAGMA_KERNEL_DIM_SPECIFIC = 102
} magma_kernel_mode_t;
20e0582403Sabdelfattah83 
21e0582403Sabdelfattah83 typedef struct {
22e0582403Sabdelfattah83   magma_kernel_mode_t basis_kernel_mode;
23e0582403Sabdelfattah83   magma_device_t device;
24e0582403Sabdelfattah83   magma_queue_t queue;
25e0582403Sabdelfattah83 } Ceed_Magma;
265a9ca9adSVeselin Dobrev 
277f5b9731SStan Tomov typedef struct {
287f5b9731SStan Tomov   CeedScalar *dqref1d;
297f5b9731SStan Tomov   CeedScalar *dinterp1d;
307f5b9731SStan Tomov   CeedScalar *dgrad1d;
317f5b9731SStan Tomov   CeedScalar *dqweight1d;
327f5b9731SStan Tomov } CeedBasis_Magma;
337f5b9731SStan Tomov 
347f5b9731SStan Tomov typedef struct {
35868539c2SNatalie Beams   CeedScalar *dqref;
36868539c2SNatalie Beams   CeedScalar *dinterp;
37868539c2SNatalie Beams   CeedScalar *dgrad;
38868539c2SNatalie Beams   CeedScalar *dqweight;
39868539c2SNatalie Beams } CeedBasisNonTensor_Magma;
40868539c2SNatalie Beams 
// Ownership state recorded in CeedElemRestriction_Magma.own_.
// Enumerators take the implicit values 0, 1, 2.
// NOTE(review): presumably distinguishes "not owned" from owned host memory
// that is unpinned vs. pinned — confirm against the restriction code.
typedef enum {
  OWNED_NONE = 0,
  OWNED_UNPINNED,
  OWNED_PINNED,
} OwnershipMode;
46c8b3a627SJed Brown 
47868539c2SNatalie Beams typedef struct {
48d655899aSNatalie Beams   CeedInt *offsets;
49d655899aSNatalie Beams   CeedInt *doffsets;
50c8b3a627SJed Brown   OwnershipMode own_;
51868539c2SNatalie Beams   int down_;            // cover a case where we own Device memory
52868539c2SNatalie Beams } CeedElemRestriction_Magma;
53868539c2SNatalie Beams 
54868539c2SNatalie Beams typedef struct {
557f5b9731SStan Tomov   const CeedScalar **inputs;
567f5b9731SStan Tomov   CeedScalar **outputs;
577f5b9731SStan Tomov   bool setupdone;
587f5b9731SStan Tomov } CeedQFunction_Magma;
597f5b9731SStan Tomov 
// Feature switches (defined with no value) tested elsewhere with #ifdef.
// NOTE(review): presumably enable the batched MAGMA code paths — confirm
// where each switch is consumed.
#define USE_MAGMA_BATCH
#define USE_MAGMA_BATCH2
#define USE_MAGMA_BATCH3
#define USE_MAGMA_BATCH4
6490104f39SStan Tomov 
657f5b9731SStan Tomov #ifdef __cplusplus
667f5b9731SStan Tomov CEED_INTERN {
677f5b9731SStan Tomov #endif
68e0582403Sabdelfattah83 
69e0582403Sabdelfattah83   magma_int_t magma_interp_1d(
70e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
71e0582403Sabdelfattah83     const CeedScalar *dT, CeedTransposeMode tmode,
72e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
73e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
74f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
75e0582403Sabdelfattah83 
76e0582403Sabdelfattah83   magma_int_t magma_interp_2d(
77e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
78e0582403Sabdelfattah83     const CeedScalar *dT, CeedTransposeMode tmode,
79e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
80e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
81f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
82e0582403Sabdelfattah83 
83e0582403Sabdelfattah83   magma_int_t magma_interp_3d(
84e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
85e0582403Sabdelfattah83     const CeedScalar *dT, CeedTransposeMode tmode,
86e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
87e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
88f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
89e0582403Sabdelfattah83 
90e0582403Sabdelfattah83   magma_int_t magma_interp_generic(magma_int_t P, magma_int_t Q,
91868539c2SNatalie Beams                                    magma_int_t dim, magma_int_t ncomp,
9280a9ef05SNatalie Beams                                    const CeedScalar *dT, CeedTransposeMode tmode,
9380a9ef05SNatalie Beams                                    const CeedScalar *dU, magma_int_t u_elemstride,
94e0582403Sabdelfattah83                                    magma_int_t cstrdU,
9580a9ef05SNatalie Beams                                    CeedScalar *dV, magma_int_t v_elemstride,
96e0582403Sabdelfattah83                                    magma_int_t cstrdV,
97e0582403Sabdelfattah83                                    magma_int_t nelem, magma_queue_t queue);
987f5b9731SStan Tomov 
99e0582403Sabdelfattah83   magma_int_t magma_interp(
100e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q,
101868539c2SNatalie Beams     magma_int_t dim, magma_int_t ncomp,
10280a9ef05SNatalie Beams     const CeedScalar *dT, CeedTransposeMode tmode,
10380a9ef05SNatalie Beams     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
10480a9ef05SNatalie Beams     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
105f71aa81bSnbeams     magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue);
1067f5b9731SStan Tomov 
107e0582403Sabdelfattah83   magma_int_t magma_grad_1d(
108e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
109e0582403Sabdelfattah83     const CeedScalar *dTinterp, const CeedScalar *dTgrad, CeedTransposeMode tmode,
110e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU,
111e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV,
112f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
113868539c2SNatalie Beams 
114e0582403Sabdelfattah83   magma_int_t magma_gradn_2d(
115e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
116e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
117e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
118e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
119f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
120e0582403Sabdelfattah83 
121e0582403Sabdelfattah83   magma_int_t magma_gradt_2d(
122e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
123e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
124e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
125e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
126f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
127e0582403Sabdelfattah83 
128e0582403Sabdelfattah83   magma_int_t magma_gradn_3d(
129e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
130e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
131e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
132e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
133f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
134e0582403Sabdelfattah83 
135e0582403Sabdelfattah83   magma_int_t magma_gradt_3d(
136e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t ncomp,
137e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
138e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
139e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
140f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
141e0582403Sabdelfattah83 
142e0582403Sabdelfattah83   magma_int_t magma_grad_generic(
143e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
144e0582403Sabdelfattah83     const CeedScalar* dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
145e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t estrdU, magma_int_t cstrdU, magma_int_t dstrdU,
146e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t estrdV, magma_int_t cstrdV, magma_int_t dstrdV,
147e0582403Sabdelfattah83     magma_int_t nelem, magma_queue_t queue);
148e0582403Sabdelfattah83 
149e0582403Sabdelfattah83   magma_int_t magma_grad(
150e0582403Sabdelfattah83     magma_int_t P, magma_int_t Q, magma_int_t dim, magma_int_t ncomp,
151e0582403Sabdelfattah83     const CeedScalar *dinterp1d, const CeedScalar *dgrad1d, CeedTransposeMode tmode,
152e0582403Sabdelfattah83     const CeedScalar *dU, magma_int_t u_elemstride, magma_int_t cstrdU, magma_int_t dstrdU,
153e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_elemstride, magma_int_t cstrdV, magma_int_t dstrdV,
154f71aa81bSnbeams     magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue);
155e0582403Sabdelfattah83 
156e0582403Sabdelfattah83   magma_int_t magma_weight_1d(
157e0582403Sabdelfattah83     magma_int_t Q, const CeedScalar *dqweight1d,
158e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_stride,
159f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
160e0582403Sabdelfattah83 
161e0582403Sabdelfattah83   magma_int_t magma_weight_2d(
162e0582403Sabdelfattah83     magma_int_t Q, const CeedScalar *dqweight1d,
163e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_stride,
164f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
165e0582403Sabdelfattah83 
166e0582403Sabdelfattah83   magma_int_t magma_weight_3d(
167e0582403Sabdelfattah83     magma_int_t Q, const CeedScalar *dqweight1d,
168e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_stride,
169f71aa81bSnbeams     magma_int_t nelem, magma_queue_t queue);
170e0582403Sabdelfattah83 
171e0582403Sabdelfattah83   magma_int_t magma_weight_generic(
172e0582403Sabdelfattah83     magma_int_t Q, magma_int_t dim,
173e0582403Sabdelfattah83     const CeedScalar *dqweight1d,
174e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t vstride,
175e0582403Sabdelfattah83     magma_int_t nelem, magma_queue_t queue);
176e0582403Sabdelfattah83 
177e0582403Sabdelfattah83   magma_int_t magma_weight(
178e0582403Sabdelfattah83     magma_int_t Q, magma_int_t dim,
179e0582403Sabdelfattah83     const CeedScalar *dqweight1d,
180e0582403Sabdelfattah83     CeedScalar *dV, magma_int_t v_stride,
181f71aa81bSnbeams     magma_int_t nelem, magma_kernel_mode_t kernel_mode, magma_queue_t queue);
182e0582403Sabdelfattah83 
183e0582403Sabdelfattah83   void magma_weight_nontensor(magma_int_t grid, magma_int_t threads, magma_int_t nelem,
184868539c2SNatalie Beams                               magma_int_t Q,
18580a9ef05SNatalie Beams                               CeedScalar *dqweight, CeedScalar *dv, magma_queue_t queue);
186e0582403Sabdelfattah83 
1878dc8d968Sjeremylt   void magma_readDofsOffset(const magma_int_t NCOMP,
1888dc8d968Sjeremylt                             const magma_int_t compstride,
1898dc8d968Sjeremylt                             const magma_int_t esize, const magma_int_t nelem,
19080a9ef05SNatalie Beams                             magma_int_t *offsets, const CeedScalar *du, CeedScalar *dv,
191e0582403Sabdelfattah83                             magma_queue_t queue);
192868539c2SNatalie Beams 
1938dc8d968Sjeremylt   void magma_readDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
194266dd7abSnbeams                              const magma_int_t nelem, magma_int_t *strides,
19580a9ef05SNatalie Beams                              const CeedScalar *du, CeedScalar *dv,
196e0582403Sabdelfattah83                              magma_queue_t queue);
197266dd7abSnbeams 
1988dc8d968Sjeremylt   void magma_writeDofsOffset(const magma_int_t NCOMP,
1998dc8d968Sjeremylt                              const magma_int_t compstride,
2008dc8d968Sjeremylt                              const magma_int_t esize, const magma_int_t nelem,
20180a9ef05SNatalie Beams                              magma_int_t *offsets,const CeedScalar *du, CeedScalar *dv,
202e0582403Sabdelfattah83                              magma_queue_t queue);
203868539c2SNatalie Beams 
2048dc8d968Sjeremylt   void magma_writeDofsStrided(const magma_int_t NCOMP, const magma_int_t esize,
205266dd7abSnbeams                               const magma_int_t nelem, magma_int_t *strides,
20680a9ef05SNatalie Beams                               const CeedScalar *du, CeedScalar *dv,
207e0582403Sabdelfattah83                               magma_queue_t queue);
208e0582403Sabdelfattah83 
209e0582403Sabdelfattah83   int magma_dgemm_nontensor(
210e0582403Sabdelfattah83     magma_trans_t transA, magma_trans_t transB,
211e0582403Sabdelfattah83     magma_int_t m, magma_int_t n, magma_int_t k,
212e0582403Sabdelfattah83     double alpha, const double *dA, magma_int_t ldda,
213e0582403Sabdelfattah83     const double *dB, magma_int_t lddb,
214e0582403Sabdelfattah83     double beta,  double *dC, magma_int_t lddc,
215e0582403Sabdelfattah83     magma_queue_t queue );
216e0582403Sabdelfattah83 
21780a9ef05SNatalie Beams   int magma_sgemm_nontensor(
21880a9ef05SNatalie Beams     magma_trans_t transA, magma_trans_t transB,
21980a9ef05SNatalie Beams     magma_int_t m, magma_int_t n, magma_int_t k,
22080a9ef05SNatalie Beams     float alpha, const float *dA, magma_int_t ldda,
22180a9ef05SNatalie Beams     const float *dB, magma_int_t lddb,
22280a9ef05SNatalie Beams     float beta,  float *dC, magma_int_t lddc,
22380a9ef05SNatalie Beams     magma_queue_t queue );
22480a9ef05SNatalie Beams 
2257f5b9731SStan Tomov   magma_int_t
2267f5b9731SStan Tomov   magma_isdevptr(const void *A);
2277f5b9731SStan Tomov 
228868539c2SNatalie Beams   int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d,
229868539c2SNatalie Beams                                     CeedInt Q1d,
230868539c2SNatalie Beams                                     const CeedScalar *interp1d,
231868539c2SNatalie Beams                                     const CeedScalar *grad1d,
232868539c2SNatalie Beams                                     const CeedScalar *qref1d,
233868539c2SNatalie Beams                                     const CeedScalar *qweight1d,
234868539c2SNatalie Beams                                     CeedBasis basis);
2357f5b9731SStan Tomov 
236868539c2SNatalie Beams   int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim,
237d4f68153Sjeremylt                               CeedInt ndof, CeedInt nqpts,
238d4f68153Sjeremylt                               const CeedScalar *interp,
239d4f68153Sjeremylt                               const CeedScalar *grad,
240d4f68153Sjeremylt                               const CeedScalar *qref,
241d4f68153Sjeremylt                               const CeedScalar *qweight,
242d4f68153Sjeremylt                               CeedBasis basis);
243868539c2SNatalie Beams 
244868539c2SNatalie Beams   int CeedElemRestrictionCreate_Magma(CeedMemType mtype,
245868539c2SNatalie Beams                                       CeedCopyMode cmode,
246d655899aSNatalie Beams                                       const CeedInt *offsets,
247868539c2SNatalie Beams                                       CeedElemRestriction r);
248868539c2SNatalie Beams 
249868539c2SNatalie Beams   int CeedElemRestrictionCreateBlocked_Magma(const CeedMemType mtype,
250868539c2SNatalie Beams       const CeedCopyMode cmode,
251d655899aSNatalie Beams       const CeedInt *offsets,
252868539c2SNatalie Beams       const CeedElemRestriction res);
253a8c028e3SNatalie Beams 
254a8c028e3SNatalie Beams   int CeedOperatorCreate_Magma(CeedOperator op);
255a8c028e3SNatalie Beams 
2567f5b9731SStan Tomov   #ifdef __cplusplus
2577f5b9731SStan Tomov }
2587f5b9731SStan Tomov   #endif
2597f5b9731SStan Tomov 
// Redirect every use of magma_is_devptr to magma_isdevptr.
// Comment out the line below to use the default magma_is_devptr function.
#define magma_is_devptr magma_isdevptr
2627f5b9731SStan Tomov 
// If magma and cuda/ref are using the null stream, then ceed_magma_queue_sync
// should do nothing. The macro therefore expands to nothing, so its
// arguments are never evaluated.
#define ceed_magma_queue_sync(...)
266e0582403Sabdelfattah83 
// Batch stride; defaults to 1000 and can be overridden at compile time with
// -DMAGMA_BATCH_STRIDE=<desired-value>.
#ifndef MAGMA_BATCH_STRIDE
#define MAGMA_BATCH_STRIDE (1000)
#endif
271e0582403Sabdelfattah83 
2723d576824SJeremy L Thompson #endif  // _ceed_magma_h
273