xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma-basis.c (revision 42e449dbb55ca8abe2f5868e9a58bd02fffe56f9)
17f5b9731SStan Tomov // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
27f5b9731SStan Tomov // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
37f5b9731SStan Tomov // All Rights reserved. See files LICENSE and NOTICE for details.
47f5b9731SStan Tomov //
57f5b9731SStan Tomov // This file is part of CEED, a collection of benchmarks, miniapps, software
67f5b9731SStan Tomov // libraries and APIs for efficient high-order finite element and spectral
77f5b9731SStan Tomov // element discretizations for exascale applications. For more information and
87f5b9731SStan Tomov // source code availability see http://github.com/ceed.
97f5b9731SStan Tomov //
107f5b9731SStan Tomov // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
117f5b9731SStan Tomov // a collaborative effort of two U.S. Department of Energy organizations (Office
127f5b9731SStan Tomov // of Science and the National Nuclear Security Administration) responsible for
137f5b9731SStan Tomov // the planning and preparation of a capable exascale ecosystem, including
147f5b9731SStan Tomov // software, applications, hardware, advanced system engineering and early
157f5b9731SStan Tomov // testbed platforms, in support of the nation's exascale computing imperative.
167f5b9731SStan Tomov 
17ec3da8bcSJed Brown #include <ceed/ceed.h>
18ec3da8bcSJed Brown #include <ceed/backend.h>
197f5b9731SStan Tomov #include "ceed-magma.h"
207f5b9731SStan Tomov 
217f5b9731SStan Tomov #ifdef __cplusplus
227f5b9731SStan Tomov CEED_INTERN "C"
237f5b9731SStan Tomov #endif
247f5b9731SStan Tomov int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem,
257f5b9731SStan Tomov                          CeedTransposeMode tmode, CeedEvalMode emode,
263513a710Sjeremylt                          CeedVector U, CeedVector V) {
277f5b9731SStan Tomov   int ierr;
287f5b9731SStan Tomov   Ceed ceed;
29e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
30e0582403Sabdelfattah83   CeedInt dim, ncomp, ndof;
31e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
32e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr);
33e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChkBackend(ierr);
34e0582403Sabdelfattah83 
35e0582403Sabdelfattah83   Ceed_Magma *data;
36e15f9bd0SJeremy L Thompson   ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr);
37e0582403Sabdelfattah83 
387f5b9731SStan Tomov   const CeedScalar *u;
397f5b9731SStan Tomov   CeedScalar *v;
40868539c2SNatalie Beams   if (emode != CEED_EVAL_WEIGHT) {
41e15f9bd0SJeremy L Thompson     ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &u); CeedChkBackend(ierr);
427f5b9731SStan Tomov   } else if (emode != CEED_EVAL_WEIGHT) {
437f5b9731SStan Tomov     // LCOV_EXCL_START
44e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
457f5b9731SStan Tomov                      "An input vector is required for this CeedEvalMode");
467f5b9731SStan Tomov     // LCOV_EXCL_STOP
477f5b9731SStan Tomov   }
48e15f9bd0SJeremy L Thompson   ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &v); CeedChkBackend(ierr);
497f5b9731SStan Tomov 
507f5b9731SStan Tomov   CeedBasis_Magma *impl;
51e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr);
527f5b9731SStan Tomov 
537f5b9731SStan Tomov   CeedInt P1d, Q1d;
54e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChkBackend(ierr);
55e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChkBackend(ierr);
567f5b9731SStan Tomov 
577f5b9731SStan Tomov   CeedDebug("\033[01m[CeedBasisApply_Magma] vsize=%d, comp = %d",
587f5b9731SStan Tomov             ncomp*CeedIntPow(P1d, dim), ncomp);
597f5b9731SStan Tomov 
607f5b9731SStan Tomov   if (tmode == CEED_TRANSPOSE) {
617f5b9731SStan Tomov     CeedInt length;
62e15f9bd0SJeremy L Thompson     ierr = CeedVectorGetLength(V, &length); CeedChkBackend(ierr);
63e0582403Sabdelfattah83     magmablas_dlaset(MagmaFull, length, 1, 0., 0., v, length, data->queue);
64e0582403Sabdelfattah83     ceed_magma_queue_sync( data->queue );
657f5b9731SStan Tomov   }
663513a710Sjeremylt   switch (emode) {
673513a710Sjeremylt   case CEED_EVAL_INTERP: {
687f5b9731SStan Tomov     CeedInt P = P1d, Q = Q1d;
697f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
707f5b9731SStan Tomov       P = Q1d; Q = P1d;
717f5b9731SStan Tomov     }
727f5b9731SStan Tomov 
737f5b9731SStan Tomov     // Define element sizes for dofs/quad
747f5b9731SStan Tomov     CeedInt elquadsize = CeedIntPow(Q1d, dim);
757f5b9731SStan Tomov     CeedInt eldofssize = CeedIntPow(P1d, dim);
767f5b9731SStan Tomov 
777f5b9731SStan Tomov     // E-vector ordering -------------- Q-vector ordering
78868539c2SNatalie Beams     //  component                        component
79868539c2SNatalie Beams     //    elem                             elem
807f5b9731SStan Tomov     //       node                            node
817f5b9731SStan Tomov 
827f5b9731SStan Tomov     // ---  Define strides for NOTRANSPOSE mode: ---
837f5b9731SStan Tomov     // Input (u) is E-vector, output (v) is Q-vector
847f5b9731SStan Tomov 
857f5b9731SStan Tomov     // Element strides
86868539c2SNatalie Beams     CeedInt u_elstride = eldofssize;
877f5b9731SStan Tomov     CeedInt v_elstride = elquadsize;
887f5b9731SStan Tomov     // Component strides
89868539c2SNatalie Beams     CeedInt u_compstride = nelem * eldofssize;
907f5b9731SStan Tomov     CeedInt v_compstride = nelem * elquadsize;
917f5b9731SStan Tomov 
927f5b9731SStan Tomov     // ---  Swap strides for TRANSPOSE mode: ---
937f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
947f5b9731SStan Tomov       // Input (u) is Q-vector, output (v) is E-vector
957f5b9731SStan Tomov       // Element strides
96868539c2SNatalie Beams       v_elstride = eldofssize;
977f5b9731SStan Tomov       u_elstride = elquadsize;
987f5b9731SStan Tomov       // Component strides
99868539c2SNatalie Beams       v_compstride = nelem * eldofssize;
1007f5b9731SStan Tomov       u_compstride = nelem * elquadsize;
1017f5b9731SStan Tomov     }
1027f5b9731SStan Tomov 
103e0582403Sabdelfattah83     ierr = magma_interp(P, Q, dim, ncomp,
1047f5b9731SStan Tomov                         impl->dinterp1d, tmode,
105868539c2SNatalie Beams                         u, u_elstride, u_compstride,
106868539c2SNatalie Beams                         v, v_elstride, v_compstride,
107e0582403Sabdelfattah83                         nelem, data->basis_kernel_mode, data->maxthreads,
108e0582403Sabdelfattah83                         data->queue);
109*42e449dbSjeremylt     if (ierr != 0) return CeedError(ceed, CEED_ERROR_BACKEND,
110e0582403Sabdelfattah83                                       "MAGMA: launch failure detected for magma_interp");
1117f5b9731SStan Tomov   }
1123513a710Sjeremylt   break;
1133513a710Sjeremylt   case CEED_EVAL_GRAD: {
1147f5b9731SStan Tomov     CeedInt P = P1d, Q = Q1d;
1157f5b9731SStan Tomov     // In CEED_NOTRANSPOSE mode:
1167f5b9731SStan Tomov     // u is (P^dim x nc), column-major layout (nc = ncomp)
1177f5b9731SStan Tomov     // v is (Q^dim x nc x dim), column-major layout (nc = ncomp)
1187f5b9731SStan Tomov     // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
1197f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
1207f5b9731SStan Tomov       P = Q1d, Q = P1d;
1217f5b9731SStan Tomov     }
1227f5b9731SStan Tomov 
1237f5b9731SStan Tomov     // Define element sizes for dofs/quad
1247f5b9731SStan Tomov     CeedInt elquadsize = CeedIntPow(Q1d, dim);
1257f5b9731SStan Tomov     CeedInt eldofssize = CeedIntPow(P1d, dim);
1267f5b9731SStan Tomov 
1277f5b9731SStan Tomov     // E-vector ordering -------------- Q-vector ordering
1287f5b9731SStan Tomov     //                                  dim
129868539c2SNatalie Beams     //  component                        component
130868539c2SNatalie Beams     //    elem                              elem
1317f5b9731SStan Tomov     //       node                            node
1327f5b9731SStan Tomov 
1337f5b9731SStan Tomov     // ---  Define strides for NOTRANSPOSE mode: ---
1347f5b9731SStan Tomov     // Input (u) is E-vector, output (v) is Q-vector
1357f5b9731SStan Tomov 
1367f5b9731SStan Tomov     // Element strides
137868539c2SNatalie Beams     CeedInt u_elstride = eldofssize;
1387f5b9731SStan Tomov     CeedInt v_elstride = elquadsize;
1397f5b9731SStan Tomov     // Component strides
140868539c2SNatalie Beams     CeedInt u_compstride = nelem * eldofssize;
1417f5b9731SStan Tomov     CeedInt v_compstride = nelem * elquadsize;
1427f5b9731SStan Tomov     // Dimension strides
1437f5b9731SStan Tomov     CeedInt u_dimstride = 0;
1447f5b9731SStan Tomov     CeedInt v_dimstride = nelem * elquadsize * ncomp;
1457f5b9731SStan Tomov 
1467f5b9731SStan Tomov     // ---  Swap strides for TRANSPOSE mode: ---
1477f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
1487f5b9731SStan Tomov       // Input (u) is Q-vector, output (v) is E-vector
1497f5b9731SStan Tomov       // Element strides
150868539c2SNatalie Beams       v_elstride = eldofssize;
1517f5b9731SStan Tomov       u_elstride = elquadsize;
1527f5b9731SStan Tomov       // Component strides
153868539c2SNatalie Beams       v_compstride = nelem * eldofssize;
1547f5b9731SStan Tomov       u_compstride = nelem * elquadsize;
1557f5b9731SStan Tomov       // Dimension strides
1567f5b9731SStan Tomov       v_dimstride = 0;
1577f5b9731SStan Tomov       u_dimstride = nelem * elquadsize * ncomp;
1587f5b9731SStan Tomov 
1597f5b9731SStan Tomov     }
1607f5b9731SStan Tomov 
161e0582403Sabdelfattah83     ierr = magma_grad( P, Q, dim, ncomp,
1627f5b9731SStan Tomov                        impl->dinterp1d, impl->dgrad1d, tmode,
163e0582403Sabdelfattah83                        u, u_elstride, u_compstride, u_dimstride,
164e0582403Sabdelfattah83                        v, v_elstride, v_compstride, v_dimstride,
165e0582403Sabdelfattah83                        nelem, data->basis_kernel_mode, data->maxthreads,
166e0582403Sabdelfattah83                        data->queue);
167*42e449dbSjeremylt     if (ierr != 0) return CeedError(ceed, CEED_ERROR_BACKEND,
168e0582403Sabdelfattah83                                       "MAGMA: launch failure detected for magma_grad");
1697f5b9731SStan Tomov   }
1703513a710Sjeremylt   break;
1713513a710Sjeremylt   case CEED_EVAL_WEIGHT: {
1727f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE)
1737f5b9731SStan Tomov       // LCOV_EXCL_START
174e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
1757f5b9731SStan Tomov                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
1767f5b9731SStan Tomov     // LCOV_EXCL_STOP
1777f5b9731SStan Tomov     CeedInt Q = Q1d;
1787f5b9731SStan Tomov     int eldofssize = CeedIntPow(Q, dim);
179e0582403Sabdelfattah83     ierr = magma_weight(Q, dim, impl->dqweight1d, v, eldofssize, nelem,
180e0582403Sabdelfattah83                         data->basis_kernel_mode, data->maxthreads, data->queue);
181*42e449dbSjeremylt     if (ierr != 0) return CeedError(ceed, CEED_ERROR_BACKEND,
182e0582403Sabdelfattah83                                       "MAGMA: launch failure detected for magma_weight");
1837f5b9731SStan Tomov   }
1843513a710Sjeremylt   break;
1853513a710Sjeremylt   // LCOV_EXCL_START
1863513a710Sjeremylt   case CEED_EVAL_DIV:
187e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported");
1883513a710Sjeremylt   case CEED_EVAL_CURL:
189e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported");
1903513a710Sjeremylt   case CEED_EVAL_NONE:
191e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
1923513a710Sjeremylt                      "CEED_EVAL_NONE does not make sense in this context");
1933513a710Sjeremylt     // LCOV_EXCL_STOP
1943513a710Sjeremylt   }
1957f5b9731SStan Tomov 
196e0582403Sabdelfattah83   // must sync to ensure completeness
197e0582403Sabdelfattah83   ceed_magma_queue_sync( data->queue );
198e0582403Sabdelfattah83 
1997f5b9731SStan Tomov   if (emode!=CEED_EVAL_WEIGHT) {
200e15f9bd0SJeremy L Thompson     ierr = CeedVectorRestoreArrayRead(U, &u); CeedChkBackend(ierr);
2017f5b9731SStan Tomov   }
202e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArray(V, &v); CeedChkBackend(ierr);
203e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2047f5b9731SStan Tomov }
2057f5b9731SStan Tomov 
2067f5b9731SStan Tomov #ifdef __cplusplus
2077f5b9731SStan Tomov CEED_INTERN "C"
2087f5b9731SStan Tomov #endif
209868539c2SNatalie Beams int CeedBasisApplyNonTensor_Magma(CeedBasis basis, CeedInt nelem,
210868539c2SNatalie Beams                                   CeedTransposeMode tmode, CeedEvalMode emode,
211868539c2SNatalie Beams                                   CeedVector U, CeedVector V) {
212868539c2SNatalie Beams   int ierr;
213868539c2SNatalie Beams   Ceed ceed;
214e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
215e0582403Sabdelfattah83 
216e0582403Sabdelfattah83   Ceed_Magma *data;
217e15f9bd0SJeremy L Thompson   ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr);
218e0582403Sabdelfattah83 
219868539c2SNatalie Beams   CeedInt dim, ncomp, ndof, nqpt;
220e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
221e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr);
222e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChkBackend(ierr);
223e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChkBackend(ierr);
224868539c2SNatalie Beams   const CeedScalar *du;
225868539c2SNatalie Beams   CeedScalar *dv;
226868539c2SNatalie Beams   if (emode != CEED_EVAL_WEIGHT) {
227e15f9bd0SJeremy L Thompson     ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &du); CeedChkBackend(ierr);
228868539c2SNatalie Beams   } else if (emode != CEED_EVAL_WEIGHT) {
229868539c2SNatalie Beams     // LCOV_EXCL_START
230e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
231868539c2SNatalie Beams                      "An input vector is required for this CeedEvalMode");
232868539c2SNatalie Beams     // LCOV_EXCL_STOP
233868539c2SNatalie Beams   }
234e15f9bd0SJeremy L Thompson   ierr = CeedVectorGetArray(V, CEED_MEM_DEVICE, &dv); CeedChkBackend(ierr);
235868539c2SNatalie Beams 
236868539c2SNatalie Beams   CeedBasisNonTensor_Magma *impl;
237e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr);
238868539c2SNatalie Beams 
239868539c2SNatalie Beams   CeedDebug("\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%d, comp = %d",
240868539c2SNatalie Beams             ncomp*ndof, ncomp);
241868539c2SNatalie Beams 
242868539c2SNatalie Beams   if (tmode == CEED_TRANSPOSE) {
243868539c2SNatalie Beams     CeedInt length;
244868539c2SNatalie Beams     ierr = CeedVectorGetLength(V, &length);
245e0582403Sabdelfattah83     magmablas_dlaset(MagmaFull, length, 1, 0., 0., dv, length, data->queue);
246e0582403Sabdelfattah83     ceed_magma_queue_sync( data->queue );
247868539c2SNatalie Beams   }
248868539c2SNatalie Beams   switch (emode) {
249868539c2SNatalie Beams   case CEED_EVAL_INTERP: {
250868539c2SNatalie Beams     CeedInt P = ndof, Q = nqpt;
251868539c2SNatalie Beams     if (tmode == CEED_TRANSPOSE)
252e0582403Sabdelfattah83       magma_dgemm_nontensor(MagmaNoTrans, MagmaNoTrans,
253868539c2SNatalie Beams                             P, nelem*ncomp, Q,
254868539c2SNatalie Beams                             1.0, impl->dinterp, P,
255868539c2SNatalie Beams                             du, Q,
256e0582403Sabdelfattah83                             0.0, dv, P, data->queue);
257868539c2SNatalie Beams     else
258e0582403Sabdelfattah83       magma_dgemm_nontensor(MagmaTrans, MagmaNoTrans,
259868539c2SNatalie Beams                             Q, nelem*ncomp, P,
260868539c2SNatalie Beams                             1.0, impl->dinterp, P,
261868539c2SNatalie Beams                             du, P,
262e0582403Sabdelfattah83                             0.0, dv, Q, data->queue);
263868539c2SNatalie Beams   }
264868539c2SNatalie Beams   break;
265868539c2SNatalie Beams 
266868539c2SNatalie Beams   case CEED_EVAL_GRAD: {
267868539c2SNatalie Beams     CeedInt P = ndof, Q = nqpt;
268868539c2SNatalie Beams     if (tmode == CEED_TRANSPOSE) {
269868539c2SNatalie Beams       double beta = 0.0;
270868539c2SNatalie Beams       for(int d=0; d<dim; d++) {
271868539c2SNatalie Beams         if (d>0)
272868539c2SNatalie Beams           beta = 1.0;
273e0582403Sabdelfattah83         magma_dgemm_nontensor(MagmaNoTrans, MagmaNoTrans,
274868539c2SNatalie Beams                               P, nelem*ncomp, Q,
275868539c2SNatalie Beams                               1.0, impl->dgrad + d*P*Q, P,
276868539c2SNatalie Beams                               du + d*nelem*ncomp*Q, Q,
277e0582403Sabdelfattah83                               beta, dv, P, data->queue);
278868539c2SNatalie Beams       }
279868539c2SNatalie Beams     } else {
280868539c2SNatalie Beams       for(int d=0; d< dim; d++)
281e0582403Sabdelfattah83         magma_dgemm_nontensor(MagmaTrans, MagmaNoTrans,
282868539c2SNatalie Beams                               Q, nelem*ncomp, P,
283868539c2SNatalie Beams                               1.0, impl->dgrad + d*P*Q, P,
284868539c2SNatalie Beams                               du, P,
285e0582403Sabdelfattah83                               0.0, dv + d*nelem*ncomp*Q, Q, data->queue);
286868539c2SNatalie Beams     }
287868539c2SNatalie Beams   }
288868539c2SNatalie Beams   break;
289868539c2SNatalie Beams 
290868539c2SNatalie Beams   case CEED_EVAL_WEIGHT: {
291868539c2SNatalie Beams     if (tmode == CEED_TRANSPOSE)
292868539c2SNatalie Beams       // LCOV_EXCL_START
293e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
294868539c2SNatalie Beams                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
295868539c2SNatalie Beams     // LCOV_EXCL_STOP
296868539c2SNatalie Beams 
297868539c2SNatalie Beams     int elemsPerBlock = 1;//basis->Q1d < 7 ? optElems[basis->Q1d] : 1;
298868539c2SNatalie Beams     int grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem)?
299868539c2SNatalie Beams                                        1 : 0 );
300e0582403Sabdelfattah83     magma_weight_nontensor(grid, nqpt, nelem, nqpt, impl->dqweight, dv,
301e0582403Sabdelfattah83                            data->queue);
302e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
303868539c2SNatalie Beams   }
304868539c2SNatalie Beams   break;
305868539c2SNatalie Beams 
306868539c2SNatalie Beams   // LCOV_EXCL_START
307868539c2SNatalie Beams   case CEED_EVAL_DIV:
308e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported");
309868539c2SNatalie Beams   case CEED_EVAL_CURL:
310e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported");
311868539c2SNatalie Beams   case CEED_EVAL_NONE:
312e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
313868539c2SNatalie Beams                      "CEED_EVAL_NONE does not make sense in this context");
314868539c2SNatalie Beams     // LCOV_EXCL_STOP
315868539c2SNatalie Beams   }
316868539c2SNatalie Beams 
317e0582403Sabdelfattah83   // must sync to ensure completeness
318e0582403Sabdelfattah83   ceed_magma_queue_sync( data->queue );
319e0582403Sabdelfattah83 
320868539c2SNatalie Beams   if (emode!=CEED_EVAL_WEIGHT) {
321e15f9bd0SJeremy L Thompson     ierr = CeedVectorRestoreArrayRead(U, &du); CeedChkBackend(ierr);
322868539c2SNatalie Beams   }
323e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArray(V, &dv); CeedChkBackend(ierr);
324e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
325868539c2SNatalie Beams }
326868539c2SNatalie Beams 
327868539c2SNatalie Beams #ifdef __cplusplus
328868539c2SNatalie Beams CEED_INTERN "C"
329868539c2SNatalie Beams #endif
3303513a710Sjeremylt int CeedBasisDestroy_Magma(CeedBasis basis) {
3317f5b9731SStan Tomov   int ierr;
3327f5b9731SStan Tomov   CeedBasis_Magma *impl;
333e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr);
3347f5b9731SStan Tomov 
335e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dqref1d); CeedChkBackend(ierr);
336e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dinterp1d); CeedChkBackend(ierr);
337e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dgrad1d); CeedChkBackend(ierr);
338e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dqweight1d); CeedChkBackend(ierr);
3397f5b9731SStan Tomov 
340e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
3417f5b9731SStan Tomov 
342e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
3437f5b9731SStan Tomov }
3447f5b9731SStan Tomov 
3457f5b9731SStan Tomov #ifdef __cplusplus
3467f5b9731SStan Tomov CEED_INTERN "C"
3477f5b9731SStan Tomov #endif
348868539c2SNatalie Beams int CeedBasisDestroyNonTensor_Magma(CeedBasis basis) {
349868539c2SNatalie Beams   int ierr;
350868539c2SNatalie Beams   CeedBasisNonTensor_Magma *impl;
351e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr);
352868539c2SNatalie Beams 
353e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dqref); CeedChkBackend(ierr);
354e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dinterp); CeedChkBackend(ierr);
355e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dgrad); CeedChkBackend(ierr);
356e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dqweight); CeedChkBackend(ierr);
357868539c2SNatalie Beams 
358e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
359868539c2SNatalie Beams 
360e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
361868539c2SNatalie Beams }
362868539c2SNatalie Beams 
363868539c2SNatalie Beams #ifdef __cplusplus
364868539c2SNatalie Beams CEED_INTERN "C"
365868539c2SNatalie Beams #endif
3663513a710Sjeremylt int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d,
3673513a710Sjeremylt                                   const CeedScalar *interp1d,
3687f5b9731SStan Tomov                                   const CeedScalar *grad1d,
3697f5b9731SStan Tomov                                   const CeedScalar *qref1d,
3703513a710Sjeremylt                                   const CeedScalar *qweight1d, CeedBasis basis) {
3717f5b9731SStan Tomov   int ierr;
3727f5b9731SStan Tomov   CeedBasis_Magma *impl;
3737f5b9731SStan Tomov   Ceed ceed;
374e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
3757f5b9731SStan Tomov 
376c9f8acf2SJeremy L Thompson   // Check for supported parameters
377c9f8acf2SJeremy L Thompson   CeedInt ncomp = 0;
378e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr);
379c9f8acf2SJeremy L Thompson   if (ncomp > 3)
380c9f8acf2SJeremy L Thompson     // LCOV_EXCL_START
381e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
382c9f8acf2SJeremy L Thompson                      "Magma backend does not support tensor bases with more than 3 components");
383c9f8acf2SJeremy L Thompson   // LCOV_EXCL_STOP
384c9f8acf2SJeremy L Thompson   if (P1d > 10)
385c9f8acf2SJeremy L Thompson     // LCOV_EXCL_START
386e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
387c9f8acf2SJeremy L Thompson                      "Magma backend does not support tensor bases with more than 10 nodes in each dimension");
388c9f8acf2SJeremy L Thompson   // LCOV_EXCL_STOP
389c9f8acf2SJeremy L Thompson   if (Q1d > 10)
390c9f8acf2SJeremy L Thompson     // LCOV_EXCL_START
391e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
392c9f8acf2SJeremy L Thompson                      "Magma backend does not support tensor bases with more than 10 quadrature points in each dimension");
393c9f8acf2SJeremy L Thompson   // LCOV_EXCL_STOP
394c9f8acf2SJeremy L Thompson 
395e0582403Sabdelfattah83   Ceed_Magma *data;
396e15f9bd0SJeremy L Thompson   ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr);
397e0582403Sabdelfattah83 
3987f5b9731SStan Tomov   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply",
399e15f9bd0SJeremy L Thompson                                 CeedBasisApply_Magma); CeedChkBackend(ierr);
4007f5b9731SStan Tomov   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy",
401e15f9bd0SJeremy L Thompson                                 CeedBasisDestroy_Magma); CeedChkBackend(ierr);
4027f5b9731SStan Tomov 
403e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1,&impl); CeedChkBackend(ierr);
404e15f9bd0SJeremy L Thompson   ierr = CeedBasisSetData(basis, impl); CeedChkBackend(ierr);
4057f5b9731SStan Tomov 
4067f5b9731SStan Tomov   // Copy qref1d to the GPU
4077f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dqref1d, Q1d*sizeof(qref1d[0]));
408e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
409e0582403Sabdelfattah83   magma_setvector(Q1d, sizeof(qref1d[0]), qref1d, 1, impl->dqref1d, 1,
410e0582403Sabdelfattah83                   data->queue);
4117f5b9731SStan Tomov 
4127f5b9731SStan Tomov   // Copy interp1d to the GPU
4137f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dinterp1d, Q1d*P1d*sizeof(interp1d[0]));
414e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
415e0582403Sabdelfattah83   magma_setvector(Q1d*P1d, sizeof(interp1d[0]), interp1d, 1, impl->dinterp1d, 1,
416e0582403Sabdelfattah83                   data->queue);
4177f5b9731SStan Tomov 
4187f5b9731SStan Tomov   // Copy grad1d to the GPU
4197f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dgrad1d, Q1d*P1d*sizeof(grad1d[0]));
420e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
421e0582403Sabdelfattah83   magma_setvector(Q1d*P1d, sizeof(grad1d[0]), grad1d, 1, impl->dgrad1d, 1,
422e0582403Sabdelfattah83                   data->queue);
4237f5b9731SStan Tomov 
4247f5b9731SStan Tomov   // Copy qweight1d to the GPU
4257f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dqweight1d, Q1d*sizeof(qweight1d[0]));
426e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
427e0582403Sabdelfattah83   magma_setvector(Q1d, sizeof(qweight1d[0]), qweight1d, 1, impl->dqweight1d, 1,
428e0582403Sabdelfattah83                   data->queue);
4297f5b9731SStan Tomov 
430e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4317f5b9731SStan Tomov }
4327f5b9731SStan Tomov 
4337f5b9731SStan Tomov #ifdef __cplusplus
4347f5b9731SStan Tomov CEED_INTERN "C"
4357f5b9731SStan Tomov #endif
4363513a710Sjeremylt int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt ndof,
4373513a710Sjeremylt                             CeedInt nqpts, const CeedScalar *interp,
4383513a710Sjeremylt                             const CeedScalar *grad, const CeedScalar *qref,
4393513a710Sjeremylt                             const CeedScalar *qweight, CeedBasis basis) {
4407f5b9731SStan Tomov   int ierr;
441868539c2SNatalie Beams   CeedBasisNonTensor_Magma *impl;
4427f5b9731SStan Tomov   Ceed ceed;
443e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
4447f5b9731SStan Tomov 
445e0582403Sabdelfattah83   Ceed_Magma *data;
446e15f9bd0SJeremy L Thompson   ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr);
447e0582403Sabdelfattah83 
448868539c2SNatalie Beams   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply",
449e15f9bd0SJeremy L Thompson                                 CeedBasisApplyNonTensor_Magma); CeedChkBackend(ierr);
450868539c2SNatalie Beams   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy",
451e15f9bd0SJeremy L Thompson                                 CeedBasisDestroyNonTensor_Magma); CeedChkBackend(ierr);
452868539c2SNatalie Beams 
453e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1,&impl); CeedChkBackend(ierr);
454e15f9bd0SJeremy L Thompson   ierr = CeedBasisSetData(basis, impl); CeedChkBackend(ierr);
455868539c2SNatalie Beams 
456868539c2SNatalie Beams   // Copy qref to the GPU
457868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dqref, nqpts*sizeof(qref[0]));
458e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
459e0582403Sabdelfattah83   magma_setvector(nqpts, sizeof(qref[0]), qref, 1, impl->dqref, 1, data->queue);
460868539c2SNatalie Beams 
461868539c2SNatalie Beams   // Copy interp to the GPU
462868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dinterp, nqpts*ndof*sizeof(interp[0]));
463e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
464e0582403Sabdelfattah83   magma_setvector(nqpts*ndof, sizeof(interp[0]), interp, 1, impl->dinterp, 1,
465e0582403Sabdelfattah83                   data->queue);
466868539c2SNatalie Beams 
467868539c2SNatalie Beams   // Copy grad to the GPU
468868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dgrad, nqpts*ndof*dim*sizeof(grad[0]));
469e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
470e0582403Sabdelfattah83   magma_setvector(nqpts*ndof*dim, sizeof(grad[0]), grad, 1, impl->dgrad, 1,
471e0582403Sabdelfattah83                   data->queue);
472868539c2SNatalie Beams 
473868539c2SNatalie Beams   // Copy qweight to the GPU
474868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dqweight, nqpts*sizeof(qweight[0]));
475e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
476e0582403Sabdelfattah83   magma_setvector(nqpts, sizeof(qweight[0]), qweight, 1, impl->dqweight, 1,
477e0582403Sabdelfattah83                   data->queue);
478868539c2SNatalie Beams 
479e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4807f5b9731SStan Tomov }
481