xref: /libCEED/rust/libceed-sys/c-src/backends/magma/ceed-magma-basis.c (revision f71aa81bd7d2e9c6555cba4570cf145ac8d1aa26)
17f5b9731SStan Tomov // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
27f5b9731SStan Tomov // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
37f5b9731SStan Tomov // All Rights reserved. See files LICENSE and NOTICE for details.
47f5b9731SStan Tomov //
57f5b9731SStan Tomov // This file is part of CEED, a collection of benchmarks, miniapps, software
67f5b9731SStan Tomov // libraries and APIs for efficient high-order finite element and spectral
77f5b9731SStan Tomov // element discretizations for exascale applications. For more information and
87f5b9731SStan Tomov // source code availability see http://github.com/ceed.
97f5b9731SStan Tomov //
107f5b9731SStan Tomov // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
117f5b9731SStan Tomov // a collaborative effort of two U.S. Department of Energy organizations (Office
127f5b9731SStan Tomov // of Science and the National Nuclear Security Administration) responsible for
137f5b9731SStan Tomov // the planning and preparation of a capable exascale ecosystem, including
147f5b9731SStan Tomov // software, applications, hardware, advanced system engineering and early
157f5b9731SStan Tomov // testbed platforms, in support of the nation's exascale computing imperative.
167f5b9731SStan Tomov 
17ec3da8bcSJed Brown #include <ceed/ceed.h>
18ec3da8bcSJed Brown #include <ceed/backend.h>
197f5b9731SStan Tomov #include "ceed-magma.h"
207f5b9731SStan Tomov 
217f5b9731SStan Tomov #ifdef __cplusplus
227f5b9731SStan Tomov CEED_INTERN "C"
237f5b9731SStan Tomov #endif
247f5b9731SStan Tomov int CeedBasisApply_Magma(CeedBasis basis, CeedInt nelem,
257f5b9731SStan Tomov                          CeedTransposeMode tmode, CeedEvalMode emode,
263513a710Sjeremylt                          CeedVector U, CeedVector V) {
277f5b9731SStan Tomov   int ierr;
287f5b9731SStan Tomov   Ceed ceed;
29e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
30e0582403Sabdelfattah83   CeedInt dim, ncomp, ndof;
31e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
32e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr);
33e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChkBackend(ierr);
34e0582403Sabdelfattah83 
35e0582403Sabdelfattah83   Ceed_Magma *data;
36e15f9bd0SJeremy L Thompson   ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr);
37e0582403Sabdelfattah83 
387f5b9731SStan Tomov   const CeedScalar *u;
397f5b9731SStan Tomov   CeedScalar *v;
40868539c2SNatalie Beams   if (emode != CEED_EVAL_WEIGHT) {
41e15f9bd0SJeremy L Thompson     ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &u); CeedChkBackend(ierr);
427f5b9731SStan Tomov   } else if (emode != CEED_EVAL_WEIGHT) {
437f5b9731SStan Tomov     // LCOV_EXCL_START
44e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
457f5b9731SStan Tomov                      "An input vector is required for this CeedEvalMode");
467f5b9731SStan Tomov     // LCOV_EXCL_STOP
477f5b9731SStan Tomov   }
489c774eddSJeremy L Thompson   ierr = CeedVectorGetArrayWrite(V, CEED_MEM_DEVICE, &v); CeedChkBackend(ierr);
497f5b9731SStan Tomov 
507f5b9731SStan Tomov   CeedBasis_Magma *impl;
51e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr);
527f5b9731SStan Tomov 
537f5b9731SStan Tomov   CeedInt P1d, Q1d;
54e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChkBackend(ierr);
55e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChkBackend(ierr);
567f5b9731SStan Tomov 
573f21f6b1SJeremy L Thompson   CeedDebug(ceed, "\033[01m[CeedBasisApply_Magma] vsize=%d, comp = %d",
587f5b9731SStan Tomov             ncomp*CeedIntPow(P1d, dim), ncomp);
597f5b9731SStan Tomov 
607f5b9731SStan Tomov   if (tmode == CEED_TRANSPOSE) {
617f5b9731SStan Tomov     CeedInt length;
62e15f9bd0SJeremy L Thompson     ierr = CeedVectorGetLength(V, &length); CeedChkBackend(ierr);
6380a9ef05SNatalie Beams     if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) {
6480a9ef05SNatalie Beams       magmablas_slaset(MagmaFull, length, 1, 0., 0., (float *) v, length,
6580a9ef05SNatalie Beams                        data->queue);
6680a9ef05SNatalie Beams     } else {
6780a9ef05SNatalie Beams       magmablas_dlaset(MagmaFull, length, 1, 0., 0., (double *) v, length,
6880a9ef05SNatalie Beams                        data->queue);
6980a9ef05SNatalie Beams     }
70e0582403Sabdelfattah83     ceed_magma_queue_sync( data->queue );
717f5b9731SStan Tomov   }
723513a710Sjeremylt   switch (emode) {
733513a710Sjeremylt   case CEED_EVAL_INTERP: {
747f5b9731SStan Tomov     CeedInt P = P1d, Q = Q1d;
757f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
767f5b9731SStan Tomov       P = Q1d; Q = P1d;
777f5b9731SStan Tomov     }
787f5b9731SStan Tomov 
797f5b9731SStan Tomov     // Define element sizes for dofs/quad
807f5b9731SStan Tomov     CeedInt elquadsize = CeedIntPow(Q1d, dim);
817f5b9731SStan Tomov     CeedInt eldofssize = CeedIntPow(P1d, dim);
827f5b9731SStan Tomov 
837f5b9731SStan Tomov     // E-vector ordering -------------- Q-vector ordering
84868539c2SNatalie Beams     //  component                        component
85868539c2SNatalie Beams     //    elem                             elem
867f5b9731SStan Tomov     //       node                            node
877f5b9731SStan Tomov 
887f5b9731SStan Tomov     // ---  Define strides for NOTRANSPOSE mode: ---
897f5b9731SStan Tomov     // Input (u) is E-vector, output (v) is Q-vector
907f5b9731SStan Tomov 
917f5b9731SStan Tomov     // Element strides
92868539c2SNatalie Beams     CeedInt u_elstride = eldofssize;
937f5b9731SStan Tomov     CeedInt v_elstride = elquadsize;
947f5b9731SStan Tomov     // Component strides
95868539c2SNatalie Beams     CeedInt u_compstride = nelem * eldofssize;
967f5b9731SStan Tomov     CeedInt v_compstride = nelem * elquadsize;
977f5b9731SStan Tomov 
987f5b9731SStan Tomov     // ---  Swap strides for TRANSPOSE mode: ---
997f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
1007f5b9731SStan Tomov       // Input (u) is Q-vector, output (v) is E-vector
1017f5b9731SStan Tomov       // Element strides
102868539c2SNatalie Beams       v_elstride = eldofssize;
1037f5b9731SStan Tomov       u_elstride = elquadsize;
1047f5b9731SStan Tomov       // Component strides
105868539c2SNatalie Beams       v_compstride = nelem * eldofssize;
1067f5b9731SStan Tomov       u_compstride = nelem * elquadsize;
1077f5b9731SStan Tomov     }
1087f5b9731SStan Tomov 
109e0582403Sabdelfattah83     ierr = magma_interp(P, Q, dim, ncomp,
1107f5b9731SStan Tomov                         impl->dinterp1d, tmode,
111868539c2SNatalie Beams                         u, u_elstride, u_compstride,
112868539c2SNatalie Beams                         v, v_elstride, v_compstride,
113*f71aa81bSnbeams                         nelem, data->basis_kernel_mode,
114e0582403Sabdelfattah83                         data->queue);
11542e449dbSjeremylt     if (ierr != 0) return CeedError(ceed, CEED_ERROR_BACKEND,
116e0582403Sabdelfattah83                                       "MAGMA: launch failure detected for magma_interp");
1177f5b9731SStan Tomov   }
1183513a710Sjeremylt   break;
1193513a710Sjeremylt   case CEED_EVAL_GRAD: {
1207f5b9731SStan Tomov     CeedInt P = P1d, Q = Q1d;
1217f5b9731SStan Tomov     // In CEED_NOTRANSPOSE mode:
1227f5b9731SStan Tomov     // u is (P^dim x nc), column-major layout (nc = ncomp)
1237f5b9731SStan Tomov     // v is (Q^dim x nc x dim), column-major layout (nc = ncomp)
1247f5b9731SStan Tomov     // In CEED_TRANSPOSE mode, the sizes of u and v are switched.
1257f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
1267f5b9731SStan Tomov       P = Q1d, Q = P1d;
1277f5b9731SStan Tomov     }
1287f5b9731SStan Tomov 
1297f5b9731SStan Tomov     // Define element sizes for dofs/quad
1307f5b9731SStan Tomov     CeedInt elquadsize = CeedIntPow(Q1d, dim);
1317f5b9731SStan Tomov     CeedInt eldofssize = CeedIntPow(P1d, dim);
1327f5b9731SStan Tomov 
1337f5b9731SStan Tomov     // E-vector ordering -------------- Q-vector ordering
1347f5b9731SStan Tomov     //                                  dim
135868539c2SNatalie Beams     //  component                        component
136868539c2SNatalie Beams     //    elem                              elem
1377f5b9731SStan Tomov     //       node                            node
1387f5b9731SStan Tomov 
1397f5b9731SStan Tomov     // ---  Define strides for NOTRANSPOSE mode: ---
1407f5b9731SStan Tomov     // Input (u) is E-vector, output (v) is Q-vector
1417f5b9731SStan Tomov 
1427f5b9731SStan Tomov     // Element strides
143868539c2SNatalie Beams     CeedInt u_elstride = eldofssize;
1447f5b9731SStan Tomov     CeedInt v_elstride = elquadsize;
1457f5b9731SStan Tomov     // Component strides
146868539c2SNatalie Beams     CeedInt u_compstride = nelem * eldofssize;
1477f5b9731SStan Tomov     CeedInt v_compstride = nelem * elquadsize;
1487f5b9731SStan Tomov     // Dimension strides
1497f5b9731SStan Tomov     CeedInt u_dimstride = 0;
1507f5b9731SStan Tomov     CeedInt v_dimstride = nelem * elquadsize * ncomp;
1517f5b9731SStan Tomov 
1527f5b9731SStan Tomov     // ---  Swap strides for TRANSPOSE mode: ---
1537f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE) {
1547f5b9731SStan Tomov       // Input (u) is Q-vector, output (v) is E-vector
1557f5b9731SStan Tomov       // Element strides
156868539c2SNatalie Beams       v_elstride = eldofssize;
1577f5b9731SStan Tomov       u_elstride = elquadsize;
1587f5b9731SStan Tomov       // Component strides
159868539c2SNatalie Beams       v_compstride = nelem * eldofssize;
1607f5b9731SStan Tomov       u_compstride = nelem * elquadsize;
1617f5b9731SStan Tomov       // Dimension strides
1627f5b9731SStan Tomov       v_dimstride = 0;
1637f5b9731SStan Tomov       u_dimstride = nelem * elquadsize * ncomp;
1647f5b9731SStan Tomov 
1657f5b9731SStan Tomov     }
1667f5b9731SStan Tomov 
167e0582403Sabdelfattah83     ierr = magma_grad( P, Q, dim, ncomp,
1687f5b9731SStan Tomov                        impl->dinterp1d, impl->dgrad1d, tmode,
169e0582403Sabdelfattah83                        u, u_elstride, u_compstride, u_dimstride,
170e0582403Sabdelfattah83                        v, v_elstride, v_compstride, v_dimstride,
171*f71aa81bSnbeams                        nelem, data->basis_kernel_mode,
172e0582403Sabdelfattah83                        data->queue);
17342e449dbSjeremylt     if (ierr != 0) return CeedError(ceed, CEED_ERROR_BACKEND,
174e0582403Sabdelfattah83                                       "MAGMA: launch failure detected for magma_grad");
1757f5b9731SStan Tomov   }
1763513a710Sjeremylt   break;
1773513a710Sjeremylt   case CEED_EVAL_WEIGHT: {
1787f5b9731SStan Tomov     if (tmode == CEED_TRANSPOSE)
1797f5b9731SStan Tomov       // LCOV_EXCL_START
180e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
1817f5b9731SStan Tomov                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
1827f5b9731SStan Tomov     // LCOV_EXCL_STOP
1837f5b9731SStan Tomov     CeedInt Q = Q1d;
1847f5b9731SStan Tomov     int eldofssize = CeedIntPow(Q, dim);
185e0582403Sabdelfattah83     ierr = magma_weight(Q, dim, impl->dqweight1d, v, eldofssize, nelem,
186*f71aa81bSnbeams                         data->basis_kernel_mode, data->queue);
18742e449dbSjeremylt     if (ierr != 0) return CeedError(ceed, CEED_ERROR_BACKEND,
188e0582403Sabdelfattah83                                       "MAGMA: launch failure detected for magma_weight");
1897f5b9731SStan Tomov   }
1903513a710Sjeremylt   break;
1913513a710Sjeremylt   // LCOV_EXCL_START
1923513a710Sjeremylt   case CEED_EVAL_DIV:
193e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported");
1943513a710Sjeremylt   case CEED_EVAL_CURL:
195e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported");
1963513a710Sjeremylt   case CEED_EVAL_NONE:
197e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
1983513a710Sjeremylt                      "CEED_EVAL_NONE does not make sense in this context");
1993513a710Sjeremylt     // LCOV_EXCL_STOP
2003513a710Sjeremylt   }
2017f5b9731SStan Tomov 
202e0582403Sabdelfattah83   // must sync to ensure completeness
203e0582403Sabdelfattah83   ceed_magma_queue_sync( data->queue );
204e0582403Sabdelfattah83 
2057f5b9731SStan Tomov   if (emode!=CEED_EVAL_WEIGHT) {
206e15f9bd0SJeremy L Thompson     ierr = CeedVectorRestoreArrayRead(U, &u); CeedChkBackend(ierr);
2077f5b9731SStan Tomov   }
208e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArray(V, &v); CeedChkBackend(ierr);
209e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2107f5b9731SStan Tomov }
2117f5b9731SStan Tomov 
2127f5b9731SStan Tomov #ifdef __cplusplus
2137f5b9731SStan Tomov CEED_INTERN "C"
2147f5b9731SStan Tomov #endif
21580a9ef05SNatalie Beams int CeedBasisApplyNonTensor_f64_Magma(CeedBasis basis, CeedInt nelem,
216868539c2SNatalie Beams                                       CeedTransposeMode tmode, CeedEvalMode emode,
217868539c2SNatalie Beams                                       CeedVector U, CeedVector V) {
218868539c2SNatalie Beams   int ierr;
219868539c2SNatalie Beams   Ceed ceed;
220e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
221e0582403Sabdelfattah83 
222e0582403Sabdelfattah83   Ceed_Magma *data;
223e15f9bd0SJeremy L Thompson   ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr);
224e0582403Sabdelfattah83 
225868539c2SNatalie Beams   CeedInt dim, ncomp, ndof, nqpt;
226e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
227e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr);
228e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChkBackend(ierr);
229e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChkBackend(ierr);
230868539c2SNatalie Beams   const CeedScalar *du;
231868539c2SNatalie Beams   CeedScalar *dv;
232868539c2SNatalie Beams   if (emode != CEED_EVAL_WEIGHT) {
233e15f9bd0SJeremy L Thompson     ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &du); CeedChkBackend(ierr);
234868539c2SNatalie Beams   } else if (emode != CEED_EVAL_WEIGHT) {
235868539c2SNatalie Beams     // LCOV_EXCL_START
236e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
237868539c2SNatalie Beams                      "An input vector is required for this CeedEvalMode");
238868539c2SNatalie Beams     // LCOV_EXCL_STOP
239868539c2SNatalie Beams   }
2409c774eddSJeremy L Thompson   ierr = CeedVectorGetArrayWrite(V, CEED_MEM_DEVICE, &dv); CeedChkBackend(ierr);
241868539c2SNatalie Beams 
242868539c2SNatalie Beams   CeedBasisNonTensor_Magma *impl;
243e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr);
244868539c2SNatalie Beams 
2453f21f6b1SJeremy L Thompson   CeedDebug(ceed, "\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%d, comp = %d",
246868539c2SNatalie Beams             ncomp*ndof, ncomp);
247868539c2SNatalie Beams 
248868539c2SNatalie Beams   if (tmode == CEED_TRANSPOSE) {
249868539c2SNatalie Beams     CeedInt length;
250868539c2SNatalie Beams     ierr = CeedVectorGetLength(V, &length);
25180a9ef05SNatalie Beams     if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) {
25280a9ef05SNatalie Beams       magmablas_slaset(MagmaFull, length, 1, 0., 0., (float *) dv, length,
25380a9ef05SNatalie Beams                        data->queue);
25480a9ef05SNatalie Beams     } else {
25580a9ef05SNatalie Beams       magmablas_dlaset(MagmaFull, length, 1, 0., 0., (double *) dv, length,
25680a9ef05SNatalie Beams                        data->queue);
25780a9ef05SNatalie Beams     }
258e0582403Sabdelfattah83     ceed_magma_queue_sync( data->queue );
259868539c2SNatalie Beams   }
26080a9ef05SNatalie Beams 
261868539c2SNatalie Beams   switch (emode) {
262868539c2SNatalie Beams   case CEED_EVAL_INTERP: {
263868539c2SNatalie Beams     CeedInt P = ndof, Q = nqpt;
264868539c2SNatalie Beams     if (tmode == CEED_TRANSPOSE)
265e0582403Sabdelfattah83       magma_dgemm_nontensor(MagmaNoTrans, MagmaNoTrans,
266868539c2SNatalie Beams                             P, nelem*ncomp, Q,
26780a9ef05SNatalie Beams                             1.0, (double *)impl->dinterp, P,
26880a9ef05SNatalie Beams                             (double *)du, Q,
26980a9ef05SNatalie Beams                             0.0, (double *)dv, P, data->queue);
270868539c2SNatalie Beams     else
271e0582403Sabdelfattah83       magma_dgemm_nontensor(MagmaTrans, MagmaNoTrans,
272868539c2SNatalie Beams                             Q, nelem*ncomp, P,
27380a9ef05SNatalie Beams                             1.0, (double *)impl->dinterp, P,
27480a9ef05SNatalie Beams                             (double *)du, P,
27580a9ef05SNatalie Beams                             0.0, (double *)dv, Q, data->queue);
276868539c2SNatalie Beams   }
277868539c2SNatalie Beams   break;
278868539c2SNatalie Beams 
279868539c2SNatalie Beams   case CEED_EVAL_GRAD: {
280868539c2SNatalie Beams     CeedInt P = ndof, Q = nqpt;
281868539c2SNatalie Beams     if (tmode == CEED_TRANSPOSE) {
28280a9ef05SNatalie Beams       CeedScalar beta = 0.0;
283868539c2SNatalie Beams       for(int d=0; d<dim; d++) {
284868539c2SNatalie Beams         if (d>0)
285868539c2SNatalie Beams           beta = 1.0;
286e0582403Sabdelfattah83         magma_dgemm_nontensor(MagmaNoTrans, MagmaNoTrans,
287868539c2SNatalie Beams                               P, nelem*ncomp, Q,
28880a9ef05SNatalie Beams                               1.0, (double *)(impl->dgrad + d*P*Q), P,
28980a9ef05SNatalie Beams                               (double *)(du + d*nelem*ncomp*Q), Q,
29080a9ef05SNatalie Beams                               beta, (double *)dv, P, data->queue);
291868539c2SNatalie Beams       }
292868539c2SNatalie Beams     } else {
293868539c2SNatalie Beams       for(int d=0; d< dim; d++)
294e0582403Sabdelfattah83         magma_dgemm_nontensor(MagmaTrans, MagmaNoTrans,
295868539c2SNatalie Beams                               Q, nelem*ncomp, P,
29680a9ef05SNatalie Beams                               1.0, (double *)(impl->dgrad + d*P*Q), P,
29780a9ef05SNatalie Beams                               (double *)du, P,
29880a9ef05SNatalie Beams                               0.0, (double *)(dv + d*nelem*ncomp*Q), Q, data->queue);
29980a9ef05SNatalie Beams     }
30080a9ef05SNatalie Beams   }
30180a9ef05SNatalie Beams   break;
30280a9ef05SNatalie Beams 
30380a9ef05SNatalie Beams   case CEED_EVAL_WEIGHT: {
30480a9ef05SNatalie Beams     if (tmode == CEED_TRANSPOSE)
30580a9ef05SNatalie Beams       // LCOV_EXCL_START
30680a9ef05SNatalie Beams       return CeedError(ceed, CEED_ERROR_BACKEND,
30780a9ef05SNatalie Beams                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
30880a9ef05SNatalie Beams     // LCOV_EXCL_STOP
30980a9ef05SNatalie Beams 
31080a9ef05SNatalie Beams     int elemsPerBlock = 1;//basis->Q1d < 7 ? optElems[basis->Q1d] : 1;
31180a9ef05SNatalie Beams     int grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem)?
31280a9ef05SNatalie Beams                                        1 : 0 );
31380a9ef05SNatalie Beams     magma_weight_nontensor(grid, nqpt, nelem, nqpt, impl->dqweight, dv,
31480a9ef05SNatalie Beams                            data->queue);
31580a9ef05SNatalie Beams     CeedChkBackend(ierr);
31680a9ef05SNatalie Beams   }
31780a9ef05SNatalie Beams   break;
31880a9ef05SNatalie Beams 
31980a9ef05SNatalie Beams   // LCOV_EXCL_START
32080a9ef05SNatalie Beams   case CEED_EVAL_DIV:
32180a9ef05SNatalie Beams     return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported");
32280a9ef05SNatalie Beams   case CEED_EVAL_CURL:
32380a9ef05SNatalie Beams     return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported");
32480a9ef05SNatalie Beams   case CEED_EVAL_NONE:
32580a9ef05SNatalie Beams     return CeedError(ceed, CEED_ERROR_BACKEND,
32680a9ef05SNatalie Beams                      "CEED_EVAL_NONE does not make sense in this context");
32780a9ef05SNatalie Beams     // LCOV_EXCL_STOP
32880a9ef05SNatalie Beams   }
32980a9ef05SNatalie Beams 
33080a9ef05SNatalie Beams   // must sync to ensure completeness
33180a9ef05SNatalie Beams   ceed_magma_queue_sync( data->queue );
33280a9ef05SNatalie Beams 
33380a9ef05SNatalie Beams   if (emode!=CEED_EVAL_WEIGHT) {
33480a9ef05SNatalie Beams     ierr = CeedVectorRestoreArrayRead(U, &du); CeedChkBackend(ierr);
33580a9ef05SNatalie Beams   }
33680a9ef05SNatalie Beams   ierr = CeedVectorRestoreArray(V, &dv); CeedChkBackend(ierr);
33780a9ef05SNatalie Beams   return CEED_ERROR_SUCCESS;
33880a9ef05SNatalie Beams }
33980a9ef05SNatalie Beams 
34080a9ef05SNatalie Beams int CeedBasisApplyNonTensor_f32_Magma(CeedBasis basis, CeedInt nelem,
34180a9ef05SNatalie Beams                                       CeedTransposeMode tmode, CeedEvalMode emode,
34280a9ef05SNatalie Beams                                       CeedVector U, CeedVector V) {
34380a9ef05SNatalie Beams   int ierr;
34480a9ef05SNatalie Beams   Ceed ceed;
34580a9ef05SNatalie Beams   ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
34680a9ef05SNatalie Beams 
34780a9ef05SNatalie Beams   Ceed_Magma *data;
34880a9ef05SNatalie Beams   ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr);
34980a9ef05SNatalie Beams 
35080a9ef05SNatalie Beams   CeedInt dim, ncomp, ndof, nqpt;
35180a9ef05SNatalie Beams   ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
35280a9ef05SNatalie Beams   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr);
35380a9ef05SNatalie Beams   ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChkBackend(ierr);
35480a9ef05SNatalie Beams   ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChkBackend(ierr);
35580a9ef05SNatalie Beams   const CeedScalar *du;
35680a9ef05SNatalie Beams   CeedScalar *dv;
35780a9ef05SNatalie Beams   if (emode != CEED_EVAL_WEIGHT) {
35880a9ef05SNatalie Beams     ierr = CeedVectorGetArrayRead(U, CEED_MEM_DEVICE, &du); CeedChkBackend(ierr);
35980a9ef05SNatalie Beams   } else if (emode != CEED_EVAL_WEIGHT) {
36080a9ef05SNatalie Beams     // LCOV_EXCL_START
36180a9ef05SNatalie Beams     return CeedError(ceed, CEED_ERROR_BACKEND,
36280a9ef05SNatalie Beams                      "An input vector is required for this CeedEvalMode");
36380a9ef05SNatalie Beams     // LCOV_EXCL_STOP
36480a9ef05SNatalie Beams   }
3659c774eddSJeremy L Thompson   ierr = CeedVectorGetArrayWrite(V, CEED_MEM_DEVICE, &dv); CeedChkBackend(ierr);
36680a9ef05SNatalie Beams 
36780a9ef05SNatalie Beams   CeedBasisNonTensor_Magma *impl;
36880a9ef05SNatalie Beams   ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr);
36980a9ef05SNatalie Beams 
3703f21f6b1SJeremy L Thompson   CeedDebug(ceed, "\033[01m[CeedBasisApplyNonTensor_Magma] vsize=%d, comp = %d",
37180a9ef05SNatalie Beams             ncomp*ndof, ncomp);
37280a9ef05SNatalie Beams 
37380a9ef05SNatalie Beams   if (tmode == CEED_TRANSPOSE) {
37480a9ef05SNatalie Beams     CeedInt length;
37580a9ef05SNatalie Beams     ierr = CeedVectorGetLength(V, &length);
37680a9ef05SNatalie Beams     if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) {
37780a9ef05SNatalie Beams       magmablas_slaset(MagmaFull, length, 1, 0., 0., (float *) dv, length,
37880a9ef05SNatalie Beams                        data->queue);
37980a9ef05SNatalie Beams     } else {
38080a9ef05SNatalie Beams       magmablas_dlaset(MagmaFull, length, 1, 0., 0., (double *) dv, length,
38180a9ef05SNatalie Beams                        data->queue);
38280a9ef05SNatalie Beams     }
38380a9ef05SNatalie Beams     ceed_magma_queue_sync( data->queue );
38480a9ef05SNatalie Beams   }
38580a9ef05SNatalie Beams 
38680a9ef05SNatalie Beams   switch (emode) {
38780a9ef05SNatalie Beams   case CEED_EVAL_INTERP: {
38880a9ef05SNatalie Beams     CeedInt P = ndof, Q = nqpt;
38980a9ef05SNatalie Beams     if (tmode == CEED_TRANSPOSE)
39080a9ef05SNatalie Beams       magma_sgemm_nontensor(MagmaNoTrans, MagmaNoTrans,
39180a9ef05SNatalie Beams                             P, nelem*ncomp, Q,
39280a9ef05SNatalie Beams                             1.0, (float *)impl->dinterp, P,
39380a9ef05SNatalie Beams                             (float *)du, Q,
39480a9ef05SNatalie Beams                             0.0, (float *)dv, P, data->queue);
39580a9ef05SNatalie Beams     else
39680a9ef05SNatalie Beams       magma_sgemm_nontensor(MagmaTrans, MagmaNoTrans,
39780a9ef05SNatalie Beams                             Q, nelem*ncomp, P,
39880a9ef05SNatalie Beams                             1.0, (float *)impl->dinterp, P,
39980a9ef05SNatalie Beams                             (float *)du, P,
40080a9ef05SNatalie Beams                             0.0, (float *)dv, Q, data->queue);
40180a9ef05SNatalie Beams   }
40280a9ef05SNatalie Beams   break;
40380a9ef05SNatalie Beams 
40480a9ef05SNatalie Beams   case CEED_EVAL_GRAD: {
40580a9ef05SNatalie Beams     CeedInt P = ndof, Q = nqpt;
40680a9ef05SNatalie Beams     if (tmode == CEED_TRANSPOSE) {
40780a9ef05SNatalie Beams       CeedScalar beta = 0.0;
40880a9ef05SNatalie Beams       for(int d=0; d<dim; d++) {
40980a9ef05SNatalie Beams         if (d>0)
41080a9ef05SNatalie Beams           beta = 1.0;
41180a9ef05SNatalie Beams         magma_sgemm_nontensor(MagmaNoTrans, MagmaNoTrans,
41280a9ef05SNatalie Beams                               P, nelem*ncomp, Q,
41380a9ef05SNatalie Beams                               1.0, (float *)(impl->dgrad + d*P*Q), P,
41480a9ef05SNatalie Beams                               (float *)(du + d*nelem*ncomp*Q), Q,
41580a9ef05SNatalie Beams                               beta, (float *)dv, P, data->queue);
41680a9ef05SNatalie Beams       }
41780a9ef05SNatalie Beams     } else {
41880a9ef05SNatalie Beams       for(int d=0; d< dim; d++)
41980a9ef05SNatalie Beams         magma_sgemm_nontensor(MagmaTrans, MagmaNoTrans,
42080a9ef05SNatalie Beams                               Q, nelem*ncomp, P,
42180a9ef05SNatalie Beams                               1.0, (float *)(impl->dgrad + d*P*Q), P,
42280a9ef05SNatalie Beams                               (float *)du, P,
42380a9ef05SNatalie Beams                               0.0, (float *)(dv + d*nelem*ncomp*Q), Q, data->queue);
424868539c2SNatalie Beams     }
425868539c2SNatalie Beams   }
426868539c2SNatalie Beams   break;
427868539c2SNatalie Beams 
428868539c2SNatalie Beams   case CEED_EVAL_WEIGHT: {
429868539c2SNatalie Beams     if (tmode == CEED_TRANSPOSE)
430868539c2SNatalie Beams       // LCOV_EXCL_START
431e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
432868539c2SNatalie Beams                        "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
433868539c2SNatalie Beams     // LCOV_EXCL_STOP
434868539c2SNatalie Beams 
435868539c2SNatalie Beams     int elemsPerBlock = 1;//basis->Q1d < 7 ? optElems[basis->Q1d] : 1;
436868539c2SNatalie Beams     int grid = nelem/elemsPerBlock + ( (nelem/elemsPerBlock*elemsPerBlock<nelem)?
437868539c2SNatalie Beams                                        1 : 0 );
438e0582403Sabdelfattah83     magma_weight_nontensor(grid, nqpt, nelem, nqpt, impl->dqweight, dv,
439e0582403Sabdelfattah83                            data->queue);
440e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
441868539c2SNatalie Beams   }
442868539c2SNatalie Beams   break;
443868539c2SNatalie Beams 
444868539c2SNatalie Beams   // LCOV_EXCL_START
445868539c2SNatalie Beams   case CEED_EVAL_DIV:
446e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported");
447868539c2SNatalie Beams   case CEED_EVAL_CURL:
448e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported");
449868539c2SNatalie Beams   case CEED_EVAL_NONE:
450e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
451868539c2SNatalie Beams                      "CEED_EVAL_NONE does not make sense in this context");
452868539c2SNatalie Beams     // LCOV_EXCL_STOP
453868539c2SNatalie Beams   }
454868539c2SNatalie Beams 
455e0582403Sabdelfattah83   // must sync to ensure completeness
456e0582403Sabdelfattah83   ceed_magma_queue_sync( data->queue );
457e0582403Sabdelfattah83 
458868539c2SNatalie Beams   if (emode!=CEED_EVAL_WEIGHT) {
459e15f9bd0SJeremy L Thompson     ierr = CeedVectorRestoreArrayRead(U, &du); CeedChkBackend(ierr);
460868539c2SNatalie Beams   }
461e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArray(V, &dv); CeedChkBackend(ierr);
462e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
463868539c2SNatalie Beams }
464868539c2SNatalie Beams 
465868539c2SNatalie Beams #ifdef __cplusplus
466868539c2SNatalie Beams CEED_INTERN "C"
467868539c2SNatalie Beams #endif
4683513a710Sjeremylt int CeedBasisDestroy_Magma(CeedBasis basis) {
4697f5b9731SStan Tomov   int ierr;
4707f5b9731SStan Tomov   CeedBasis_Magma *impl;
471e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr);
4727f5b9731SStan Tomov 
473e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dqref1d); CeedChkBackend(ierr);
474e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dinterp1d); CeedChkBackend(ierr);
475e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dgrad1d); CeedChkBackend(ierr);
476e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dqweight1d); CeedChkBackend(ierr);
4777f5b9731SStan Tomov 
478e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
4797f5b9731SStan Tomov 
480e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4817f5b9731SStan Tomov }
4827f5b9731SStan Tomov 
4837f5b9731SStan Tomov #ifdef __cplusplus
4847f5b9731SStan Tomov CEED_INTERN "C"
4857f5b9731SStan Tomov #endif
486868539c2SNatalie Beams int CeedBasisDestroyNonTensor_Magma(CeedBasis basis) {
487868539c2SNatalie Beams   int ierr;
488868539c2SNatalie Beams   CeedBasisNonTensor_Magma *impl;
489e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetData(basis, &impl); CeedChkBackend(ierr);
490868539c2SNatalie Beams 
491e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dqref); CeedChkBackend(ierr);
492e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dinterp); CeedChkBackend(ierr);
493e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dgrad); CeedChkBackend(ierr);
494e15f9bd0SJeremy L Thompson   ierr = magma_free(impl->dqweight); CeedChkBackend(ierr);
495868539c2SNatalie Beams 
496e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
497868539c2SNatalie Beams 
498e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
499868539c2SNatalie Beams }
500868539c2SNatalie Beams 
501868539c2SNatalie Beams #ifdef __cplusplus
502868539c2SNatalie Beams CEED_INTERN "C"
503868539c2SNatalie Beams #endif
5043513a710Sjeremylt int CeedBasisCreateTensorH1_Magma(CeedInt dim, CeedInt P1d, CeedInt Q1d,
5053513a710Sjeremylt                                   const CeedScalar *interp1d,
5067f5b9731SStan Tomov                                   const CeedScalar *grad1d,
5077f5b9731SStan Tomov                                   const CeedScalar *qref1d,
5083513a710Sjeremylt                                   const CeedScalar *qweight1d, CeedBasis basis) {
5097f5b9731SStan Tomov   int ierr;
5107f5b9731SStan Tomov   CeedBasis_Magma *impl;
5117f5b9731SStan Tomov   Ceed ceed;
512e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
5137f5b9731SStan Tomov 
514c9f8acf2SJeremy L Thompson   // Check for supported parameters
515c9f8acf2SJeremy L Thompson   CeedInt ncomp = 0;
516e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr);
517c9f8acf2SJeremy L Thompson   if (ncomp > 3)
518c9f8acf2SJeremy L Thompson     // LCOV_EXCL_START
519e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
520c9f8acf2SJeremy L Thompson                      "Magma backend does not support tensor bases with more than 3 components");
521c9f8acf2SJeremy L Thompson   // LCOV_EXCL_STOP
522c9f8acf2SJeremy L Thompson   if (P1d > 10)
523c9f8acf2SJeremy L Thompson     // LCOV_EXCL_START
524e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
525c9f8acf2SJeremy L Thompson                      "Magma backend does not support tensor bases with more than 10 nodes in each dimension");
526c9f8acf2SJeremy L Thompson   // LCOV_EXCL_STOP
527c9f8acf2SJeremy L Thompson   if (Q1d > 10)
528c9f8acf2SJeremy L Thompson     // LCOV_EXCL_START
529e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
530c9f8acf2SJeremy L Thompson                      "Magma backend does not support tensor bases with more than 10 quadrature points in each dimension");
531c9f8acf2SJeremy L Thompson   // LCOV_EXCL_STOP
532c9f8acf2SJeremy L Thompson 
533e0582403Sabdelfattah83   Ceed_Magma *data;
534e15f9bd0SJeremy L Thompson   ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr);
535e0582403Sabdelfattah83 
5367f5b9731SStan Tomov   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply",
537e15f9bd0SJeremy L Thompson                                 CeedBasisApply_Magma); CeedChkBackend(ierr);
5387f5b9731SStan Tomov   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy",
539e15f9bd0SJeremy L Thompson                                 CeedBasisDestroy_Magma); CeedChkBackend(ierr);
5407f5b9731SStan Tomov 
541e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1,&impl); CeedChkBackend(ierr);
542e15f9bd0SJeremy L Thompson   ierr = CeedBasisSetData(basis, impl); CeedChkBackend(ierr);
5437f5b9731SStan Tomov 
5447f5b9731SStan Tomov   // Copy qref1d to the GPU
5457f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dqref1d, Q1d*sizeof(qref1d[0]));
546e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
547e0582403Sabdelfattah83   magma_setvector(Q1d, sizeof(qref1d[0]), qref1d, 1, impl->dqref1d, 1,
548e0582403Sabdelfattah83                   data->queue);
5497f5b9731SStan Tomov 
5507f5b9731SStan Tomov   // Copy interp1d to the GPU
5517f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dinterp1d, Q1d*P1d*sizeof(interp1d[0]));
552e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
553e0582403Sabdelfattah83   magma_setvector(Q1d*P1d, sizeof(interp1d[0]), interp1d, 1, impl->dinterp1d, 1,
554e0582403Sabdelfattah83                   data->queue);
5557f5b9731SStan Tomov 
5567f5b9731SStan Tomov   // Copy grad1d to the GPU
5577f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dgrad1d, Q1d*P1d*sizeof(grad1d[0]));
558e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
559e0582403Sabdelfattah83   magma_setvector(Q1d*P1d, sizeof(grad1d[0]), grad1d, 1, impl->dgrad1d, 1,
560e0582403Sabdelfattah83                   data->queue);
5617f5b9731SStan Tomov 
5627f5b9731SStan Tomov   // Copy qweight1d to the GPU
5637f5b9731SStan Tomov   ierr = magma_malloc((void **)&impl->dqweight1d, Q1d*sizeof(qweight1d[0]));
564e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
565e0582403Sabdelfattah83   magma_setvector(Q1d, sizeof(qweight1d[0]), qweight1d, 1, impl->dqweight1d, 1,
566e0582403Sabdelfattah83                   data->queue);
5677f5b9731SStan Tomov 
568e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
5697f5b9731SStan Tomov }
5707f5b9731SStan Tomov 
5717f5b9731SStan Tomov #ifdef __cplusplus
5727f5b9731SStan Tomov CEED_INTERN "C"
5737f5b9731SStan Tomov #endif
5743513a710Sjeremylt int CeedBasisCreateH1_Magma(CeedElemTopology topo, CeedInt dim, CeedInt ndof,
5753513a710Sjeremylt                             CeedInt nqpts, const CeedScalar *interp,
5763513a710Sjeremylt                             const CeedScalar *grad, const CeedScalar *qref,
5773513a710Sjeremylt                             const CeedScalar *qweight, CeedBasis basis) {
5787f5b9731SStan Tomov   int ierr;
579868539c2SNatalie Beams   CeedBasisNonTensor_Magma *impl;
5807f5b9731SStan Tomov   Ceed ceed;
581e15f9bd0SJeremy L Thompson   ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
5827f5b9731SStan Tomov 
583e0582403Sabdelfattah83   Ceed_Magma *data;
584e15f9bd0SJeremy L Thompson   ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr);
585e0582403Sabdelfattah83 
58680a9ef05SNatalie Beams   if (CEED_SCALAR_TYPE == CEED_SCALAR_FP64) {
587868539c2SNatalie Beams     ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply",
58880a9ef05SNatalie Beams                                   CeedBasisApplyNonTensor_f64_Magma);
58980a9ef05SNatalie Beams     CeedChkBackend(ierr);
59080a9ef05SNatalie Beams   } else {
59180a9ef05SNatalie Beams     ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply",
59280a9ef05SNatalie Beams                                   CeedBasisApplyNonTensor_f32_Magma);
59380a9ef05SNatalie Beams     CeedChkBackend(ierr);
59480a9ef05SNatalie Beams   }
595868539c2SNatalie Beams   ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy",
596e15f9bd0SJeremy L Thompson                                 CeedBasisDestroyNonTensor_Magma); CeedChkBackend(ierr);
597868539c2SNatalie Beams 
598e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1,&impl); CeedChkBackend(ierr);
599e15f9bd0SJeremy L Thompson   ierr = CeedBasisSetData(basis, impl); CeedChkBackend(ierr);
600868539c2SNatalie Beams 
601868539c2SNatalie Beams   // Copy qref to the GPU
602868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dqref, nqpts*sizeof(qref[0]));
603e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
604e0582403Sabdelfattah83   magma_setvector(nqpts, sizeof(qref[0]), qref, 1, impl->dqref, 1, data->queue);
605868539c2SNatalie Beams 
606868539c2SNatalie Beams   // Copy interp to the GPU
607868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dinterp, nqpts*ndof*sizeof(interp[0]));
608e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
609e0582403Sabdelfattah83   magma_setvector(nqpts*ndof, sizeof(interp[0]), interp, 1, impl->dinterp, 1,
610e0582403Sabdelfattah83                   data->queue);
611868539c2SNatalie Beams 
612868539c2SNatalie Beams   // Copy grad to the GPU
613868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dgrad, nqpts*ndof*dim*sizeof(grad[0]));
614e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
615e0582403Sabdelfattah83   magma_setvector(nqpts*ndof*dim, sizeof(grad[0]), grad, 1, impl->dgrad, 1,
616e0582403Sabdelfattah83                   data->queue);
617868539c2SNatalie Beams 
618868539c2SNatalie Beams   // Copy qweight to the GPU
619868539c2SNatalie Beams   ierr = magma_malloc((void **)&impl->dqweight, nqpts*sizeof(qweight[0]));
620e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
621e0582403Sabdelfattah83   magma_setvector(nqpts, sizeof(qweight[0]), qweight, 1, impl->dqweight, 1,
622e0582403Sabdelfattah83                   data->queue);
623868539c2SNatalie Beams 
624e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
6257f5b9731SStan Tomov }
626