xref: /libCEED/rust/libceed-sys/c-src/backends/ref/ceed-ref.c (revision ae3cba8242975de624b6eb0f70528af789964ca1)
1*ae3cba82Scamierjs // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2*ae3cba82Scamierjs // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3*ae3cba82Scamierjs // reserved. See files LICENSE and NOTICE for details.
4*ae3cba82Scamierjs //
5*ae3cba82Scamierjs // This file is part of CEED, a collection of benchmarks, miniapps, software
6*ae3cba82Scamierjs // libraries and APIs for efficient high-order finite element and spectral
7*ae3cba82Scamierjs // element discretizations for exascale applications. For more information and
8*ae3cba82Scamierjs // source code availability see http://github.com/ceed.
9*ae3cba82Scamierjs //
10*ae3cba82Scamierjs // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11*ae3cba82Scamierjs // a collaborative effort of two U.S. Department of Energy organizations (Office
12*ae3cba82Scamierjs // of Science and the National Nuclear Security Administration) responsible for
13*ae3cba82Scamierjs // the planning and preparation of a capable exascale ecosystem, including
14*ae3cba82Scamierjs // software, applications, hardware, advanced system engineering and early
15*ae3cba82Scamierjs // testbed platforms, in support of the nation's exascale computing imperative.
16*ae3cba82Scamierjs 
17*ae3cba82Scamierjs #include <ceed-impl.h>
18*ae3cba82Scamierjs #include <string.h>
19*ae3cba82Scamierjs 
20*ae3cba82Scamierjs typedef struct {
21*ae3cba82Scamierjs   CeedScalar *array;
22*ae3cba82Scamierjs   CeedScalar *array_allocated;
23*ae3cba82Scamierjs } CeedVector_Ref;
24*ae3cba82Scamierjs 
25*ae3cba82Scamierjs typedef struct {
26*ae3cba82Scamierjs   const CeedInt *indices;
27*ae3cba82Scamierjs   CeedInt *indices_allocated;
28*ae3cba82Scamierjs } CeedElemRestriction_Ref;
29*ae3cba82Scamierjs 
30*ae3cba82Scamierjs typedef struct {
31*ae3cba82Scamierjs   CeedVector etmp;
32*ae3cba82Scamierjs   CeedVector qdata;
33*ae3cba82Scamierjs } CeedOperator_Ref;
34*ae3cba82Scamierjs 
35*ae3cba82Scamierjs static int CeedVectorSetArray_Ref(CeedVector vec, CeedMemType mtype,
36*ae3cba82Scamierjs                                   CeedCopyMode cmode, CeedScalar *array) {
37*ae3cba82Scamierjs   CeedVector_Ref *impl = vec->data;
38*ae3cba82Scamierjs   int ierr;
39*ae3cba82Scamierjs 
40*ae3cba82Scamierjs   if (mtype != CEED_MEM_HOST)
41*ae3cba82Scamierjs     return CeedError(vec->ceed, 1, "Only MemType = HOST supported");
42*ae3cba82Scamierjs   ierr = CeedFree(&impl->array_allocated); CeedChk(ierr);
43*ae3cba82Scamierjs   switch (cmode) {
44*ae3cba82Scamierjs   case CEED_COPY_VALUES:
45*ae3cba82Scamierjs     ierr = CeedMalloc(vec->length, &impl->array_allocated); CeedChk(ierr);
46*ae3cba82Scamierjs     impl->array = impl->array_allocated;
47*ae3cba82Scamierjs     if (array) memcpy(impl->array, array, vec->length * sizeof(array[0]));
48*ae3cba82Scamierjs     break;
49*ae3cba82Scamierjs   case CEED_OWN_POINTER:
50*ae3cba82Scamierjs     impl->array_allocated = array;
51*ae3cba82Scamierjs     impl->array = array;
52*ae3cba82Scamierjs     break;
53*ae3cba82Scamierjs   case CEED_USE_POINTER:
54*ae3cba82Scamierjs     impl->array = array;
55*ae3cba82Scamierjs   }
56*ae3cba82Scamierjs   return 0;
57*ae3cba82Scamierjs }
58*ae3cba82Scamierjs 
59*ae3cba82Scamierjs static int CeedVectorGetArray_Ref(CeedVector vec, CeedMemType mtype,
60*ae3cba82Scamierjs                                   CeedScalar **array) {
61*ae3cba82Scamierjs   CeedVector_Ref *impl = vec->data;
62*ae3cba82Scamierjs   int ierr;
63*ae3cba82Scamierjs 
64*ae3cba82Scamierjs   if (mtype != CEED_MEM_HOST)
65*ae3cba82Scamierjs     return CeedError(vec->ceed, 1, "Can only provide to HOST memory");
66*ae3cba82Scamierjs   if (!impl->array) { // Allocate if array is not yet allocated
67*ae3cba82Scamierjs     ierr = CeedVectorSetArray(vec, CEED_MEM_HOST, CEED_COPY_VALUES, NULL);
68*ae3cba82Scamierjs     CeedChk(ierr);
69*ae3cba82Scamierjs   }
70*ae3cba82Scamierjs   *array = impl->array;
71*ae3cba82Scamierjs   return 0;
72*ae3cba82Scamierjs }
73*ae3cba82Scamierjs 
74*ae3cba82Scamierjs static int CeedVectorGetArrayRead_Ref(CeedVector vec, CeedMemType mtype,
75*ae3cba82Scamierjs                                       const CeedScalar **array) {
76*ae3cba82Scamierjs   CeedVector_Ref *impl = vec->data;
77*ae3cba82Scamierjs   int ierr;
78*ae3cba82Scamierjs 
79*ae3cba82Scamierjs   if (mtype != CEED_MEM_HOST)
80*ae3cba82Scamierjs     return CeedError(vec->ceed, 1, "Can only provide to HOST memory");
81*ae3cba82Scamierjs   if (!impl->array) { // Allocate if array is not yet allocated
82*ae3cba82Scamierjs     ierr = CeedVectorSetArray(vec, CEED_MEM_HOST, CEED_COPY_VALUES, NULL);
83*ae3cba82Scamierjs     CeedChk(ierr);
84*ae3cba82Scamierjs   }
85*ae3cba82Scamierjs   *array = impl->array;
86*ae3cba82Scamierjs   return 0;
87*ae3cba82Scamierjs }
88*ae3cba82Scamierjs 
89*ae3cba82Scamierjs static int CeedVectorRestoreArray_Ref(CeedVector vec, CeedScalar **array) {
90*ae3cba82Scamierjs   *array = NULL;
91*ae3cba82Scamierjs   return 0;
92*ae3cba82Scamierjs }
93*ae3cba82Scamierjs 
94*ae3cba82Scamierjs static int CeedVectorRestoreArrayRead_Ref(CeedVector vec,
95*ae3cba82Scamierjs     const CeedScalar **array) {
96*ae3cba82Scamierjs   *array = NULL;
97*ae3cba82Scamierjs   return 0;
98*ae3cba82Scamierjs }
99*ae3cba82Scamierjs 
100*ae3cba82Scamierjs static int CeedVectorDestroy_Ref(CeedVector vec) {
101*ae3cba82Scamierjs   CeedVector_Ref *impl = vec->data;
102*ae3cba82Scamierjs   int ierr;
103*ae3cba82Scamierjs 
104*ae3cba82Scamierjs   ierr = CeedFree(&impl->array_allocated); CeedChk(ierr);
105*ae3cba82Scamierjs   ierr = CeedFree(&vec->data); CeedChk(ierr);
106*ae3cba82Scamierjs   return 0;
107*ae3cba82Scamierjs }
108*ae3cba82Scamierjs 
109*ae3cba82Scamierjs static int CeedVectorCreate_Ref(Ceed ceed, CeedInt n, CeedVector vec) {
110*ae3cba82Scamierjs   CeedVector_Ref *impl;
111*ae3cba82Scamierjs   int ierr;
112*ae3cba82Scamierjs 
113*ae3cba82Scamierjs   vec->SetArray = CeedVectorSetArray_Ref;
114*ae3cba82Scamierjs   vec->GetArray = CeedVectorGetArray_Ref;
115*ae3cba82Scamierjs   vec->GetArrayRead = CeedVectorGetArrayRead_Ref;
116*ae3cba82Scamierjs   vec->RestoreArray = CeedVectorRestoreArray_Ref;
117*ae3cba82Scamierjs   vec->RestoreArrayRead = CeedVectorRestoreArrayRead_Ref;
118*ae3cba82Scamierjs   vec->Destroy = CeedVectorDestroy_Ref;
119*ae3cba82Scamierjs   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
120*ae3cba82Scamierjs   vec->data = impl;
121*ae3cba82Scamierjs   return 0;
122*ae3cba82Scamierjs }
123*ae3cba82Scamierjs 
124*ae3cba82Scamierjs static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
125*ae3cba82Scamierjs                                         CeedTransposeMode tmode, CeedVector u,
126*ae3cba82Scamierjs                                         CeedVector v, CeedRequest *request) {
127*ae3cba82Scamierjs   CeedElemRestriction_Ref *impl = r->data;
128*ae3cba82Scamierjs   int ierr;
129*ae3cba82Scamierjs   const CeedScalar *uu;
130*ae3cba82Scamierjs   CeedScalar *vv;
131*ae3cba82Scamierjs 
132*ae3cba82Scamierjs   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
133*ae3cba82Scamierjs   ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
134*ae3cba82Scamierjs   if (tmode == CEED_NOTRANSPOSE) {
135*ae3cba82Scamierjs     for (CeedInt i=0; i<r->nelem*r->elemsize; i++) vv[i] = uu[impl->indices[i]];
136*ae3cba82Scamierjs   } else {
137*ae3cba82Scamierjs     for (CeedInt i=0; i<r->nelem*r->elemsize; i++) vv[impl->indices[i]] += uu[i];
138*ae3cba82Scamierjs   }
139*ae3cba82Scamierjs   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
140*ae3cba82Scamierjs   ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
141*ae3cba82Scamierjs   if (request != CEED_REQUEST_IMMEDIATE) *request = NULL;
142*ae3cba82Scamierjs   return 0;
143*ae3cba82Scamierjs }
144*ae3cba82Scamierjs 
145*ae3cba82Scamierjs static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
146*ae3cba82Scamierjs   CeedElemRestriction_Ref *impl = r->data;
147*ae3cba82Scamierjs   int ierr;
148*ae3cba82Scamierjs 
149*ae3cba82Scamierjs   ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr);
150*ae3cba82Scamierjs   ierr = CeedFree(&r->data); CeedChk(ierr);
151*ae3cba82Scamierjs   return 0;
152*ae3cba82Scamierjs }
153*ae3cba82Scamierjs 
154*ae3cba82Scamierjs static int CeedElemRestrictionCreate_Ref(CeedElemRestriction r,
155*ae3cba82Scamierjs     CeedMemType mtype,
156*ae3cba82Scamierjs     CeedCopyMode cmode, const CeedInt *indices) {
157*ae3cba82Scamierjs   int ierr;
158*ae3cba82Scamierjs   CeedElemRestriction_Ref *impl;
159*ae3cba82Scamierjs 
160*ae3cba82Scamierjs   if (mtype != CEED_MEM_HOST)
161*ae3cba82Scamierjs     return CeedError(r->ceed, 1, "Only MemType = HOST supported");
162*ae3cba82Scamierjs   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
163*ae3cba82Scamierjs   switch (cmode) {
164*ae3cba82Scamierjs   case CEED_COPY_VALUES:
165*ae3cba82Scamierjs     ierr = CeedMalloc(r->nelem*r->elemsize, &impl->indices_allocated);
166*ae3cba82Scamierjs     CeedChk(ierr);
167*ae3cba82Scamierjs     memcpy(impl->indices_allocated, indices,
168*ae3cba82Scamierjs            r->nelem * r->elemsize * sizeof(indices[0]));
169*ae3cba82Scamierjs     impl->indices = impl->indices_allocated;
170*ae3cba82Scamierjs     break;
171*ae3cba82Scamierjs   case CEED_OWN_POINTER:
172*ae3cba82Scamierjs     impl->indices_allocated = (CeedInt*)indices;
173*ae3cba82Scamierjs     impl->indices = impl->indices_allocated;
174*ae3cba82Scamierjs     break;
175*ae3cba82Scamierjs   case CEED_USE_POINTER:
176*ae3cba82Scamierjs     impl->indices = indices;
177*ae3cba82Scamierjs   }
178*ae3cba82Scamierjs   r->data = impl;
179*ae3cba82Scamierjs   r->Apply = CeedElemRestrictionApply_Ref;
180*ae3cba82Scamierjs   r->Destroy = CeedElemRestrictionDestroy_Ref;
181*ae3cba82Scamierjs   return 0;
182*ae3cba82Scamierjs }
183*ae3cba82Scamierjs 
184*ae3cba82Scamierjs // Contracts on the middle index
185*ae3cba82Scamierjs // NOTRANSPOSE: V_ajc = T_jb U_abc
186*ae3cba82Scamierjs // TRANSPOSE:   V_ajc = T_bj U_abc
187*ae3cba82Scamierjs static int CeedTensorContract_Ref(Ceed ceed,
188*ae3cba82Scamierjs                                   CeedInt A, CeedInt B, CeedInt C, CeedInt J,
189*ae3cba82Scamierjs                                   const CeedScalar *t, CeedTransposeMode tmode,
190*ae3cba82Scamierjs                                   const CeedScalar *u, CeedScalar *v) {
191*ae3cba82Scamierjs   CeedInt tstride0 = B, tstride1 = 1;
192*ae3cba82Scamierjs   if (tmode == CEED_TRANSPOSE) {
193*ae3cba82Scamierjs     tstride0 = 1; tstride1 = J;
194*ae3cba82Scamierjs   }
195*ae3cba82Scamierjs 
196*ae3cba82Scamierjs   for (CeedInt a=0; a<A; a++) {
197*ae3cba82Scamierjs     for (CeedInt j=0; j<J; j++) {
198*ae3cba82Scamierjs       for (CeedInt c=0; c<C; c++)
199*ae3cba82Scamierjs         v[(a*J+j)*C+c] = 0;
200*ae3cba82Scamierjs       for (CeedInt b=0; b<B; b++) {
201*ae3cba82Scamierjs         for (CeedInt c=0; c<C; c++) {
202*ae3cba82Scamierjs           v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c];
203*ae3cba82Scamierjs         }
204*ae3cba82Scamierjs       }
205*ae3cba82Scamierjs     }
206*ae3cba82Scamierjs   }
207*ae3cba82Scamierjs   return 0;
208*ae3cba82Scamierjs }
209*ae3cba82Scamierjs 
210*ae3cba82Scamierjs static int CeedBasisApply_Ref(CeedBasis basis, CeedTransposeMode tmode,
211*ae3cba82Scamierjs                               CeedEvalMode emode,
212*ae3cba82Scamierjs                               const CeedScalar *u, CeedScalar *v) {
213*ae3cba82Scamierjs   int ierr;
214*ae3cba82Scamierjs   const CeedInt dim = basis->dim;
215*ae3cba82Scamierjs   const CeedInt ndof = basis->ndof;
216*ae3cba82Scamierjs 
217*ae3cba82Scamierjs   switch (emode) {
218*ae3cba82Scamierjs   case CEED_EVAL_NONE: break;
219*ae3cba82Scamierjs   case CEED_EVAL_INTERP: {
220*ae3cba82Scamierjs     CeedInt P = basis->P1d, Q = basis->Q1d;
221*ae3cba82Scamierjs     if (tmode == CEED_TRANSPOSE) {
222*ae3cba82Scamierjs       P = basis->Q1d; Q = basis->P1d;
223*ae3cba82Scamierjs     }
224*ae3cba82Scamierjs     CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1;
225*ae3cba82Scamierjs     CeedScalar tmp[2][Q*CeedPowInt(P>Q?P:Q, dim-1)];
226*ae3cba82Scamierjs     for (CeedInt d=0; d<dim; d++) {
227*ae3cba82Scamierjs       ierr = CeedTensorContract_Ref(basis->ceed, pre, P, post, Q, basis->interp1d,
228*ae3cba82Scamierjs                                     tmode,
229*ae3cba82Scamierjs                                     d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); CeedChk(ierr);
230*ae3cba82Scamierjs       pre /= P;
231*ae3cba82Scamierjs       post *= Q;
232*ae3cba82Scamierjs     }
233*ae3cba82Scamierjs   } break;
234*ae3cba82Scamierjs   case CEED_EVAL_WEIGHT: {
235*ae3cba82Scamierjs     if (tmode == CEED_TRANSPOSE)
236*ae3cba82Scamierjs       return CeedError(basis->ceed, 1, "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE");
237*ae3cba82Scamierjs     CeedInt Q = basis->Q1d;
238*ae3cba82Scamierjs     for (CeedInt d=0; d<dim; d++) {
239*ae3cba82Scamierjs       CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d);
240*ae3cba82Scamierjs       for (CeedInt i=0; i<pre; i++) {
241*ae3cba82Scamierjs         for (CeedInt j=0; j<Q; j++) {
242*ae3cba82Scamierjs           for (CeedInt k=0; k<post; k++) {
243*ae3cba82Scamierjs             v[(i*Q + j)*post + k] = basis->qweight1d[j]
244*ae3cba82Scamierjs               * (d == 0 ? 1 : v[(i*Q + j)*post + k]);
245*ae3cba82Scamierjs           }
246*ae3cba82Scamierjs         }
247*ae3cba82Scamierjs       }
248*ae3cba82Scamierjs     }
249*ae3cba82Scamierjs   } break;
250*ae3cba82Scamierjs   default:
251*ae3cba82Scamierjs     return CeedError(basis->ceed, 1, "EvalMode %d not supported", emode);
252*ae3cba82Scamierjs   }
253*ae3cba82Scamierjs   return 0;
254*ae3cba82Scamierjs }
255*ae3cba82Scamierjs 
256*ae3cba82Scamierjs static int CeedBasisDestroy_Ref(CeedBasis basis) {
257*ae3cba82Scamierjs   return 0;
258*ae3cba82Scamierjs }
259*ae3cba82Scamierjs 
260*ae3cba82Scamierjs static int CeedBasisCreateTensorH1_Ref(Ceed ceed, CeedInt dim, CeedInt P1d,
261*ae3cba82Scamierjs                                        CeedInt Q1d, const CeedScalar *interp1d,
262*ae3cba82Scamierjs                                        const CeedScalar *grad1d,
263*ae3cba82Scamierjs                                        const CeedScalar *qref1d,
264*ae3cba82Scamierjs                                        const CeedScalar *qweight1d,
265*ae3cba82Scamierjs                                        CeedBasis basis) {
266*ae3cba82Scamierjs   basis->Apply = CeedBasisApply_Ref;
267*ae3cba82Scamierjs   basis->Destroy = CeedBasisDestroy_Ref;
268*ae3cba82Scamierjs   return 0;
269*ae3cba82Scamierjs }
270*ae3cba82Scamierjs 
271*ae3cba82Scamierjs static int CeedQFunctionApply_Ref(CeedQFunction qf, void *qdata, CeedInt Q,
272*ae3cba82Scamierjs                                   const CeedScalar *const *u,
273*ae3cba82Scamierjs                                   CeedScalar *const *v) {
274*ae3cba82Scamierjs   int ierr;
275*ae3cba82Scamierjs   ierr = qf->function(qf->ctx, qdata, Q, u, v); CeedChk(ierr);
276*ae3cba82Scamierjs   return 0;
277*ae3cba82Scamierjs }
278*ae3cba82Scamierjs 
279*ae3cba82Scamierjs static int CeedQFunctionDestroy_Ref(CeedQFunction qf) {
280*ae3cba82Scamierjs   return 0;
281*ae3cba82Scamierjs }
282*ae3cba82Scamierjs 
283*ae3cba82Scamierjs static int CeedQFunctionCreate_Ref(CeedQFunction qf) {
284*ae3cba82Scamierjs   qf->Apply = CeedQFunctionApply_Ref;
285*ae3cba82Scamierjs   qf->Destroy = CeedQFunctionDestroy_Ref;
286*ae3cba82Scamierjs   return 0;
287*ae3cba82Scamierjs }
288*ae3cba82Scamierjs 
289*ae3cba82Scamierjs static int CeedOperatorDestroy_Ref(CeedOperator op) {
290*ae3cba82Scamierjs   CeedOperator_Ref *impl = op->data;
291*ae3cba82Scamierjs   int ierr;
292*ae3cba82Scamierjs 
293*ae3cba82Scamierjs   ierr = CeedVectorDestroy(&impl->etmp); CeedChk(ierr);
294*ae3cba82Scamierjs   ierr = CeedVectorDestroy(&impl->qdata); CeedChk(ierr);
295*ae3cba82Scamierjs   ierr = CeedFree(&op->data); CeedChk(ierr);
296*ae3cba82Scamierjs   return 0;
297*ae3cba82Scamierjs }
298*ae3cba82Scamierjs 
299*ae3cba82Scamierjs static int CeedOperatorApply_Ref(CeedOperator op, CeedVector qdata,
300*ae3cba82Scamierjs                                  CeedVector ustate,
301*ae3cba82Scamierjs                                  CeedVector residual, CeedRequest *request) {
302*ae3cba82Scamierjs   CeedOperator_Ref *impl = op->data;
303*ae3cba82Scamierjs   CeedVector etmp;
304*ae3cba82Scamierjs   CeedInt Q;
305*ae3cba82Scamierjs   CeedScalar *Eu;
306*ae3cba82Scamierjs   char *qd;
307*ae3cba82Scamierjs   int ierr;
308*ae3cba82Scamierjs 
309*ae3cba82Scamierjs   if (!impl->etmp) {
310*ae3cba82Scamierjs     ierr = CeedVectorCreate(op->ceed,
311*ae3cba82Scamierjs                             op->Erestrict->nelem * op->Erestrict->elemsize,
312*ae3cba82Scamierjs                             &impl->etmp); CeedChk(ierr);
313*ae3cba82Scamierjs   }
314*ae3cba82Scamierjs   etmp = impl->etmp;
315*ae3cba82Scamierjs   if (op->qf->inmode != CEED_EVAL_NONE || op->qf->inmode != CEED_EVAL_WEIGHT) {
316*ae3cba82Scamierjs     ierr = CeedElemRestrictionApply(op->Erestrict, CEED_NOTRANSPOSE, ustate, etmp,
317*ae3cba82Scamierjs                                     CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
318*ae3cba82Scamierjs   }
319*ae3cba82Scamierjs   ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr);
320*ae3cba82Scamierjs   ierr = CeedVectorGetArray(etmp, CEED_MEM_HOST, &Eu); CeedChk(ierr);
321*ae3cba82Scamierjs   ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, (CeedScalar**)&qd); CeedChk(ierr);
322*ae3cba82Scamierjs   for (CeedInt e=0; e<op->Erestrict->nelem; e++) {
323*ae3cba82Scamierjs     CeedScalar BEu[Q], BEv[Q], *out[1];
324*ae3cba82Scamierjs     const CeedScalar *in[1];
325*ae3cba82Scamierjs     ierr = CeedBasisApply(op->basis, CEED_NOTRANSPOSE, op->qf->inmode,
326*ae3cba82Scamierjs                           &Eu[e*op->Erestrict->elemsize], BEu); CeedChk(ierr);
327*ae3cba82Scamierjs     in[0] = BEu;
328*ae3cba82Scamierjs     out[0] = BEv;
329*ae3cba82Scamierjs     ierr = CeedQFunctionApply(op->qf, &qd[e*Q*op->qf->qdatasize], Q, in, out);
330*ae3cba82Scamierjs     CeedChk(ierr);
331*ae3cba82Scamierjs     ierr = CeedBasisApply(op->basis, CEED_TRANSPOSE, op->qf->outmode, BEv,
332*ae3cba82Scamierjs                           &Eu[e*op->Erestrict->elemsize]); CeedChk(ierr);
333*ae3cba82Scamierjs   }
334*ae3cba82Scamierjs   ierr = CeedVectorRestoreArray(etmp, &Eu); CeedChk(ierr);
335*ae3cba82Scamierjs   if (residual) {
336*ae3cba82Scamierjs     ierr = CeedElemRestrictionApply(op->Erestrict, CEED_TRANSPOSE, etmp, residual,
337*ae3cba82Scamierjs                                     CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
338*ae3cba82Scamierjs   }
339*ae3cba82Scamierjs   if (request != CEED_REQUEST_IMMEDIATE) *request = NULL;
340*ae3cba82Scamierjs   return 0;
341*ae3cba82Scamierjs }
342*ae3cba82Scamierjs 
343*ae3cba82Scamierjs static int CeedOperatorGetQData_Ref(CeedOperator op, CeedVector *qdata) {
344*ae3cba82Scamierjs   CeedOperator_Ref *impl = op->data;
345*ae3cba82Scamierjs   int ierr;
346*ae3cba82Scamierjs 
347*ae3cba82Scamierjs   if (!impl->qdata) {
348*ae3cba82Scamierjs     CeedInt Q;
349*ae3cba82Scamierjs     ierr = CeedBasisGetNumQuadraturePoints(op->basis, &Q); CeedChk(ierr);
350*ae3cba82Scamierjs     ierr = CeedVectorCreate(op->ceed,
351*ae3cba82Scamierjs                             op->Erestrict->nelem * Q * op->basis->ndof,
352*ae3cba82Scamierjs                             &impl->qdata); CeedChk(ierr);
353*ae3cba82Scamierjs   }
354*ae3cba82Scamierjs   *qdata = impl->qdata;
355*ae3cba82Scamierjs   return 0;
356*ae3cba82Scamierjs }
357*ae3cba82Scamierjs 
358*ae3cba82Scamierjs static int CeedOperatorCreate_Ref(CeedOperator op) {
359*ae3cba82Scamierjs   CeedOperator_Ref *impl;
360*ae3cba82Scamierjs   int ierr;
361*ae3cba82Scamierjs 
362*ae3cba82Scamierjs   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
363*ae3cba82Scamierjs   op->data = impl;
364*ae3cba82Scamierjs   op->Destroy = CeedOperatorDestroy_Ref;
365*ae3cba82Scamierjs   op->Apply = CeedOperatorApply_Ref;
366*ae3cba82Scamierjs   op->GetQData = CeedOperatorGetQData_Ref;
367*ae3cba82Scamierjs   return 0;
368*ae3cba82Scamierjs }
369*ae3cba82Scamierjs 
370*ae3cba82Scamierjs static int CeedInit_Ref(const char *resource, Ceed ceed) {
371*ae3cba82Scamierjs   if (strcmp(resource, "/cpu/self")
372*ae3cba82Scamierjs       && strcmp(resource, "/cpu/self/ref"))
373*ae3cba82Scamierjs     return CeedError(ceed, 1, "Ref backend cannot use resource: %s", resource);
374*ae3cba82Scamierjs   ceed->VecCreate = CeedVectorCreate_Ref;
375*ae3cba82Scamierjs   ceed->BasisCreateTensorH1 = CeedBasisCreateTensorH1_Ref;
376*ae3cba82Scamierjs   ceed->ElemRestrictionCreate = CeedElemRestrictionCreate_Ref;
377*ae3cba82Scamierjs   ceed->QFunctionCreate = CeedQFunctionCreate_Ref;
378*ae3cba82Scamierjs   ceed->OperatorCreate = CeedOperatorCreate_Ref;
379*ae3cba82Scamierjs   return 0;
380*ae3cba82Scamierjs }
381*ae3cba82Scamierjs 
382*ae3cba82Scamierjs __attribute__((constructor))
383*ae3cba82Scamierjs static void Register(void) {
384*ae3cba82Scamierjs   CeedRegister("/cpu/self/ref", CeedInit_Ref);
385*ae3cba82Scamierjs }
386