xref: /libCEED/backends/ref/ceed-ref-operator.c (revision 3d576824e8d990e1f48c6609089904bee9170514)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed.h>
18 #include <ceed-backend.h>
19 #include <math.h>
20 #include <stdbool.h>
21 #include <stddef.h>
22 #include <stdint.h>
23 #include "ceed-ref.h"
24 
25 //------------------------------------------------------------------------------
26 // Setup Input/Output Fields
27 //------------------------------------------------------------------------------
28 static int CeedOperatorSetupFields_Ref(CeedQFunction qf, CeedOperator op,
29                                        bool inOrOut,
30                                        CeedVector *fullevecs, CeedVector *evecs,
31                                        CeedVector *qvecs, CeedInt starte,
32                                        CeedInt numfields, CeedInt Q) {
33   CeedInt dim, ierr, size, P;
34   Ceed ceed;
35   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
36   CeedBasis basis;
37   CeedElemRestriction Erestrict;
38   CeedOperatorField *opfields;
39   CeedQFunctionField *qffields;
40   if (inOrOut) {
41     ierr = CeedOperatorGetFields(op, NULL, &opfields); CeedChk(ierr);
42     ierr = CeedQFunctionGetFields(qf, NULL, &qffields); CeedChk(ierr);
43   } else {
44     ierr = CeedOperatorGetFields(op, &opfields, NULL); CeedChk(ierr);
45     ierr = CeedQFunctionGetFields(qf, &qffields, NULL); CeedChk(ierr);
46   }
47 
48   // Loop over fields
49   for (CeedInt i=0; i<numfields; i++) {
50     CeedEvalMode emode;
51     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
52 
53     if (emode != CEED_EVAL_WEIGHT) {
54       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &Erestrict);
55       CeedChk(ierr);
56       ierr = CeedElemRestrictionCreateVector(Erestrict, NULL,
57                                              &fullevecs[i+starte]);
58       CeedChk(ierr);
59     }
60 
61     switch(emode) {
62     case CEED_EVAL_NONE:
63       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
64       ierr = CeedVectorCreate(ceed, Q*size, &qvecs[i]); CeedChk(ierr);
65       break;
66     case CEED_EVAL_INTERP:
67       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
68       ierr = CeedElemRestrictionGetElementSize(Erestrict, &P);
69       CeedChk(ierr);
70       ierr = CeedVectorCreate(ceed, P*size, &evecs[i]); CeedChk(ierr);
71       ierr = CeedVectorCreate(ceed, Q*size, &qvecs[i]); CeedChk(ierr);
72       break;
73     case CEED_EVAL_GRAD:
74       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
75       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
76       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
77       ierr = CeedElemRestrictionGetElementSize(Erestrict, &P);
78       CeedChk(ierr);
79       ierr = CeedVectorCreate(ceed, P*size/dim, &evecs[i]); CeedChk(ierr);
80       ierr = CeedVectorCreate(ceed, Q*size, &qvecs[i]); CeedChk(ierr);
81       break;
82     case CEED_EVAL_WEIGHT: // Only on input fields
83       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
84       ierr = CeedVectorCreate(ceed, Q, &qvecs[i]); CeedChk(ierr);
85       ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT,
86                             CEED_VECTOR_NONE, qvecs[i]); CeedChk(ierr);
87       break;
88     case CEED_EVAL_DIV:
89       break; // Not implemented
90     case CEED_EVAL_CURL:
91       break; // Not implemented
92     }
93   }
94   return 0;
95 }
96 
97 //------------------------------------------------------------------------------
98 // Setup Operator
99 //------------------------------------------------------------------------------/*
100 static int CeedOperatorSetup_Ref(CeedOperator op) {
101   int ierr;
102   bool setupdone;
103   ierr = CeedOperatorIsSetupDone(op, &setupdone); CeedChk(ierr);
104   if (setupdone) return 0;
105   Ceed ceed;
106   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
107   CeedOperator_Ref *impl;
108   ierr = CeedOperatorGetData(op, &impl); CeedChk(ierr);
109   CeedQFunction qf;
110   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
111   CeedInt Q, numinputfields, numoutputfields;
112   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
113   ierr = CeedQFunctionIsIdentity(qf, &impl->identityqf); CeedChk(ierr);
114   ierr = CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
115   CeedChk(ierr);
116   CeedOperatorField *opinputfields, *opoutputfields;
117   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
118   CeedChk(ierr);
119   CeedQFunctionField *qfinputfields, *qfoutputfields;
120   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
121   CeedChk(ierr);
122 
123   // Allocate
124   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
125   CeedChk(ierr);
126   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
127   CeedChk(ierr);
128 
129   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
130   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
131   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
132   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
133   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
134 
135   impl->numein = numinputfields; impl->numeout = numoutputfields;
136 
137   // Set up infield and outfield evecs and qvecs
138   // Infields
139   ierr = CeedOperatorSetupFields_Ref(qf, op, 0, impl->evecs,
140                                      impl->evecsin, impl->qvecsin, 0,
141                                      numinputfields, Q);
142   CeedChk(ierr);
143   // Outfields
144   ierr = CeedOperatorSetupFields_Ref(qf, op, 1, impl->evecs,
145                                      impl->evecsout, impl->qvecsout,
146                                      numinputfields, numoutputfields, Q);
147   CeedChk(ierr);
148 
149   // Identity QFunctions
150   if (impl->identityqf) {
151     CeedEvalMode inmode, outmode;
152     CeedQFunctionField *infields, *outfields;
153     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr);
154 
155     for (CeedInt i=0; i<numinputfields; i++) {
156       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
157       CeedChk(ierr);
158       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
159       CeedChk(ierr);
160 
161       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
162       impl->qvecsout[i] = impl->qvecsin[i];
163       ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr);
164     }
165   }
166 
167   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
168 
169   return 0;
170 }
171 
172 //------------------------------------------------------------------------------
173 // Setup Operator Inputs
174 //------------------------------------------------------------------------------
175 static inline int CeedOperatorSetupInputs_Ref(CeedInt numinputfields,
176     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
177     CeedVector invec, const bool skipactive, CeedOperator_Ref *impl,
178     CeedRequest *request) {
179   CeedInt ierr;
180   CeedEvalMode emode;
181   CeedVector vec;
182   CeedElemRestriction Erestrict;
183   uint64_t state;
184 
185   for (CeedInt i=0; i<numinputfields; i++) {
186     // Get input vector
187     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
188     if (vec == CEED_VECTOR_ACTIVE) {
189       if (skipactive)
190         continue;
191       else
192         vec = invec;
193     }
194 
195     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
196     CeedChk(ierr);
197     // Restrict and Evec
198     if (emode == CEED_EVAL_WEIGHT) { // Skip
199     } else {
200       // Restrict
201       ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
202       // Skip restriction if input is unchanged
203       if (state != impl->inputstate[i] || vec == invec) {
204         ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
205         CeedChk(ierr);
206         ierr = CeedElemRestrictionApply(Erestrict, CEED_NOTRANSPOSE, vec,
207                                         impl->evecs[i], request); CeedChk(ierr);
208         impl->inputstate[i] = state;
209       }
210       // Get evec
211       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
212                                     (const CeedScalar **) &impl->edata[i]);
213       CeedChk(ierr);
214     }
215   }
216   return 0;
217 }
218 
219 //------------------------------------------------------------------------------
220 // Input Basis Action
221 //------------------------------------------------------------------------------
222 static inline int CeedOperatorInputBasis_Ref(CeedInt e, CeedInt Q,
223     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
224     CeedInt numinputfields, const bool skipactive, CeedOperator_Ref *impl) {
225   CeedInt ierr;
226   CeedInt dim, elemsize, size;
227   CeedElemRestriction Erestrict;
228   CeedEvalMode emode;
229   CeedBasis basis;
230 
231   for (CeedInt i=0; i<numinputfields; i++) {
232     // Skip active input
233     if (skipactive) {
234       CeedVector vec;
235       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
236       if (vec == CEED_VECTOR_ACTIVE)
237         continue;
238     }
239     // Get elemsize, emode, size
240     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
241     CeedChk(ierr);
242     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
243     CeedChk(ierr);
244     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
245     CeedChk(ierr);
246     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
247     // Basis action
248     switch(emode) {
249     case CEED_EVAL_NONE:
250       ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
251                                 CEED_USE_POINTER,
252                                 &impl->edata[i][e*Q*size]); CeedChk(ierr);
253       break;
254     case CEED_EVAL_INTERP:
255       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
256       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
257                                 CEED_USE_POINTER,
258                                 &impl->edata[i][e*elemsize*size]);
259       CeedChk(ierr);
260       ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE,
261                             CEED_EVAL_INTERP, impl->evecsin[i],
262                             impl->qvecsin[i]); CeedChk(ierr);
263       break;
264     case CEED_EVAL_GRAD:
265       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
266       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
267       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
268                                 CEED_USE_POINTER,
269                                 &impl->edata[i][e*elemsize*size/dim]);
270       CeedChk(ierr);
271       ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE,
272                             CEED_EVAL_GRAD, impl->evecsin[i],
273                             impl->qvecsin[i]); CeedChk(ierr);
274       break;
275     case CEED_EVAL_WEIGHT:
276       break;  // No action
277     // LCOV_EXCL_START
278     case CEED_EVAL_DIV:
279     case CEED_EVAL_CURL: {
280       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
281       CeedChk(ierr);
282       Ceed ceed;
283       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
284       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
285       // LCOV_EXCL_STOP
286     }
287     }
288   }
289   return 0;
290 }
291 
292 //------------------------------------------------------------------------------
293 // Output Basis Action
294 //------------------------------------------------------------------------------
295 static inline int CeedOperatorOutputBasis_Ref(CeedInt e, CeedInt Q,
296     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
297     CeedInt numinputfields, CeedInt numoutputfields, CeedOperator op,
298     CeedOperator_Ref *impl) {
299   CeedInt ierr;
300   CeedInt dim, elemsize, size;
301   CeedElemRestriction Erestrict;
302   CeedEvalMode emode;
303   CeedBasis basis;
304 
305   for (CeedInt i=0; i<numoutputfields; i++) {
306     // Get elemsize, emode, size
307     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
308     CeedChk(ierr);
309     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
310     CeedChk(ierr);
311     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
312     CeedChk(ierr);
313     ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
314     // Basis action
315     switch(emode) {
316     case CEED_EVAL_NONE:
317       break; // No action
318     case CEED_EVAL_INTERP:
319       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
320       CeedChk(ierr);
321       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
322                                 CEED_USE_POINTER,
323                                 &impl->edata[i + numinputfields][e*elemsize*size]);
324       CeedChk(ierr);
325       ierr = CeedBasisApply(basis, 1, CEED_TRANSPOSE,
326                             CEED_EVAL_INTERP, impl->qvecsout[i],
327                             impl->evecsout[i]); CeedChk(ierr);
328       break;
329     case CEED_EVAL_GRAD:
330       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
331       CeedChk(ierr);
332       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
333       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
334                                 CEED_USE_POINTER,
335                                 &impl->edata[i + numinputfields][e*elemsize*size/dim]);
336       CeedChk(ierr);
337       ierr = CeedBasisApply(basis, 1, CEED_TRANSPOSE,
338                             CEED_EVAL_GRAD, impl->qvecsout[i],
339                             impl->evecsout[i]); CeedChk(ierr);
340       break;
341     // LCOV_EXCL_START
342     case CEED_EVAL_WEIGHT: {
343       Ceed ceed;
344       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
345       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
346                        "evaluation mode");
347     }
348     case CEED_EVAL_DIV:
349     case CEED_EVAL_CURL: {
350       Ceed ceed;
351       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
352       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
353       // LCOV_EXCL_STOP
354     }
355     }
356   }
357   return 0;
358 }
359 
360 //------------------------------------------------------------------------------
361 // Restore Input Vectors
362 //------------------------------------------------------------------------------
363 static inline int CeedOperatorRestoreInputs_Ref(CeedInt numinputfields,
364     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
365     const bool skipactive, CeedOperator_Ref *impl) {
366   CeedInt ierr;
367   CeedEvalMode emode;
368 
369   for (CeedInt i=0; i<numinputfields; i++) {
370     // Skip active inputs
371     if (skipactive) {
372       CeedVector vec;
373       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
374       if (vec == CEED_VECTOR_ACTIVE)
375         continue;
376     }
377     // Restore input
378     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
379     CeedChk(ierr);
380     if (emode == CEED_EVAL_WEIGHT) { // Skip
381     } else {
382       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
383                                         (const CeedScalar **) &impl->edata[i]);
384       CeedChk(ierr);
385     }
386   }
387   return 0;
388 }
389 
390 //------------------------------------------------------------------------------
391 // Operator Apply
392 //------------------------------------------------------------------------------
393 static int CeedOperatorApplyAdd_Ref(CeedOperator op, CeedVector invec,
394                                     CeedVector outvec, CeedRequest *request) {
395   int ierr;
396   CeedOperator_Ref *impl;
397   ierr = CeedOperatorGetData(op, &impl); CeedChk(ierr);
398   CeedQFunction qf;
399   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
400   CeedInt Q, numelements, numinputfields, numoutputfields, size;
401   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
402   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
403   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
404   CeedChk(ierr);
405   CeedOperatorField *opinputfields, *opoutputfields;
406   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
407   CeedChk(ierr);
408   CeedQFunctionField *qfinputfields, *qfoutputfields;
409   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
410   CeedChk(ierr);
411   CeedEvalMode emode;
412   CeedVector vec;
413   CeedElemRestriction Erestrict;
414 
415   // Setup
416   ierr = CeedOperatorSetup_Ref(op); CeedChk(ierr);
417 
418   // Input Evecs and Restriction
419   ierr = CeedOperatorSetupInputs_Ref(numinputfields, qfinputfields,
420                                      opinputfields, invec, false, impl,
421                                      request); CeedChk(ierr);
422 
423   // Output Evecs
424   for (CeedInt i=0; i<numoutputfields; i++) {
425     ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST,
426                               &impl->edata[i + numinputfields]); CeedChk(ierr);
427   }
428 
429   // Loop through elements
430   for (CeedInt e=0; e<numelements; e++) {
431     // Output pointers
432     for (CeedInt i=0; i<numoutputfields; i++) {
433       ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
434       CeedChk(ierr);
435       if (emode == CEED_EVAL_NONE) {
436         ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size);
437         CeedChk(ierr);
438         ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
439                                   CEED_USE_POINTER,
440                                   &impl->edata[i + numinputfields][e*Q*size]);
441         CeedChk(ierr);
442       }
443     }
444 
445     // Input basis apply
446     ierr = CeedOperatorInputBasis_Ref(e, Q, qfinputfields, opinputfields,
447                                       numinputfields, false, impl);
448     CeedChk(ierr);
449 
450     // Q function
451     if (!impl->identityqf) {
452       ierr = CeedQFunctionApply(qf, Q, impl->qvecsin, impl->qvecsout);
453       CeedChk(ierr);
454     }
455 
456     // Output basis apply
457     ierr = CeedOperatorOutputBasis_Ref(e, Q, qfoutputfields, opoutputfields,
458                                        numinputfields, numoutputfields, op, impl);
459     CeedChk(ierr);
460   }
461 
462   // Output restriction
463   for (CeedInt i=0; i<numoutputfields; i++) {
464     // Restore evec
465     ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein],
466                                   &impl->edata[i + numinputfields]);
467     CeedChk(ierr);
468     // Get output vector
469     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
470     // Active
471     if (vec == CEED_VECTOR_ACTIVE)
472       vec = outvec;
473     // Restrict
474     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
475     CeedChk(ierr);
476     ierr = CeedElemRestrictionApply(Erestrict, CEED_TRANSPOSE,
477                                     impl->evecs[i+impl->numein], vec, request);
478     CeedChk(ierr);
479   }
480 
481   // Restore input arrays
482   ierr = CeedOperatorRestoreInputs_Ref(numinputfields, qfinputfields,
483                                        opinputfields, false, impl);
484   CeedChk(ierr);
485 
486   return 0;
487 }
488 
489 //------------------------------------------------------------------------------
490 // Assemble Linear QFunction
491 //------------------------------------------------------------------------------
492 static int CeedOperatorLinearAssembleQFunction_Ref(CeedOperator op,
493     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
494   int ierr;
495   CeedOperator_Ref *impl;
496   ierr = CeedOperatorGetData(op, &impl); CeedChk(ierr);
497   CeedQFunction qf;
498   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
499   CeedInt Q, numelements, numinputfields, numoutputfields, size;
500   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
501   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
502   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
503   CeedChk(ierr);
504   CeedOperatorField *opinputfields, *opoutputfields;
505   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
506   CeedChk(ierr);
507   CeedQFunctionField *qfinputfields, *qfoutputfields;
508   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
509   CeedChk(ierr);
510   CeedVector vec;
511   CeedInt numactivein = 0, numactiveout = 0;
512   CeedVector *activein = NULL;
513   CeedScalar *a, *tmp;
514   Ceed ceed, ceedparent;
515   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
516   ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceedparent); CeedChk(ierr);
517   ceedparent = ceedparent ? ceedparent : ceed;
518 
519   // Setup
520   ierr = CeedOperatorSetup_Ref(op); CeedChk(ierr);
521 
522   // Check for identity
523   if (impl->identityqf)
524     // LCOV_EXCL_START
525     return CeedError(ceed, 1, "Assembling identity QFunctions not supported");
526   // LCOV_EXCL_STOP
527 
528   // Input Evecs and Restriction
529   ierr = CeedOperatorSetupInputs_Ref(numinputfields, qfinputfields,
530                                      opinputfields, NULL, true, impl, request);
531   CeedChk(ierr);
532 
533   // Count number of active input fields
534   for (CeedInt i=0; i<numinputfields; i++) {
535     // Get input vector
536     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
537     // Check if active input
538     if (vec == CEED_VECTOR_ACTIVE) {
539       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
540       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
541       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
542       CeedChk(ierr);
543       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
544       for (CeedInt field=0; field<size; field++) {
545         ierr = CeedVectorCreate(ceed, Q, &activein[numactivein+field]);
546         CeedChk(ierr);
547         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
548                                   CEED_USE_POINTER, &tmp[field*Q]);
549         CeedChk(ierr);
550       }
551       numactivein += size;
552       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
553     }
554   }
555 
556   // Count number of active output fields
557   for (CeedInt i=0; i<numoutputfields; i++) {
558     // Get output vector
559     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
560     // Check if active output
561     if (vec == CEED_VECTOR_ACTIVE) {
562       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
563       numactiveout += size;
564     }
565   }
566 
567   // Check sizes
568   if (!numactivein || !numactiveout)
569     // LCOV_EXCL_START
570     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
571                      "and outputs");
572   // LCOV_EXCL_STOP
573 
574   // Create output restriction
575   CeedInt strides[3] = {1, Q, numactivein*numactiveout*Q}; /* *NOPAD* */
576   ierr = CeedElemRestrictionCreateStrided(ceedparent, numelements, Q,
577                                           numactivein*numactiveout,
578                                           numactivein*numactiveout*numelements*Q,
579                                           strides, rstr); CeedChk(ierr);
580   // Create assembled vector
581   ierr = CeedVectorCreate(ceedparent, numelements*Q*numactivein*numactiveout,
582                           assembled); CeedChk(ierr);
583   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
584   ierr = CeedVectorGetArray(*assembled, CEED_MEM_HOST, &a); CeedChk(ierr);
585 
586   // Loop through elements
587   for (CeedInt e=0; e<numelements; e++) {
588     // Input basis apply
589     ierr = CeedOperatorInputBasis_Ref(e, Q, qfinputfields, opinputfields,
590                                       numinputfields, true, impl);
591     CeedChk(ierr);
592 
593     // Assemble QFunction
594     for (CeedInt in=0; in<numactivein; in++) {
595       // Set Inputs
596       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
597       if (numactivein > 1) {
598         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
599                                   0.0); CeedChk(ierr);
600       }
601       // Set Outputs
602       for (CeedInt out=0; out<numoutputfields; out++) {
603         // Get output vector
604         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
605         CeedChk(ierr);
606         // Check if active output
607         if (vec == CEED_VECTOR_ACTIVE) {
608           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
609                              CEED_USE_POINTER, a); CeedChk(ierr);
610           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
611           CeedChk(ierr);
612           a += size*Q; // Advance the pointer by the size of the output
613         }
614       }
615       // Apply QFunction
616       ierr = CeedQFunctionApply(qf, Q, impl->qvecsin, impl->qvecsout);
617       CeedChk(ierr);
618     }
619   }
620 
621   // Un-set output Qvecs to prevent accidental overwrite of Assembled
622   for (CeedInt out=0; out<numoutputfields; out++) {
623     // Get output vector
624     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
625     CeedChk(ierr);
626     // Check if active output
627     if (vec == CEED_VECTOR_ACTIVE) {
628       CeedVectorTakeArray(impl->qvecsout[out], CEED_MEM_HOST, NULL);
629       CeedChk(ierr);
630     }
631   }
632 
633   // Restore input arrays
634   ierr = CeedOperatorRestoreInputs_Ref(numinputfields, qfinputfields,
635                                        opinputfields, true, impl);
636   CeedChk(ierr);
637 
638   // Restore output
639   ierr = CeedVectorRestoreArray(*assembled, &a); CeedChk(ierr);
640 
641   // Cleanup
642   for (CeedInt i=0; i<numactivein; i++) {
643     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
644   }
645   ierr = CeedFree(&activein); CeedChk(ierr);
646 
647   return 0;
648 }
649 
650 //------------------------------------------------------------------------------
651 // Get Basis Emode Pointer
652 //------------------------------------------------------------------------------
653 static inline void CeedOperatorGetBasisPointer_Ref(const CeedScalar **basisptr,
654     CeedEvalMode emode, const CeedScalar *identity, const CeedScalar *interp,
655     const CeedScalar *grad) {
656   switch (emode) {
657   case CEED_EVAL_NONE:
658     *basisptr = identity;
659     break;
660   case CEED_EVAL_INTERP:
661     *basisptr = interp;
662     break;
663   case CEED_EVAL_GRAD:
664     *basisptr = grad;
665     break;
666   case CEED_EVAL_WEIGHT:
667   case CEED_EVAL_DIV:
668   case CEED_EVAL_CURL:
669     break; // Caught by QF Assembly
670   }
671 }
672 
673 //------------------------------------------------------------------------------
674 // Create point block restriction
675 //------------------------------------------------------------------------------
676 static int CreatePBRestriction_Ref(CeedElemRestriction rstr,
677                                    CeedElemRestriction *pbRstr) {
678   int ierr;
679   Ceed ceed;
680   ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr);
681   const CeedInt *offsets;
682   ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets);
683   CeedChk(ierr);
684 
685   // Expand offsets
686   CeedInt nelem, ncomp, elemsize, compstride, max = 1, *pbOffsets;
687   ierr = CeedElemRestrictionGetNumElements(rstr, &nelem); CeedChk(ierr);
688   ierr = CeedElemRestrictionGetNumComponents(rstr, &ncomp); CeedChk(ierr);
689   ierr = CeedElemRestrictionGetElementSize(rstr, &elemsize); CeedChk(ierr);
690   ierr = CeedElemRestrictionGetCompStride(rstr, &compstride); CeedChk(ierr);
691   CeedInt shift = ncomp;
692   if (compstride != 1)
693     shift *= ncomp;
694   ierr = CeedCalloc(nelem*elemsize, &pbOffsets); CeedChk(ierr);
695   for (CeedInt i = 0; i < nelem*elemsize; i++) {
696     pbOffsets[i] = offsets[i]*shift;
697     if (pbOffsets[i] > max)
698       max = pbOffsets[i];
699   }
700 
701   // Create new restriction
702   ierr = CeedElemRestrictionCreate(ceed, nelem, elemsize, ncomp*ncomp, 1,
703                                    max + ncomp*ncomp, CEED_MEM_HOST,
704                                    CEED_OWN_POINTER, pbOffsets, pbRstr);
705   CeedChk(ierr);
706 
707   // Cleanup
708   ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr);
709 
710   return 0;
711 }
712 
713 //------------------------------------------------------------------------------
714 // Assemble diagonal common code
715 //------------------------------------------------------------------------------
716 static inline int CeedOperatorAssembleAddDiagonalCore_Ref(CeedOperator op,
717     CeedVector assembled, CeedRequest *request, const bool pointBlock) {
718   int ierr;
719   Ceed ceed;
720   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
721 
722   // Assemble QFunction
723   CeedQFunction qf;
724   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
725   CeedInt numinputfields, numoutputfields;
726   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
727   CeedChk(ierr);
728   CeedVector assembledqf;
729   CeedElemRestriction rstr;
730   ierr = CeedOperatorLinearAssembleQFunction(op,  &assembledqf, &rstr, request);
731   CeedChk(ierr);
732   ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
733   CeedScalar maxnorm = 0;
734   ierr = CeedVectorNorm(assembledqf, CEED_NORM_MAX, &maxnorm); CeedChk(ierr);
735 
736   // Determine active input basis
737   CeedOperatorField *opfields;
738   CeedQFunctionField *qffields;
739   ierr = CeedOperatorGetFields(op, &opfields, NULL); CeedChk(ierr);
740   ierr = CeedQFunctionGetFields(qf, &qffields, NULL); CeedChk(ierr);
741   CeedInt numemodein = 0, ncomp, dim = 1;
742   CeedEvalMode *emodein = NULL;
743   CeedBasis basisin = NULL;
744   CeedElemRestriction rstrin = NULL;
745   for (CeedInt i=0; i<numinputfields; i++) {
746     CeedVector vec;
747     ierr = CeedOperatorFieldGetVector(opfields[i], &vec); CeedChk(ierr);
748     if (vec == CEED_VECTOR_ACTIVE) {
749       CeedElemRestriction rstr;
750       ierr = CeedOperatorFieldGetBasis(opfields[i], &basisin); CeedChk(ierr);
751       ierr = CeedBasisGetNumComponents(basisin, &ncomp); CeedChk(ierr);
752       ierr = CeedBasisGetDimension(basisin, &dim); CeedChk(ierr);
753       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &rstr);
754       CeedChk(ierr);
755       if (rstrin && rstrin != rstr)
756         // LCOV_EXCL_START
757         return CeedError(ceed, 1,
758                          "Multi-field non-composite operator diagonal assembly not supported");
759       // LCOV_EXCL_STOP
760       rstrin = rstr;
761       CeedEvalMode emode;
762       ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode);
763       CeedChk(ierr);
764       switch (emode) {
765       case CEED_EVAL_NONE:
766       case CEED_EVAL_INTERP:
767         ierr = CeedRealloc(numemodein + 1, &emodein); CeedChk(ierr);
768         emodein[numemodein] = emode;
769         numemodein += 1;
770         break;
771       case CEED_EVAL_GRAD:
772         ierr = CeedRealloc(numemodein + dim, &emodein); CeedChk(ierr);
773         for (CeedInt d=0; d<dim; d++)
774           emodein[numemodein+d] = emode;
775         numemodein += dim;
776         break;
777       case CEED_EVAL_WEIGHT:
778       case CEED_EVAL_DIV:
779       case CEED_EVAL_CURL:
780         break; // Caught by QF Assembly
781       }
782     }
783   }
784 
785   // Determine active output basis
786   ierr = CeedOperatorGetFields(op, NULL, &opfields); CeedChk(ierr);
787   ierr = CeedQFunctionGetFields(qf, NULL, &qffields); CeedChk(ierr);
788   CeedInt numemodeout = 0;
789   CeedEvalMode *emodeout = NULL;
790   CeedBasis basisout = NULL;
791   CeedElemRestriction rstrout = NULL;
792   for (CeedInt i=0; i<numoutputfields; i++) {
793     CeedVector vec;
794     ierr = CeedOperatorFieldGetVector(opfields[i], &vec); CeedChk(ierr);
795     if (vec == CEED_VECTOR_ACTIVE) {
796       CeedElemRestriction rstr;
797       ierr = CeedOperatorFieldGetBasis(opfields[i], &basisout); CeedChk(ierr);
798       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &rstr);
799       CeedChk(ierr);
800       if (rstrout && rstrout != rstr)
801         // LCOV_EXCL_START
802         return CeedError(ceed, 1,
803                          "Multi-field non-composite operator diagonal assembly not supported");
804       // LCOV_EXCL_STOP
805       rstrout = rstr;
806       CeedEvalMode emode;
807       ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
808       switch (emode) {
809       case CEED_EVAL_NONE:
810       case CEED_EVAL_INTERP:
811         ierr = CeedRealloc(numemodeout + 1, &emodeout); CeedChk(ierr);
812         emodeout[numemodeout] = emode;
813         numemodeout += 1;
814         break;
815       case CEED_EVAL_GRAD:
816         ierr = CeedRealloc(numemodeout + dim, &emodeout); CeedChk(ierr);
817         for (CeedInt d=0; d<dim; d++)
818           emodeout[numemodeout+d] = emode;
819         numemodeout += dim;
820         break;
821       case CEED_EVAL_WEIGHT:
822       case CEED_EVAL_DIV:
823       case CEED_EVAL_CURL:
824         break; // Caught by QF Assembly
825       }
826     }
827   }
828 
829   // Assemble point-block diagonal restriction, if needed
830   CeedElemRestriction diagrstr = rstrout;
831   if (pointBlock) {
832     ierr = CreatePBRestriction_Ref(rstrout, &diagrstr); CeedChk(ierr);
833   }
834 
835   // Create diagonal vector
836   CeedVector elemdiag;
837   ierr = CeedElemRestrictionCreateVector(diagrstr, NULL, &elemdiag);
838   CeedChk(ierr);
839 
840   // Assemble element operator diagonals
841   CeedScalar *elemdiagarray, *assembledqfarray;
842   ierr = CeedVectorSetValue(elemdiag, 0.0); CeedChk(ierr);
843   ierr = CeedVectorGetArray(elemdiag, CEED_MEM_HOST, &elemdiagarray);
844   CeedChk(ierr);
845   ierr = CeedVectorGetArray(assembledqf, CEED_MEM_HOST, &assembledqfarray);
846   CeedChk(ierr);
847   CeedInt nelem, nnodes, nqpts;
848   ierr = CeedElemRestrictionGetNumElements(diagrstr, &nelem); CeedChk(ierr);
849   ierr = CeedBasisGetNumNodes(basisin, &nnodes); CeedChk(ierr);
850   ierr = CeedBasisGetNumQuadraturePoints(basisin, &nqpts); CeedChk(ierr);
851   // Basis matrices
852   const CeedScalar *interpin, *interpout, *gradin, *gradout;
853   CeedScalar *identity = NULL;
854   bool evalNone = false;
855   for (CeedInt i=0; i<numemodein; i++)
856     evalNone = evalNone || (emodein[i] == CEED_EVAL_NONE);
857   for (CeedInt i=0; i<numemodeout; i++)
858     evalNone = evalNone || (emodeout[i] == CEED_EVAL_NONE);
859   if (evalNone) {
860     ierr = CeedCalloc(nqpts*nnodes, &identity); CeedChk(ierr);
861     for (CeedInt i=0; i<(nnodes<nqpts?nnodes:nqpts); i++)
862       identity[i*nnodes+i] = 1.0;
863   }
864   ierr = CeedBasisGetInterp(basisin, &interpin); CeedChk(ierr);
865   ierr = CeedBasisGetInterp(basisout, &interpout); CeedChk(ierr);
866   ierr = CeedBasisGetGrad(basisin, &gradin); CeedChk(ierr);
867   ierr = CeedBasisGetGrad(basisout, &gradout); CeedChk(ierr);
868   // Compute the diagonal of B^T D B
869   // Each element
870   const CeedScalar qfvaluebound = maxnorm*1e-12;
871   for (CeedInt e=0; e<nelem; e++) {
872     CeedInt dout = -1;
873     // Each basis eval mode pair
874     for (CeedInt eout=0; eout<numemodeout; eout++) {
875       const CeedScalar *bt = NULL;
876       if (emodeout[eout] == CEED_EVAL_GRAD)
877         dout += 1;
878       CeedOperatorGetBasisPointer_Ref(&bt, emodeout[eout], identity, interpout,
879                                       &gradout[dout*nqpts*nnodes]);
880       CeedInt din = -1;
881       for (CeedInt ein=0; ein<numemodein; ein++) {
882         const CeedScalar *b = NULL;
883         if (emodein[ein] == CEED_EVAL_GRAD)
884           din += 1;
885         CeedOperatorGetBasisPointer_Ref(&b, emodein[ein], identity, interpin,
886                                         &gradin[din*nqpts*nnodes]);
887         // Each component
888         for (CeedInt compOut=0; compOut<ncomp; compOut++)
889           // Each qpoint/node pair
890           for (CeedInt q=0; q<nqpts; q++)
891             if (pointBlock) {
892               // Point Block Diagonal
893               for (CeedInt compIn=0; compIn<ncomp; compIn++) {
894                 const CeedScalar qfvalue =
895                   assembledqfarray[((((e*numemodein+ein)*ncomp+compIn)*
896                                      numemodeout+eout)*ncomp+compOut)*nqpts+q];
897                 if (fabs(qfvalue) > qfvaluebound)
898                   for (CeedInt n=0; n<nnodes; n++)
899                     elemdiagarray[((e*ncomp+compOut)*ncomp+compIn)*nnodes+n] +=
900                       bt[q*nnodes+n] * qfvalue * b[q*nnodes+n];
901               }
902             } else {
903               // Diagonal Only
904               const CeedScalar qfvalue =
905                 assembledqfarray[((((e*numemodein+ein)*ncomp+compOut)*
906                                    numemodeout+eout)*ncomp+compOut)*nqpts+q];
907               if (fabs(qfvalue) > qfvaluebound)
908                 for (CeedInt n=0; n<nnodes; n++)
909                   elemdiagarray[(e*ncomp+compOut)*nnodes+n] +=
910                     bt[q*nnodes+n] * qfvalue * b[q*nnodes+n];
911             }
912       }
913     }
914   }
915   ierr = CeedVectorRestoreArray(elemdiag, &elemdiagarray); CeedChk(ierr);
916   ierr = CeedVectorRestoreArray(assembledqf, &assembledqfarray); CeedChk(ierr);
917 
918   // Assemble local operator diagonal
919   ierr = CeedElemRestrictionApply(diagrstr, CEED_TRANSPOSE, elemdiag,
920                                   assembled, request); CeedChk(ierr);
921 
922   // Cleanup
923   if (pointBlock) {
924     ierr = CeedElemRestrictionDestroy(&diagrstr); CeedChk(ierr);
925   }
926   ierr = CeedVectorDestroy(&assembledqf); CeedChk(ierr);
927   ierr = CeedVectorDestroy(&elemdiag); CeedChk(ierr);
928   ierr = CeedFree(&emodein); CeedChk(ierr);
929   ierr = CeedFree(&emodeout); CeedChk(ierr);
930   ierr = CeedFree(&identity); CeedChk(ierr);
931 
932   return 0;
933 }
934 
935 //------------------------------------------------------------------------------
936 // Assemble composite diagonal common code
937 //------------------------------------------------------------------------------
938 static inline int CeedOperatorLinearAssembleAddDiagonalCompositeCore_Ref(
939   CeedOperator op, CeedVector assembled, CeedRequest *request,
940   const bool pointBlock) {
941   int ierr;
942   CeedInt numSub;
943   CeedOperator *subOperators;
944   ierr = CeedOperatorGetNumSub(op, &numSub); CeedChk(ierr);
945   ierr = CeedOperatorGetSubList(op, &subOperators); CeedChk(ierr);
946   for (CeedInt i = 0; i < numSub; i++) {
947     ierr = CeedOperatorAssembleAddDiagonalCore_Ref(subOperators[i], assembled,
948            request, pointBlock); CeedChk(ierr);
949   }
950   return 0;
951 }
952 
953 //------------------------------------------------------------------------------
954 // Assemble Linear Diagonal
955 //------------------------------------------------------------------------------
956 static int CeedOperatorLinearAssembleAddDiagonal_Ref(CeedOperator op,
957     CeedVector assembled, CeedRequest *request) {
958   int ierr;
959   bool isComposite;
960   ierr = CeedOperatorIsComposite(op, &isComposite); CeedChk(ierr);
961   if (isComposite) {
962     return CeedOperatorLinearAssembleAddDiagonalCompositeCore_Ref(op, assembled,
963            request, false);
964   } else {
965     return CeedOperatorAssembleAddDiagonalCore_Ref(op, assembled, request, false);
966   }
967 }
968 
969 //------------------------------------------------------------------------------
970 // Assemble Linear Point Block Diagonal
971 //------------------------------------------------------------------------------
972 static int CeedOperatorLinearAssembleAddPointBlockDiagonal_Ref(CeedOperator op,
973     CeedVector assembled, CeedRequest *request) {
974   int ierr;
975   bool isComposite;
976   ierr = CeedOperatorIsComposite(op, &isComposite); CeedChk(ierr);
977   if (isComposite) {
978     return CeedOperatorLinearAssembleAddDiagonalCompositeCore_Ref(op, assembled,
979            request, true);
980   } else {
981     return CeedOperatorAssembleAddDiagonalCore_Ref(op, assembled, request, true);
982   }
983 }
984 
985 //------------------------------------------------------------------------------
986 // Create FDM Element Inverse
987 //------------------------------------------------------------------------------
988 int CeedOperatorCreateFDMElementInverse_Ref(CeedOperator op,
989     CeedOperator *fdminv, CeedRequest *request) {
990   int ierr;
991   Ceed ceed, ceedparent;
992   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
993   ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceedparent); CeedChk(ierr);
994   ceedparent = ceedparent ? ceedparent : ceed;
995   CeedQFunction qf;
996   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
997 
998   // Determine active input basis
999   bool interp = false, grad = false;
1000   CeedBasis basis = NULL;
1001   CeedElemRestriction rstr = NULL;
1002   CeedOperatorField *opfields;
1003   CeedQFunctionField *qffields;
1004   ierr = CeedOperatorGetFields(op, &opfields, NULL); CeedChk(ierr);
1005   ierr = CeedQFunctionGetFields(qf, &qffields, NULL); CeedChk(ierr);
1006   CeedInt numinputfields;
1007   ierr = CeedQFunctionGetNumArgs(qf, &numinputfields, NULL); CeedChk(ierr);
1008   for (CeedInt i=0; i<numinputfields; i++) {
1009     CeedVector vec;
1010     ierr = CeedOperatorFieldGetVector(opfields[i], &vec); CeedChk(ierr);
1011     if (vec == CEED_VECTOR_ACTIVE) {
1012       CeedEvalMode emode;
1013       ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
1014       interp = interp || emode == CEED_EVAL_INTERP;
1015       grad = grad || emode == CEED_EVAL_GRAD;
1016       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
1017       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &rstr);
1018       CeedChk(ierr);
1019     }
1020   }
1021   if (!basis)
1022     // LCOV_EXCL_START
1023     return CeedError(ceed, 1, "No active field set");
1024   // LCOV_EXCL_STOP
1025   CeedInt P1d, Q1d, elemsize, nqpts, dim, ncomp = 1, nelem = 1, lsize = 1;
1026   ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChk(ierr);
1027   ierr = CeedBasisGetNumNodes(basis, &elemsize); CeedChk(ierr);
1028   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChk(ierr);
1029   ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpts); CeedChk(ierr);
1030   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
1031   ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChk(ierr);
1032   ierr = CeedElemRestrictionGetNumElements(rstr, &nelem); CeedChk(ierr);
1033   ierr = CeedElemRestrictionGetLVectorSize(rstr, &lsize); CeedChk(ierr);
1034 
1035   // Build and diagonalize 1D Mass and Laplacian
1036   bool tensorbasis;
1037   ierr = CeedBasisIsTensor(basis, &tensorbasis); CeedChk(ierr);
1038   if (!tensorbasis)
1039     // LCOV_EXCL_START
1040     return CeedError(ceed, 1, "FDMElementInverse only supported for tensor "
1041                      "bases");
1042   // LCOV_EXCL_STOP
1043   CeedScalar *work, *mass, *laplace, *x, *x2, *lambda;
1044   ierr = CeedMalloc(Q1d*P1d, &work); CeedChk(ierr);
1045   ierr = CeedMalloc(P1d*P1d, &mass); CeedChk(ierr);
1046   ierr = CeedMalloc(P1d*P1d, &laplace); CeedChk(ierr);
1047   ierr = CeedMalloc(P1d*P1d, &x); CeedChk(ierr);
1048   ierr = CeedMalloc(P1d*P1d, &x2); CeedChk(ierr);
1049   ierr = CeedMalloc(P1d, &lambda); CeedChk(ierr);
1050   // -- Mass
1051   const CeedScalar *interp1d, *grad1d, *qweight1d;
1052   ierr = CeedBasisGetInterp1D(basis, &interp1d); CeedChk(ierr);
1053   ierr = CeedBasisGetGrad1D(basis, &grad1d); CeedChk(ierr);
1054   ierr = CeedBasisGetQWeights(basis, &qweight1d); CeedChk(ierr);
1055   for (CeedInt i=0; i<Q1d; i++)
1056     for (CeedInt j=0; j<P1d; j++)
1057       work[i+j*Q1d] = interp1d[i*P1d+j]*qweight1d[i];
1058   ierr = CeedMatrixMultiply(ceed, (const CeedScalar *)work,
1059                             (const CeedScalar *)interp1d, mass, P1d, P1d, Q1d);
1060   CeedChk(ierr);
1061   // -- Laplacian
1062   for (CeedInt i=0; i<Q1d; i++)
1063     for (CeedInt j=0; j<P1d; j++)
1064       work[i+j*Q1d] = grad1d[i*P1d+j]*qweight1d[i];
1065   ierr = CeedMatrixMultiply(ceed, (const CeedScalar *)work,
1066                             (const CeedScalar *)grad1d, laplace, P1d, P1d, Q1d);
1067   CeedChk(ierr);
1068   // -- Diagonalize
1069   ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P1d);
1070   CeedChk(ierr);
1071   ierr = CeedFree(&work); CeedChk(ierr);
1072   ierr = CeedFree(&mass); CeedChk(ierr);
1073   ierr = CeedFree(&laplace); CeedChk(ierr);
1074   for (CeedInt i=0; i<P1d; i++)
1075     for (CeedInt j=0; j<P1d; j++)
1076       x2[i+j*P1d] = x[j+i*P1d];
1077   ierr = CeedFree(&x); CeedChk(ierr);
1078 
1079   // Assemble QFunction
1080   CeedVector assembled;
1081   CeedElemRestriction rstr_qf;
1082   ierr =  CeedOperatorLinearAssembleQFunction(op, &assembled, &rstr_qf,
1083           request); CeedChk(ierr);
1084   ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr);
1085   CeedScalar maxnorm = 0;
1086   ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &maxnorm); CeedChk(ierr);
1087 
1088   // Calculate element averages
1089   CeedInt nfields = ((interp?1:0) + (grad?dim:0))*((interp?1:0) + (grad?dim:0));
1090   CeedScalar *elemavg;
1091   const CeedScalar *assembledarray, *qweightsarray;
1092   CeedVector qweights;
1093   ierr = CeedVectorCreate(ceedparent, nqpts, &qweights); CeedChk(ierr);
1094   ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT,
1095                         CEED_VECTOR_NONE, qweights); CeedChk(ierr);
1096   ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembledarray);
1097   CeedChk(ierr);
1098   ierr = CeedVectorGetArrayRead(qweights, CEED_MEM_HOST, &qweightsarray);
1099   CeedChk(ierr);
1100   ierr = CeedCalloc(nelem, &elemavg); CeedChk(ierr);
1101   for (CeedInt e=0; e<nelem; e++) {
1102     CeedInt count = 0;
1103     for (CeedInt q=0; q<nqpts; q++)
1104       for (CeedInt i=0; i<ncomp*ncomp*nfields; i++)
1105         if (fabs(assembledarray[e*nelem*nqpts*ncomp*ncomp*nfields +
1106                                                                   i*nqpts + q]) > maxnorm*1e-12) {
1107           elemavg[e] += assembledarray[e*nelem*nqpts*ncomp*ncomp*nfields +
1108                                        i*nqpts + q] / qweightsarray[q];
1109           count++;
1110         }
1111     if (count)
1112       elemavg[e] /= count;
1113   }
1114   ierr = CeedVectorRestoreArrayRead(assembled, &assembledarray); CeedChk(ierr);
1115   ierr = CeedVectorDestroy(&assembled); CeedChk(ierr);
1116   ierr = CeedVectorRestoreArrayRead(qweights, &qweightsarray); CeedChk(ierr);
1117   ierr = CeedVectorDestroy(&qweights); CeedChk(ierr);
1118 
1119   // Build FDM diagonal
1120   CeedVector qdata;
1121   CeedScalar *qdataarray;
1122   ierr = CeedVectorCreate(ceedparent, nelem*ncomp*lsize, &qdata); CeedChk(ierr);
1123   ierr = CeedVectorSetArray(qdata, CEED_MEM_HOST, CEED_COPY_VALUES, NULL);
1124   CeedChk(ierr);
1125   ierr = CeedVectorGetArray(qdata, CEED_MEM_HOST, &qdataarray); CeedChk(ierr);
1126   for (CeedInt e=0; e<nelem; e++)
1127     for (CeedInt c=0; c<ncomp; c++)
1128       for (CeedInt n=0; n<lsize; n++) {
1129         if (interp)
1130           qdataarray[(e*ncomp+c)*lsize+n] = 1;
1131         if (grad)
1132           for (CeedInt d=0; d<dim; d++) {
1133             CeedInt i = (n / CeedIntPow(P1d, d)) % P1d;
1134             qdataarray[(e*ncomp+c)*lsize+n] += lambda[i];
1135           }
1136         qdataarray[(e*ncomp+c)*lsize+n] = 1 / (elemavg[e] *
1137                                                qdataarray[(e*ncomp+c)*lsize+n]);
1138       }
1139   ierr = CeedFree(&elemavg); CeedChk(ierr);
1140   ierr = CeedVectorRestoreArray(qdata, &qdataarray); CeedChk(ierr);
1141 
1142   // Setup FDM operator
1143   // -- Basis
1144   CeedBasis fdm_basis;
1145   CeedScalar *graddummy, *qrefdummy, *qweightdummy;
1146   ierr = CeedCalloc(P1d*P1d, &graddummy); CeedChk(ierr);
1147   ierr = CeedCalloc(P1d, &qrefdummy); CeedChk(ierr);
1148   ierr = CeedCalloc(P1d, &qweightdummy); CeedChk(ierr);
1149   ierr = CeedBasisCreateTensorH1(ceedparent, dim, ncomp, P1d, P1d, x2,
1150                                  graddummy, qrefdummy, qweightdummy,
1151                                  &fdm_basis); CeedChk(ierr);
1152   ierr = CeedFree(&graddummy); CeedChk(ierr);
1153   ierr = CeedFree(&qrefdummy); CeedChk(ierr);
1154   ierr = CeedFree(&qweightdummy); CeedChk(ierr);
1155   ierr = CeedFree(&x2); CeedChk(ierr);
1156   ierr = CeedFree(&lambda); CeedChk(ierr);
1157 
1158   // -- Restriction
1159   CeedElemRestriction rstr_i;
1160   CeedInt strides[3] = {1, lsize, lsize*ncomp};
1161   ierr = CeedElemRestrictionCreateStrided(ceedparent, nelem, lsize, ncomp,
1162                                           lsize*nelem*ncomp, strides, &rstr_i);
1163   CeedChk(ierr);
1164   // -- QFunction
1165   CeedQFunction mass_qf;
1166   ierr = CeedQFunctionCreateInteriorByName(ceedparent, "MassApply", &mass_qf);
1167   CeedChk(ierr);
1168   // -- Operator
1169   ierr = CeedOperatorCreate(ceedparent, mass_qf, NULL, NULL, fdminv);
1170   CeedChk(ierr);
1171   CeedOperatorSetField(*fdminv, "u", rstr_i, fdm_basis, CEED_VECTOR_ACTIVE);
1172   CeedChk(ierr);
1173   CeedOperatorSetField(*fdminv, "qdata", rstr_i, CEED_BASIS_COLLOCATED, qdata);
1174   CeedChk(ierr);
1175   CeedOperatorSetField(*fdminv, "v", rstr_i, fdm_basis, CEED_VECTOR_ACTIVE);
1176   CeedChk(ierr);
1177 
1178   // Cleanup
1179   ierr = CeedVectorDestroy(&qdata); CeedChk(ierr);
1180   ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr);
1181   ierr = CeedElemRestrictionDestroy(&rstr_i); CeedChk(ierr);
1182   ierr = CeedQFunctionDestroy(&mass_qf); CeedChk(ierr);
1183 
1184   return 0;
1185 }
1186 
1187 //------------------------------------------------------------------------------
1188 // Operator Destroy
1189 //------------------------------------------------------------------------------
1190 static int CeedOperatorDestroy_Ref(CeedOperator op) {
1191   int ierr;
1192   CeedOperator_Ref *impl;
1193   ierr = CeedOperatorGetData(op, &impl); CeedChk(ierr);
1194 
1195   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
1196     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
1197   }
1198   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
1199   ierr = CeedFree(&impl->edata); CeedChk(ierr);
1200   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
1201 
1202   for (CeedInt i=0; i<impl->numein; i++) {
1203     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
1204     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
1205   }
1206   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
1207   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
1208 
1209   for (CeedInt i=0; i<impl->numeout; i++) {
1210     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
1211     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
1212   }
1213   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
1214   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
1215 
1216   ierr = CeedFree(&impl); CeedChk(ierr);
1217   return 0;
1218 }
1219 
1220 //------------------------------------------------------------------------------
1221 // Operator Create
1222 //------------------------------------------------------------------------------
1223 int CeedOperatorCreate_Ref(CeedOperator op) {
1224   int ierr;
1225   Ceed ceed;
1226   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
1227   CeedOperator_Ref *impl;
1228 
1229   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
1230   ierr = CeedOperatorSetData(op, impl); CeedChk(ierr);
1231 
1232   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
1233                                 CeedOperatorLinearAssembleQFunction_Ref);
1234   CeedChk(ierr);
1235   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleAddDiagonal",
1236                                 CeedOperatorLinearAssembleAddDiagonal_Ref);
1237   CeedChk(ierr);
1238   ierr = CeedSetBackendFunction(ceed, "Operator", op,
1239                                 "LinearAssembleAddPointBlockDiagonal",
1240                                 CeedOperatorLinearAssembleAddPointBlockDiagonal_Ref);
1241   CeedChk(ierr);
1242   ierr = CeedSetBackendFunction(ceed, "Operator", op, "CreateFDMElementInverse",
1243                                 CeedOperatorCreateFDMElementInverse_Ref);
1244   CeedChk(ierr);
1245   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
1246                                 CeedOperatorApplyAdd_Ref); CeedChk(ierr);
1247   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
1248                                 CeedOperatorDestroy_Ref); CeedChk(ierr);
1249   return 0;
1250 }
1251 
1252 //------------------------------------------------------------------------------
1253 // Composite Operator Create
1254 //------------------------------------------------------------------------------
1255 int CeedCompositeOperatorCreate_Ref(CeedOperator op) {
1256   int ierr;
1257   Ceed ceed;
1258   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
1259   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleAddDiagonal",
1260                                 CeedOperatorLinearAssembleAddDiagonal_Ref);
1261   CeedChk(ierr);
1262   ierr = CeedSetBackendFunction(ceed, "Operator", op,
1263                                 "LinearAssembleAddPointBlockDiagonal",
1264                                 CeedOperatorLinearAssembleAddPointBlockDiagonal_Ref);
1265   CeedChk(ierr);
1266   return 0;
1267 }
1268 //------------------------------------------------------------------------------
1269