xref: /libCEED/interface/ceed-preconditioning.c (revision 8605dc91c12ed1b57bc2955803e6481504bb85c1)
1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3 // reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <ceed-impl.h>
20 #include <math.h>
21 #include <stdbool.h>
22 #include <stdio.h>
23 #include <string.h>
24 
25 /// @file
26 /// Implementation of CeedOperator preconditioning interfaces
27 
28 /// ----------------------------------------------------------------------------
29 /// CeedOperator Library Internal Preconditioning Functions
30 /// ----------------------------------------------------------------------------
31 /// @addtogroup CeedOperatorDeveloper
32 /// @{
33 
34 /**
35   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced
36          CeedOperator functionality
37 
38   @param op  CeedOperator to create fallback for
39 
40   @return An error code: 0 - success, otherwise - failure
41 
42   @ref Developer
43 **/
44 int CeedOperatorCreateFallback(CeedOperator op) {
45   int ierr;
46 
47   // Fallback Ceed
48   const char *resource, *fallback_resource;
49   ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
50   ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
51   CeedChk(ierr);
52   if (!strcmp(resource, fallback_resource))
53     // LCOV_EXCL_START
54     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
55                      "Backend %s cannot create an operator"
56                      "fallback to resource %s", resource, fallback_resource);
57   // LCOV_EXCL_STOP
58 
59   // Fallback Ceed
60   Ceed ceed_ref;
61   if (!op->ceed->op_fallback_ceed) {
62     ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr);
63     ceed_ref->op_fallback_parent = op->ceed;
64     ceed_ref->Error = op->ceed->Error;
65     op->ceed->op_fallback_ceed = ceed_ref;
66   }
67   ceed_ref = op->ceed->op_fallback_ceed;
68 
69   // Clone Op
70   CeedOperator op_ref;
71   ierr = CeedCalloc(1, &op_ref); CeedChk(ierr);
72   memcpy(op_ref, op, sizeof(*op_ref));
73   op_ref->data = NULL;
74   op_ref->is_interface_setup = false;
75   op_ref->is_backend_setup = false;
76   op_ref->ceed = ceed_ref;
77   ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr);
78   op->op_fallback = op_ref;
79 
80   // Clone QF
81   CeedQFunction qf_ref;
82   ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr);
83   memcpy(qf_ref, (op->qf), sizeof(*qf_ref));
84   qf_ref->data = NULL;
85   qf_ref->ceed = ceed_ref;
86   ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr);
87   op_ref->qf = qf_ref;
88   op->qf_fallback = qf_ref;
89   return CEED_ERROR_SUCCESS;
90 }
91 
92 /**
93   @brief Select correct basis matrix pointer based on CeedEvalMode
94 
95   @param[in] eval_mode   Current basis evaluation mode
96   @param[in] identity    Pointer to identity matrix
97   @param[in] interp      Pointer to interpolation matrix
98   @param[in] grad        Pointer to gradient matrix
99   @param[out] basis_ptr  Basis pointer to set
100 
101   @return none
102 
103   @ref Developer
104 **/
105 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode,
106     const CeedScalar *identity, const CeedScalar *interp,
107     const CeedScalar *grad, const CeedScalar **basis_ptr) {
108   switch (eval_mode) {
109   case CEED_EVAL_NONE:
110     *basis_ptr = identity;
111     break;
112   case CEED_EVAL_INTERP:
113     *basis_ptr = interp;
114     break;
115   case CEED_EVAL_GRAD:
116     *basis_ptr = grad;
117     break;
118   case CEED_EVAL_WEIGHT:
119   case CEED_EVAL_DIV:
120   case CEED_EVAL_CURL:
121     break; // Caught by QF Assembly
122   }
123 }
124 
125 /**
126   @brief Create point block restriction for active operator field
127 
128   @param[in] rstr              Original CeedElemRestriction for active field
129   @param[out] pointblock_rstr  Address of the variable where the newly created
130                                  CeedElemRestriction will be stored
131 
132   @return An error code: 0 - success, otherwise - failure
133 
134   @ref Developer
135 **/
136 static int CeedOperatorCreateActivePointBlockRestriction(
137   CeedElemRestriction rstr,
138   CeedElemRestriction *pointblock_rstr) {
139   int ierr;
140   Ceed ceed;
141   ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr);
142   const CeedInt *offsets;
143   ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets);
144   CeedChk(ierr);
145 
146   // Expand offsets
147   CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1,
148                                                       *pointblock_offsets;
149   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
150   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
151   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
152   ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr);
153   CeedInt shift = num_comp;
154   if (comp_stride != 1)
155     shift *= num_comp;
156   ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets);
157   CeedChk(ierr);
158   for (CeedInt i = 0; i < num_elem*elem_size; i++) {
159     pointblock_offsets[i] = offsets[i]*shift;
160     if (pointblock_offsets[i] > max)
161       max = pointblock_offsets[i];
162   }
163 
164   // Create new restriction
165   ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp,
166                                    1, max + num_comp*num_comp, CEED_MEM_HOST,
167                                    CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr);
168   CeedChk(ierr);
169 
170   // Cleanup
171   ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr);
172 
173   return CEED_ERROR_SUCCESS;
174 }
175 
/**
  @brief Core logic for assembling operator diagonal or point block diagonal

  Sums into `assembled`; the caller is responsible for zeroing it first if a
  pure diagonal (rather than an accumulated one) is desired.

  @param[in] op             CeedOperator to assemble point block diagonal
  @param[in] request        Address of CeedRequest for non-blocking completion, else
                              CEED_REQUEST_IMMEDIATE
  @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
  @param[out] assembled     CeedVector to store assembled diagonal

  @return An error code: 0 - success, otherwise - failure

  @ref Developer
**/
static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op,
    CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);

  // Assemble QFunction at quadrature points; the returned restriction only
  // provides the E-vector layout and is destroyed immediately after
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedInt num_input_fields, num_output_fields;
  ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
  CeedChk(ierr);
  CeedVector assembled_qf;
  CeedElemRestriction rstr;
  ierr = CeedOperatorLinearAssembleQFunction(op,  &assembled_qf, &rstr, request);
  CeedChk(ierr);
  CeedInt layout[3];
  ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
  // Max norm is used below to filter numerically-zero QFunction values
  CeedScalar max_norm = 0;
  ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr);

  // Determine active input basis and expand eval modes (one entry per
  // dimension for CEED_EVAL_GRAD)
  CeedOperatorField *op_fields;
  CeedQFunctionField *qf_fields;
  ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
  CeedInt num_eval_mode_in = 0, num_comp, dim = 1;
  CeedEvalMode *eval_mode_in = NULL;
  CeedBasis basis_in = NULL;
  CeedElemRestriction rstr_in = NULL;
  for (CeedInt i=0; i<num_input_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      // NOTE: shadows the outer `rstr` (already destroyed above)
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr);
      ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr);
      ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      // All active inputs must share one restriction
      if (rstr_in && rstr_in != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_in = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
        eval_mode_in[num_eval_mode_in] = eval_mode;
        num_eval_mode_in += 1;
        break;
      case CEED_EVAL_GRAD:
        // Gradient contributes one eval mode per spatial dimension
        ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_in[num_eval_mode_in+d] = eval_mode;
        num_eval_mode_in += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Determine active output basis (mirror of the input loop above)
  ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
  CeedInt num_eval_mode_out = 0;
  CeedEvalMode *eval_mode_out = NULL;
  CeedBasis basis_out = NULL;
  CeedElemRestriction rstr_out = NULL;
  for (CeedInt i=0; i<num_output_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      if (rstr_out && rstr_out != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_out = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
        eval_mode_out[num_eval_mode_out] = eval_mode;
        num_eval_mode_out += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_out[num_eval_mode_out+d] = eval_mode;
        num_eval_mode_out += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Assemble point block diagonal restriction, if needed
  CeedElemRestriction diag_rstr = rstr_out;
  if (is_pointblock) {
    ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr);
    CeedChk(ierr);
  }

  // Create diagonal vector (E-vector sized for diag_rstr)
  CeedVector elem_diag;
  ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
  CeedChk(ierr);

  // Assemble element operator diagonals
  CeedScalar *elem_diag_array, *assembled_qf_array;
  ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr);
  ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
  CeedChk(ierr);
  ierr = CeedVectorGetArray(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
  CeedChk(ierr);
  CeedInt num_elem, num_nodes, num_qpts;
  ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
  // Basis matrices; an identity stands in for CEED_EVAL_NONE fields so the
  // same B^T D B loop handles all eval modes uniformly
  const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
  CeedScalar *identity = NULL;
  bool evalNone = false;
  for (CeedInt i=0; i<num_eval_mode_in; i++)
    evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE);
  for (CeedInt i=0; i<num_eval_mode_out; i++)
    evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE);
  if (evalNone) {
    ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr);
    // Rectangular "identity": ones on the main diagonal up to min(nodes, qpts)
    for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++)
      identity[i*num_nodes+i] = 1.0;
  }
  ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
  ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr);
  // Compute the diagonal of B^T D B
  // Each element
  // Contributions below a relative tolerance of the QF max norm are skipped
  const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
  for (CeedInt e=0; e<num_elem; e++) {
    CeedInt d_out = -1;
    // Each basis eval mode pair
    for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
      const CeedScalar *bt = NULL;
      // d_out tracks which gradient-direction slice of grad_out to use
      if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
        d_out += 1;
      CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out,
                                  &grad_out[d_out*num_qpts*num_nodes], &bt);
      CeedInt d_in = -1;
      for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
        const CeedScalar *b = NULL;
        if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
          d_in += 1;
        CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in,
                                    &grad_in[d_in*num_qpts*num_nodes], &b);
        // Each component
        for (CeedInt c_out=0; c_out<num_comp; c_out++)
          // Each qpoint/node pair
          for (CeedInt q=0; q<num_qpts; q++)
            if (is_pointblock) {
              // Point Block Diagonal - full num_comp x num_comp block per node
              for (CeedInt c_in=0; c_in<num_comp; c_in++) {
                const CeedScalar qf_value =
                  assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)*
                                                     num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
                if (fabs(qf_value) > qf_value_bound)
                  for (CeedInt n=0; n<num_nodes; n++)
                    elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
                      bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
              }
            } else {
              // Diagonal Only - note c_in == c_out in the QF index here
              const CeedScalar qf_value =
                assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)*
                                                   num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
              if (fabs(qf_value) > qf_value_bound)
                for (CeedInt n=0; n<num_nodes; n++)
                  elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
                    bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
            }
      }
    }
  }
  ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr);
  ierr = CeedVectorRestoreArray(assembled_qf, &assembled_qf_array); CeedChk(ierr);

  // Assemble local operator diagonal (E-vector -> L-vector, summing)
  ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag,
                                  assembled, request); CeedChk(ierr);

  // Cleanup
  if (is_pointblock) {
    ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr);
  }
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
  ierr = CeedFree(&identity); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}
407 
408 /**
409   @brief Core logic for assembling composite operator diagonal
410 
411   @param[in] op             CeedOperator to assemble point block diagonal
412   @param[in] request        Address of CeedRequest for non-blocking completion, else
413                             CEED_REQUEST_IMMEDIATE
414   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
415   @param[out] assembled     CeedVector to store assembled diagonal
416 
417   @return An error code: 0 - success, otherwise - failure
418 
419   @ref Developer
420 **/
421 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(
422   CeedOperator op, CeedRequest *request, const bool is_pointblock,
423   CeedVector assembled) {
424   int ierr;
425   CeedInt num_sub;
426   CeedOperator *suboperators;
427   ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr);
428   ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr);
429   for (CeedInt i = 0; i < num_sub; i++) {
430     ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request,
431            is_pointblock, assembled); CeedChk(ierr);
432   }
433   return CEED_ERROR_SUCCESS;
434 }
435 
436 /**
437   @brief Build nonzero pattern for non-composite operator
438 
439   Users should generally use CeedOperatorLinearAssembleSymbolic()
440 
441   @param[in] op      CeedOperator to assemble nonzero pattern
442   @param[in] offset  Offset for number of entries
443   @param[out] rows   Row number for each entry
444   @param[out] cols   Column number for each entry
445 
446   @return An error code: 0 - success, otherwise - failure
447 
448   @ref Developer
449 **/
450 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset,
451     CeedInt *rows, CeedInt *cols) {
452   int ierr;
453   Ceed ceed = op->ceed;
454   if (op->is_composite)
455     // LCOV_EXCL_START
456     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
457                      "Composite operator not supported");
458   // LCOV_EXCL_STOP
459 
460   CeedElemRestriction rstr_in;
461   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr);
462   CeedInt num_elem, elem_size, num_nodes, num_comp;
463   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
464   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
465   ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr);
466   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
467   CeedInt layout_er[3];
468   ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr);
469 
470   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
471 
472   // Determine elem_dof relation
473   CeedVector index_vec;
474   ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr);
475   CeedScalar *array;
476   ierr = CeedVectorGetArray(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr);
477   for (CeedInt i = 0; i < num_nodes; ++i) {
478     array[i] = i;
479   }
480   ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr);
481   CeedVector elem_dof;
482   ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof);
483   CeedChk(ierr);
484   ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr);
485   CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec,
486                            elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
487   const CeedScalar *elem_dof_a;
488   ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a);
489   CeedChk(ierr);
490   ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr);
491 
492   // Determine i, j locations for element matrices
493   CeedInt count = 0;
494   for (int e = 0; e < num_elem; ++e) {
495     for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
496       for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
497         for (int i = 0; i < elem_size; ++i) {
498           for (int j = 0; j < elem_size; ++j) {
499             const CeedInt elem_dof_index_row = (i)*layout_er[0] +
500                                                (comp_out)*layout_er[1] + e*layout_er[2];
501             const CeedInt elem_dof_index_col = (j)*layout_er[0] +
502                                                (comp_in)*layout_er[1] + e*layout_er[2];
503 
504             const CeedInt row = elem_dof_a[elem_dof_index_row];
505             const CeedInt col = elem_dof_a[elem_dof_index_col];
506 
507             rows[offset + count] = row;
508             cols[offset + count] = col;
509             count++;
510           }
511         }
512       }
513     }
514   }
515   if (count != local_num_entries)
516     // LCOV_EXCL_START
517     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
518   // LCOV_EXCL_STOP
519   ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr);
520   ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr);
521 
522   return CEED_ERROR_SUCCESS;
523 }
524 
525 /**
526   @brief Assemble nonzero entries for non-composite operator
527 
528   Users should generally use CeedOperatorLinearAssemble()
529 
530   @param[in] op       CeedOperator to assemble
  @param[in] offset   Offset for number of entries
532   @param[out] values  Values to assemble into matrix
533 
534   @return An error code: 0 - success, otherwise - failure
535 
536   @ref Developer
537 **/
538 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
539                                       CeedVector values) {
540   int ierr;
541   Ceed ceed = op->ceed;
542   if (op->is_composite)
543     // LCOV_EXCL_START
544     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
545                      "Composite operator not supported");
546   // LCOV_EXCL_STOP
547 
548   // Assemble QFunction
549   CeedQFunction qf;
550   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
551   CeedVector assembled_qf;
552   CeedElemRestriction rstr_q;
553   ierr = CeedOperatorLinearAssembleQFunction(
554            op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
555 
556   CeedInt qf_length;
557   ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr);
558 
559   CeedInt num_input_fields, num_output_fields;
560   CeedOperatorField *input_fields;
561   CeedOperatorField *output_fields;
562   ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields,
563                                &num_output_fields, &output_fields); CeedChk(ierr);
564 
565   // Determine active input basis
566   CeedQFunctionField *qf_fields;
567   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
568   CeedInt num_eval_mode_in = 0, dim = 1;
569   CeedEvalMode *eval_mode_in = NULL;
570   CeedBasis basis_in = NULL;
571   CeedElemRestriction rstr_in = NULL;
572   for (CeedInt i=0; i<num_input_fields; i++) {
573     CeedVector vec;
574     ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr);
575     if (vec == CEED_VECTOR_ACTIVE) {
576       ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in);
577       CeedChk(ierr);
578       ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
579       ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in);
580       CeedChk(ierr);
581       CeedEvalMode eval_mode;
582       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
583       CeedChk(ierr);
584       switch (eval_mode) {
585       case CEED_EVAL_NONE:
586       case CEED_EVAL_INTERP:
587         ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
588         eval_mode_in[num_eval_mode_in] = eval_mode;
589         num_eval_mode_in += 1;
590         break;
591       case CEED_EVAL_GRAD:
592         ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
593         for (CeedInt d=0; d<dim; d++) {
594           eval_mode_in[num_eval_mode_in+d] = eval_mode;
595         }
596         num_eval_mode_in += dim;
597         break;
598       case CEED_EVAL_WEIGHT:
599       case CEED_EVAL_DIV:
600       case CEED_EVAL_CURL:
601         break; // Caught by QF Assembly
602       }
603     }
604   }
605 
606   // Determine active output basis
607   ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
608   CeedInt num_eval_mode_out = 0;
609   CeedEvalMode *eval_mode_out = NULL;
610   CeedBasis basis_out = NULL;
611   CeedElemRestriction rstr_out = NULL;
612   for (CeedInt i=0; i<num_output_fields; i++) {
613     CeedVector vec;
614     ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr);
615     if (vec == CEED_VECTOR_ACTIVE) {
616       ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out);
617       CeedChk(ierr);
618       ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out);
619       CeedChk(ierr);
620       CeedEvalMode eval_mode;
621       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
622       CeedChk(ierr);
623       switch (eval_mode) {
624       case CEED_EVAL_NONE:
625       case CEED_EVAL_INTERP:
626         ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
627         eval_mode_out[num_eval_mode_out] = eval_mode;
628         num_eval_mode_out += 1;
629         break;
630       case CEED_EVAL_GRAD:
631         ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
632         for (CeedInt d=0; d<dim; d++) {
633           eval_mode_out[num_eval_mode_out+d] = eval_mode;
634         }
635         num_eval_mode_out += dim;
636         break;
637       case CEED_EVAL_WEIGHT:
638       case CEED_EVAL_DIV:
639       case CEED_EVAL_CURL:
640         break; // Caught by QF Assembly
641       }
642     }
643   }
644 
645   if (num_eval_mode_in == 0 || num_eval_mode_out == 0)
646     // LCOV_EXCL_START
647     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
648                      "Cannot assemble operator with out inputs/outputs");
649   // LCOV_EXCL_STOP
650 
651   CeedInt num_elem, elem_size, num_qpts, num_comp;
652   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
653   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
654   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
655   ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
656 
657   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
658 
659   // loop over elements and put in data structure
660   const CeedScalar *interp_in, *grad_in;
661   ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
662   ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
663 
664   const CeedScalar *assembled_qf_array;
665   ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
666   CeedChk(ierr);
667 
668   CeedInt layout_qf[3];
669   ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr);
670   ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr);
671 
672   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
673   CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size];
674   CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size];
675   CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in *
676                                      num_qpts]; // logically 3-tensor
677   CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in];
678   CeedScalar elem_mat[elem_size * elem_size];
679   int count = 0;
680   CeedScalar *vals;
681   ierr = CeedVectorGetArray(values, CEED_MEM_HOST, &vals); CeedChk(ierr);
682   for (int e = 0; e < num_elem; ++e) {
683     for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
684       for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
685         for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) {
686           B_mat_in[ell] = 0.0;
687         }
688         for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) {
689           B_mat_out[ell] = 0.0;
690         }
691         // Store block-diagonal D matrix as collection of small dense blocks
692         for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) {
693           D_mat[ell] = 0.0;
694         }
695         // form element matrix itself (for each block component)
696         for (int ell = 0; ell < elem_size*elem_size; ++ell) {
697           elem_mat[ell] = 0.0;
698         }
699         for (int q = 0; q < num_qpts; ++q) {
700           for (int n = 0; n < elem_size; ++n) {
701             CeedInt d_in = -1;
702             for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) {
703               const int qq = num_eval_mode_in*q;
704               if (eval_mode_in[e_in] == CEED_EVAL_INTERP) {
705                 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n];
706               } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) {
707                 d_in += 1;
708                 B_mat_in[(qq+e_in)*elem_size + n] +=
709                   grad_in[(d_in*num_qpts+q) * elem_size + n];
710               } else {
711                 // LCOV_EXCL_START
712                 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
713                 // LCOV_EXCL_STOP
714               }
715             }
716             CeedInt d_out = -1;
717             for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) {
718               const int qq = num_eval_mode_out*q;
719               if (eval_mode_out[e_out] == CEED_EVAL_INTERP) {
720                 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n];
721               } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) {
722                 d_out += 1;
723                 B_mat_out[(qq+e_out)*elem_size + n] +=
724                   grad_in[(d_out*num_qpts+q) * elem_size + n];
725               } else {
726                 // LCOV_EXCL_START
727                 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
728                 // LCOV_EXCL_STOP
729               }
730             }
731           }
732           for (int ei = 0; ei < num_eval_mode_out; ++ei) {
733             for (int ej = 0; ej < num_eval_mode_in; ++ej) {
734               const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp
735                                           +comp_out;
736               const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
737                                 e*layout_qf[2];
738               D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index];
739             }
740           }
741         }
742         // Compute B^T*D
743         for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) {
744           BTD[ell] = 0.0;
745         }
746         for (int j = 0; j<elem_size; ++j) {
747           for (int q = 0; q<num_qpts; ++q) {
748             int qq = num_eval_mode_out*q;
749             for (int ei = 0; ei < num_eval_mode_in; ++ei) {
750               for (int ej = 0; ej < num_eval_mode_out; ++ej) {
751                 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] +=
752                   B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q];
753               }
754             }
755           }
756         }
757 
758         ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size,
759                                   elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);
760 
761         // put element matrix in coordinate data structure
762         for (int i = 0; i < elem_size; ++i) {
763           for (int j = 0; j < elem_size; ++j) {
764             vals[offset + count] = elem_mat[i*elem_size + j];
765             count++;
766           }
767         }
768       }
769     }
770   }
771   if (count != local_num_entries)
772     // LCOV_EXCL_START
773     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
774   // LCOV_EXCL_STOP
775   ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr);
776 
777   ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
778   CeedChk(ierr);
779   ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
780   ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
781   ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
782 
783   return CEED_ERROR_SUCCESS;
784 }
785 
786 /**
787   @brief Count number of entries for assembled CeedOperator
788 
789   @param[in] op            CeedOperator to assemble
790   @param[out] num_entries  Number of entries in assembled representation
791 
792   @return An error code: 0 - success, otherwise - failure
793 
794   @ref Utility
795 **/
796 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op,
797     CeedInt *num_entries) {
798   int ierr;
799   CeedElemRestriction rstr;
800   CeedInt num_elem, elem_size, num_comp;
801 
802   if (op->is_composite)
803     // LCOV_EXCL_START
804     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
805                      "Composite operator not supported");
806   // LCOV_EXCL_STOP
807   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr);
808   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
809   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
810   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
811   *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
812 
813   return CEED_ERROR_SUCCESS;
814 }
815 
816 /**
817   @brief Common code for creating a multigrid coarse operator and level
818            transfer operators for a CeedOperator
819 
820   @param[in] op_fine       Fine grid operator
821   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
822   @param[in] rstr_coarse   Coarse grid restriction
823   @param[in] basis_coarse  Coarse grid active vector basis
824   @param[in] basis_c_to_f  Basis for coarse to fine interpolation
825   @param[out] op_coarse    Coarse grid operator
826   @param[out] op_prolong   Coarse to fine operator
827   @param[out] op_restrict  Fine to coarse operator
828 
829   @return An error code: 0 - success, otherwise - failure
830 
831   @ref Developer
832 **/
833 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
834     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
835     CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
836     CeedOperator *op_restrict) {
837   int ierr;
838   Ceed ceed;
839   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
840 
841   // Check for composite operator
842   bool is_composite;
843   ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
844   if (is_composite)
845     // LCOV_EXCL_START
846     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
847                      "Automatic multigrid setup for composite operators not supported");
848   // LCOV_EXCL_STOP
849 
850   // Coarse Grid
851   ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
852                             op_coarse); CeedChk(ierr);
853   CeedElemRestriction rstr_fine = NULL;
854   // -- Clone input fields
855   for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
856     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
857       rstr_fine = op_fine->input_fields[i]->elem_restr;
858       ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
859                                   rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
860       CeedChk(ierr);
861     } else {
862       ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
863                                   op_fine->input_fields[i]->elem_restr,
864                                   op_fine->input_fields[i]->basis,
865                                   op_fine->input_fields[i]->vec); CeedChk(ierr);
866     }
867   }
868   // -- Clone output fields
869   for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
870     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
871       ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
872                                   rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
873       CeedChk(ierr);
874     } else {
875       ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
876                                   op_fine->output_fields[i]->elem_restr,
877                                   op_fine->output_fields[i]->basis,
878                                   op_fine->output_fields[i]->vec); CeedChk(ierr);
879     }
880   }
881 
882   // Multiplicity vector
883   CeedVector mult_vec, mult_e_vec;
884   ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
885   CeedChk(ierr);
886   ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
887   ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
888                                   mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
889   ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
890   ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
891                                   CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
892   ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
893   ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);
894 
895   // Restriction
896   CeedInt num_comp;
897   ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
898   CeedQFunction qf_restrict;
899   ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
900   CeedChk(ierr);
901   CeedInt *num_comp_r_data;
902   ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
903   num_comp_r_data[0] = num_comp;
904   CeedQFunctionContext ctx_r;
905   ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
906   ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
907                                      sizeof(*num_comp_r_data), num_comp_r_data);
908   CeedChk(ierr);
909   ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
910   ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
911   ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
912   CeedChk(ierr);
913   ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
914   CeedChk(ierr);
915   ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
916                                 CEED_EVAL_INTERP); CeedChk(ierr);
917 
918   ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
919                             CEED_QFUNCTION_NONE, op_restrict);
920   CeedChk(ierr);
921   ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
922                               CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
923   CeedChk(ierr);
924   ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
925                               CEED_BASIS_COLLOCATED, mult_vec);
926   CeedChk(ierr);
927   ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
928                               CEED_VECTOR_ACTIVE); CeedChk(ierr);
929 
930   // Prolongation
931   CeedQFunction qf_prolong;
932   ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
933   CeedChk(ierr);
934   CeedInt *num_comp_p_data;
935   ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
936   num_comp_p_data[0] = num_comp;
937   CeedQFunctionContext ctx_p;
938   ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
939   ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
940                                      sizeof(*num_comp_p_data), num_comp_p_data);
941   CeedChk(ierr);
942   ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
943   ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
944   ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
945   CeedChk(ierr);
946   ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
947   CeedChk(ierr);
948   ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
949   CeedChk(ierr);
950 
951   ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
952                             CEED_QFUNCTION_NONE, op_prolong);
953   CeedChk(ierr);
954   ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
955                               CEED_VECTOR_ACTIVE); CeedChk(ierr);
956   ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
957                               CEED_BASIS_COLLOCATED, mult_vec);
958   CeedChk(ierr);
959   ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
960                               CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
961   CeedChk(ierr);
962 
963   // Cleanup
964   ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
965   ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
966   ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
967   ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
968   return CEED_ERROR_SUCCESS;
969 }
970 
971 /**
972   @brief Build 1D mass matrix and Laplacian with perturbation
973 
974   @param[in] interp_1d    Interpolation matrix in one dimension
975   @param[in] grad_1d      Gradient matrix in one dimension
976   @param[in] q_weight_1d  Quadrature weights in one dimension
977   @param[in] P_1d         Number of basis nodes in one dimension
978   @param[in] Q_1d         Number of quadrature points in one dimension
979   @param[in] dim          Dimension of basis
980   @param[out] mass        Assembled mass matrix in one dimension
981   @param[out] laplace     Assembled perturbed Laplacian in one dimension
982 
983   @return An error code: 0 - success, otherwise - failure
984 
985   @ref Developer
986 **/
987 CeedPragmaOptimizeOff
988 static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
989                                 const CeedScalar *grad_1d,
990                                 const CeedScalar *q_weight_1d, CeedInt P_1d,
991                                 CeedInt Q_1d, CeedInt dim,
992                                 CeedScalar *mass, CeedScalar *laplace) {
993 
994   for (CeedInt i=0; i<P_1d; i++)
995     for (CeedInt j=0; j<P_1d; j++) {
996       CeedScalar sum = 0.0;
997       for (CeedInt k=0; k<Q_1d; k++)
998         sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
999       mass[i+j*P_1d] = sum;
1000     }
1001   // -- Laplacian
1002   for (CeedInt i=0; i<P_1d; i++)
1003     for (CeedInt j=0; j<P_1d; j++) {
1004       CeedScalar sum = 0.0;
1005       for (CeedInt k=0; k<Q_1d; k++)
1006         sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
1007       laplace[i+j*P_1d] = sum;
1008     }
1009   CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
1010   for (CeedInt i=0; i<P_1d; i++)
1011     laplace[i+P_1d*i] += perturbation;
1012   return CEED_ERROR_SUCCESS;
1013 }
1014 CeedPragmaOptimizeOn
1015 
1016 /// @}
1017 
1018 /// ----------------------------------------------------------------------------
1019 /// CeedOperator Public API
1020 /// ----------------------------------------------------------------------------
1021 /// @addtogroup CeedOperatorUser
1022 /// @{
1023 
1024 /**
1025   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1026 
1027   This returns a CeedVector containing a matrix at each quadrature point
1028     providing the action of the CeedQFunction associated with the CeedOperator.
1029     The vector 'assembled' is of shape
1030       [num_elements, num_input_fields, num_output_fields, num_quad_points]
1031     and contains column-major matrices representing the action of the
1032     CeedQFunction for a corresponding quadrature point on an element. Inputs and
1033     outputs are in the order provided by the user when adding CeedOperator fields.
1034     For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
1035     'v', provided in that order, would result in an assembled QFunction that
1036     consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
1037     on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1038 
1039   Note: Calling this function asserts that setup is complete
1040           and sets the CeedOperator as immutable.
1041 
1042   @param op              CeedOperator to assemble CeedQFunction
1043   @param[out] assembled  CeedVector to store assembled CeedQFunction at
1044                            quadrature points
1045   @param[out] rstr       CeedElemRestriction for CeedVector containing assembled
1046                            CeedQFunction
1047   @param request         Address of CeedRequest for non-blocking completion, else
1048                            @ref CEED_REQUEST_IMMEDIATE
1049 
1050   @return An error code: 0 - success, otherwise - failure
1051 
1052   @ref User
1053 **/
1054 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
1055                                         CeedElemRestriction *rstr,
1056                                         CeedRequest *request) {
1057   int ierr;
1058   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1059 
1060   // Backend version
1061   if (op->LinearAssembleQFunction) {
1062     ierr = op->LinearAssembleQFunction(op, assembled, rstr, request); CeedChk(ierr);
1063     return CEED_ERROR_SUCCESS;
1064   } else {
1065     // Fallback to reference Ceed
1066     if (!op->op_fallback) {
1067       ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1068     }
1069     // Assemble
1070     ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled,
1071            rstr, request); CeedChk(ierr);
1072     return CEED_ERROR_SUCCESS;
1073   }
1074 }
1075 
1076 /**
1077   @brief Assemble the diagonal of a square linear CeedOperator
1078 
1079   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1080 
1081   Note: Currently only non-composite CeedOperators with a single field and
1082           composite CeedOperators with single field sub-operators are supported.
1083 
1084   Note: Calling this function asserts that setup is complete
1085           and sets the CeedOperator as immutable.
1086 
1087   @param op              CeedOperator to assemble CeedQFunction
1088   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1089   @param request         Address of CeedRequest for non-blocking completion, else
1090                            @ref CEED_REQUEST_IMMEDIATE
1091 
1092   @return An error code: 0 - success, otherwise - failure
1093 
1094   @ref User
1095 **/
1096 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
1097                                        CeedRequest *request) {
1098   int ierr;
1099   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1100 
1101   // Use backend version, if available
1102   if (op->LinearAssembleDiagonal) {
1103     ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
1104     return CEED_ERROR_SUCCESS;
1105   } else if (op->LinearAssembleAddDiagonal) {
1106     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1107     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1108     return CEED_ERROR_SUCCESS;
1109   } else {
1110     // Check for valid fallback resource
1111     const char *resource, *fallback_resource;
1112     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1113     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1114     CeedChk(ierr);
1115     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1116       // Fallback to reference Ceed
1117       if (!op->op_fallback) {
1118         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1119       }
1120       // Assemble
1121       ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
1122       CeedChk(ierr);
1123       return CEED_ERROR_SUCCESS;
1124     }
1125   }
1126 
1127   // Default interface implementation
1128   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1129   ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
1130   CeedChk(ierr);
1131   return CEED_ERROR_SUCCESS;
1132 }
1133 
1134 /**
1135   @brief Assemble the diagonal of a square linear CeedOperator
1136 
1137   This sums into a CeedVector the diagonal of a linear CeedOperator.
1138 
1139   Note: Currently only non-composite CeedOperators with a single field and
1140           composite CeedOperators with single field sub-operators are supported.
1141 
1142   Note: Calling this function asserts that setup is complete
1143           and sets the CeedOperator as immutable.
1144 
1145   @param op              CeedOperator to assemble CeedQFunction
1146   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1147   @param request         Address of CeedRequest for non-blocking completion, else
1148                            @ref CEED_REQUEST_IMMEDIATE
1149 
1150   @return An error code: 0 - success, otherwise - failure
1151 
1152   @ref User
1153 **/
1154 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
1155     CeedRequest *request) {
1156   int ierr;
1157   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1158 
1159   // Use backend version, if available
1160   if (op->LinearAssembleAddDiagonal) {
1161     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1162     return CEED_ERROR_SUCCESS;
1163   } else {
1164     // Check for valid fallback resource
1165     const char *resource, *fallback_resource;
1166     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1167     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1168     CeedChk(ierr);
1169     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1170       // Fallback to reference Ceed
1171       if (!op->op_fallback) {
1172         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1173       }
1174       // Assemble
1175       ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled,
1176              request); CeedChk(ierr);
1177       return CEED_ERROR_SUCCESS;
1178     }
1179   }
1180 
1181   // Default interface implementation
1182   bool is_composite;
1183   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1184   if (is_composite) {
1185     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1186            false, assembled); CeedChk(ierr);
1187     return CEED_ERROR_SUCCESS;
1188   } else {
1189     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
1190     CeedChk(ierr);
1191     return CEED_ERROR_SUCCESS;
1192   }
1193 }
1194 
1195 /**
1196   @brief Assemble the point block diagonal of a square linear CeedOperator
1197 
1198   This overwrites a CeedVector with the point block diagonal of a linear
1199     CeedOperator.
1200 
1201   Note: Currently only non-composite CeedOperators with a single field and
1202           composite CeedOperators with single field sub-operators are supported.
1203 
1204   Note: Calling this function asserts that setup is complete
1205           and sets the CeedOperator as immutable.
1206 
1207   @param op              CeedOperator to assemble CeedQFunction
1208   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1209                            diagonal, provided in row-major form with an
1210                            @a num_comp * @a num_comp block at each node. The dimensions
1211                            of this vector are derived from the active vector
1212                            for the CeedOperator. The array has shape
1213                            [nodes, component out, component in].
1214   @param request         Address of CeedRequest for non-blocking completion, else
1215                            CEED_REQUEST_IMMEDIATE
1216 
1217   @return An error code: 0 - success, otherwise - failure
1218 
1219   @ref User
1220 **/
1221 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
1222     CeedVector assembled, CeedRequest *request) {
1223   int ierr;
1224   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1225 
1226   // Use backend version, if available
1227   if (op->LinearAssemblePointBlockDiagonal) {
1228     ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
1229     CeedChk(ierr);
1230     return CEED_ERROR_SUCCESS;
1231   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1232     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1233     ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
1234            request); CeedChk(ierr);
1235     return CEED_ERROR_SUCCESS;
1236   } else {
1237     // Check for valid fallback resource
1238     const char *resource, *fallback_resource;
1239     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1240     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1241     CeedChk(ierr);
1242     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1243       // Fallback to reference Ceed
1244       if (!op->op_fallback) {
1245         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1246       }
1247       // Assemble
1248       ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
1249              assembled, request); CeedChk(ierr);
1250       return CEED_ERROR_SUCCESS;
1251     }
1252   }
1253 
1254   // Default interface implementation
1255   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1256   ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
1257   CeedChk(ierr);
1258   return CEED_ERROR_SUCCESS;
1259 }
1260 
1261 /**
1262   @brief Assemble the point block diagonal of a square linear CeedOperator
1263 
1264   This sums into a CeedVector with the point block diagonal of a linear
1265     CeedOperator.
1266 
1267   Note: Currently only non-composite CeedOperators with a single field and
1268           composite CeedOperators with single field sub-operators are supported.
1269 
1270   Note: Calling this function asserts that setup is complete
1271           and sets the CeedOperator as immutable.
1272 
1273   @param op              CeedOperator to assemble CeedQFunction
1274   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1275                            diagonal, provided in row-major form with an
1276                            @a num_comp * @a num_comp block at each node. The dimensions
1277                            of this vector are derived from the active vector
1278                            for the CeedOperator. The array has shape
1279                            [nodes, component out, component in].
1280   @param request         Address of CeedRequest for non-blocking completion, else
1281                            CEED_REQUEST_IMMEDIATE
1282 
1283   @return An error code: 0 - success, otherwise - failure
1284 
1285   @ref User
1286 **/
1287 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
1288     CeedVector assembled, CeedRequest *request) {
1289   int ierr;
1290   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1291 
1292   // Use backend version, if available
1293   if (op->LinearAssembleAddPointBlockDiagonal) {
1294     ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
1295     CeedChk(ierr);
1296     return CEED_ERROR_SUCCESS;
1297   } else {
1298     // Check for valid fallback resource
1299     const char *resource, *fallback_resource;
1300     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1301     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1302     CeedChk(ierr);
1303     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1304       // Fallback to reference Ceed
1305       if (!op->op_fallback) {
1306         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1307       }
1308       // Assemble
1309       ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback,
1310              assembled, request); CeedChk(ierr);
1311       return CEED_ERROR_SUCCESS;
1312     }
1313   }
1314 
1315   // Default interface implemenation
1316   bool is_composite;
1317   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1318   if (is_composite) {
1319     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1320            true, assembled); CeedChk(ierr);
1321     return CEED_ERROR_SUCCESS;
1322   } else {
1323     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
1324     CeedChk(ierr);
1325     return CEED_ERROR_SUCCESS;
1326   }
1327 }
1328 
1329 /**
1330    @brief Fully assemble the nonzero pattern of a linear operator.
1331 
1332    Expected to be used in conjunction with CeedOperatorLinearAssemble()
1333 
1334    The assembly routines use coordinate format, with num_entries tuples of the
1335    form (i, j, value) which indicate that value should be added to the matrix
1336    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1337    This function returns the number of entries and their (i, j) locations,
1338    while CeedOperatorLinearAssemble() provides the values in the same
1339    ordering.
1340 
1341    This will generally be slow unless your operator is low-order.
1342 
1343   Note: Calling this function asserts that setup is complete
1344           and sets the CeedOperator as immutable.
1345 
1346    @param[in]  op           CeedOperator to assemble
1347    @param[out] num_entries  Number of entries in coordinate nonzero pattern
1348    @param[out] rows         Row number for each entry
   @param[out] cols         Column number for each entry

   @return An error code: 0 - success, otherwise - failure

   @ref User
1352 **/
1353 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedInt *num_entries,
1354                                        CeedInt **rows, CeedInt **cols) {
1355   int ierr;
1356   CeedInt num_suboperators, single_entries;
1357   CeedOperator *sub_operators;
1358   bool is_composite;
1359   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1360 
1361   // Use backend version, if available
1362   if (op->LinearAssembleSymbolic) {
1363     ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
1364     return CEED_ERROR_SUCCESS;
1365   } else {
1366     // Check for valid fallback resource
1367     const char *resource, *fallback_resource;
1368     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1369     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1370     CeedChk(ierr);
1371     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1372       // Fallback to reference Ceed
1373       if (!op->op_fallback) {
1374         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1375       }
1376       // Assemble
1377       ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
1378              cols); CeedChk(ierr);
1379       return CEED_ERROR_SUCCESS;
1380     }
1381   }
1382 
1383   // Default interface implementation
1384 
1385   // count entries and allocate rows, cols arrays
1386   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1387   *num_entries = 0;
1388   if (is_composite) {
1389     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1390     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1391     for (int k = 0; k < num_suboperators; ++k) {
1392       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1393              &single_entries); CeedChk(ierr);
1394       *num_entries += single_entries;
1395     }
1396   } else {
1397     ierr = CeedSingleOperatorAssemblyCountEntries(op,
1398            &single_entries); CeedChk(ierr);
1399     *num_entries += single_entries;
1400   }
1401   ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
1402   ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);
1403 
1404   // assemble nonzero locations
1405   CeedInt offset = 0;
1406   if (is_composite) {
1407     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1408     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1409     for (int k = 0; k < num_suboperators; ++k) {
1410       ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
1411              *cols); CeedChk(ierr);
1412       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1413              &single_entries);
1414       CeedChk(ierr);
1415       offset += single_entries;
1416     }
1417   } else {
1418     ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
1419     CeedChk(ierr);
1420   }
1421 
1422   return CEED_ERROR_SUCCESS;
1423 }
1424 
1425 /**
1426    @brief Fully assemble the nonzero entries of a linear operator.
1427 
1428    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()
1429 
1430    The assembly routines use coordinate format, with num_entries tuples of the
1431    form (i, j, value) which indicate that value should be added to the matrix
1432    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1433    This function returns the values of the nonzero entries to be added, their
1434    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1435 
1436    This will generally be slow unless your operator is low-order.
1437 
1438   Note: Calling this function asserts that setup is complete
1439           and sets the CeedOperator as immutable.
1440 
1441    @param[in]  op      CeedOperator to assemble
   @param[out] values  Values to assemble into matrix

   @return An error code: 0 - success, otherwise - failure

   @ref User
1445 **/
1446 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1447   int ierr;
1448   CeedInt num_suboperators, single_entries = 0;
1449   CeedOperator *sub_operators;
1450   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1451 
1452   // Use backend version, if available
1453   if (op->LinearAssemble) {
1454     ierr = op->LinearAssemble(op, values); CeedChk(ierr);
1455     return CEED_ERROR_SUCCESS;
1456   } else {
1457     // Check for valid fallback resource
1458     const char *resource, *fallback_resource;
1459     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1460     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1461     CeedChk(ierr);
1462     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1463       // Fallback to reference Ceed
1464       if (!op->op_fallback) {
1465         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1466       }
1467       // Assemble
1468       ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr);
1469       return CEED_ERROR_SUCCESS;
1470     }
1471   }
1472 
1473   // Default interface implementation
1474   bool is_composite;
1475   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1476 
1477   CeedInt offset = 0;
1478   if (is_composite) {
1479     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1480     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1481     for (int k = 0; k < num_suboperators; ++k) {
1482       ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
1483       CeedChk(ierr);
1484       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1485              &single_entries);
1486       CeedChk(ierr);
1487       offset += single_entries;
1488     }
1489   } else {
1490     ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
1491   }
1492 
1493   return CEED_ERROR_SUCCESS;
1494 }
1495 
1496 /**
1497   @brief Create a multigrid coarse operator and level transfer operators
1498            for a CeedOperator, creating the prolongation basis from the
1499            fine and coarse grid interpolation
1500 
1501   Note: Calling this function asserts that setup is complete
1502           and sets the CeedOperator as immutable.
1503 
1504   @param[in] op_fine       Fine grid operator
1505   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
1506   @param[in] rstr_coarse   Coarse grid restriction
1507   @param[in] basis_coarse  Coarse grid active vector basis
1508   @param[out] op_coarse    Coarse grid operator
1509   @param[out] op_prolong   Coarse to fine operator
1510   @param[out] op_restrict  Fine to coarse operator
1511 
1512   @return An error code: 0 - success, otherwise - failure
1513 
1514   @ref User
1515 **/
int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
                                     CeedVector p_mult_fine,
                                     CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
                                     CeedOperator *op_coarse, CeedOperator *op_prolong,
                                     CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  // Both levels must share the quadrature space for the prolongation built
  // here to be consistent with the fine-grid operator
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  // Tensor bases use the 1D factors (P and Q are per-dimension sizes);
  // non-tensor bases use full node counts.  Mixed pairs are rejected.
  CeedInt P_f, P_c, Q = Q_f;
  bool is_tensor_f, is_tensor_c;
  ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
  ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
  CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
  if (is_tensor_f && is_tensor_c) {
    ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
    ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
  } else if (!is_tensor_f && !is_tensor_c) {
    ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
  } else {
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MINOR,
                     "Bases must both be tensor or non-tensor");
    // LCOV_EXCL_STOP
  }

  // Work on copies of the interpolation matrices; the QR factorization
  // below overwrites interp_f in place
  // NOTE(review): if any CeedChk between these allocations and the CeedFree
  //   calls below fails, the temporaries leak -- consistent with the error
  //   handling pattern used elsewhere in this file
  ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
  ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
  ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
  ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
  if (is_tensor_f) {
    memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]);
    memcpy(interp_c, basis_coarse->interp_1d,
           Q*P_c*sizeof basis_coarse->interp_1d[0]);
  } else {
    memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]);
    memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]);
  }

  // -- QR Factorization, interp_f = Q R
  // After this call interp_f holds R in its upper triangle and the
  // Householder reflector data below it; tau holds the scalar factors
  ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);

  // -- Apply Qtranspose, interp_c = Qtranspose interp_c
  ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
                               Q, P_c, P_f, P_c, 1); CeedChk(ierr);

  // -- Apply Rinv, interp_c_to_f = Rinv interp_c
  // Back substitution on the upper-triangular R stored in interp_f;
  // interp_c_to_f is P_f x P_c row-major (row i = fine node, column j =
  // coarse node), so this solves each column independently, bottom row up
  for (CeedInt j=0; j<P_c; j++) { // Column j
    interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
    for (CeedInt i=P_f-2; i>=0; i--) { // Row i
      interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
      for (CeedInt k=i+1; k<P_f; k++)
        interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
      interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
    }
  }
  ierr = CeedFree(&tau); CeedChk(ierr);
  ierr = CeedFree(&interp_c); CeedChk(ierr);
  ierr = CeedFree(&interp_f); CeedChk(ierr);

  // Complete with interp_c_to_f versions of code
  // Hand the computed prolongation matrix to the tensor or non-tensor
  // level constructor, matching the basis kind detected above
  if (is_tensor_f) {
    ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  } else {
    ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  }

  // Cleanup
  ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}
1607 
1608 /**
1609   @brief Create a multigrid coarse operator and level transfer operators
1610            for a CeedOperator with a tensor basis for the active basis
1611 
1612   Note: Calling this function asserts that setup is complete
1613           and sets the CeedOperator as immutable.
1614 
1615   @param[in] op_fine        Fine grid operator
1616   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
1617   @param[in] rstr_coarse    Coarse grid restriction
1618   @param[in] basis_coarse   Coarse grid active vector basis
1619   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
1620   @param[out] op_coarse     Coarse grid operator
1621   @param[out] op_prolong    Coarse to fine operator
1622   @param[out] op_restrict   Fine to coarse operator
1623 
1624   @return An error code: 0 - success, otherwise - failure
1625 
1626   @ref User
1627 **/
1628 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
1629     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1630     const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
1631     CeedOperator *op_prolong, CeedOperator *op_restrict) {
1632   int ierr;
1633   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
1634   Ceed ceed;
1635   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
1636 
1637   // Check for compatible quadrature spaces
1638   CeedBasis basis_fine;
1639   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
1640   CeedInt Q_f, Q_c;
1641   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
1642   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
1643   if (Q_f != Q_c)
1644     // LCOV_EXCL_START
1645     return CeedError(ceed, CEED_ERROR_DIMENSION,
1646                      "Bases must have compatible quadrature spaces");
1647   // LCOV_EXCL_STOP
1648 
1649   // Coarse to fine basis
1650   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
1651   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
1652   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
1653   ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr);
1654   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
1655   CeedChk(ierr);
1656   P_1d_c = dim == 1 ? num_nodes_c :
1657            dim == 2 ? sqrt(num_nodes_c) :
1658            cbrt(num_nodes_c);
1659   CeedScalar *q_ref, *q_weight, *grad;
1660   ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr);
1661   ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr);
1662   ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr);
1663   CeedBasis basis_c_to_f;
1664   ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f,
1665                                  interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
1666   CeedChk(ierr);
1667   ierr = CeedFree(&q_ref); CeedChk(ierr);
1668   ierr = CeedFree(&q_weight); CeedChk(ierr);
1669   ierr = CeedFree(&grad); CeedChk(ierr);
1670 
1671   // Core code
1672   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
1673                                           basis_coarse, basis_c_to_f, op_coarse,
1674                                           op_prolong, op_restrict);
1675   CeedChk(ierr);
1676   return CEED_ERROR_SUCCESS;
1677 }
1678 
1679 /**
1680   @brief Create a multigrid coarse operator and level transfer operators
1681            for a CeedOperator with a non-tensor basis for the active vector
1682 
1683   Note: Calling this function asserts that setup is complete
1684           and sets the CeedOperator as immutable.
1685 
1686   @param[in] op_fine        Fine grid operator
1687   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
1688   @param[in] rstr_coarse    Coarse grid restriction
1689   @param[in] basis_coarse   Coarse grid active vector basis
1690   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
1691   @param[out] op_coarse     Coarse grid operator
1692   @param[out] op_prolong    Coarse to fine operator
1693   @param[out] op_restrict   Fine to coarse operator
1694 
1695   @return An error code: 0 - success, otherwise - failure
1696 
1697   @ref User
1698 **/
1699 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine,
1700                                        CeedVector p_mult_fine,
1701                                        CeedElemRestriction rstr_coarse,
1702                                        CeedBasis basis_coarse,
1703                                        const CeedScalar *interp_c_to_f,
1704                                        CeedOperator *op_coarse,
1705                                        CeedOperator *op_prolong,
1706                                        CeedOperator *op_restrict) {
1707   int ierr;
1708   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
1709   Ceed ceed;
1710   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
1711 
1712   // Check for compatible quadrature spaces
1713   CeedBasis basis_fine;
1714   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
1715   CeedInt Q_f, Q_c;
1716   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
1717   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
1718   if (Q_f != Q_c)
1719     // LCOV_EXCL_START
1720     return CeedError(ceed, CEED_ERROR_DIMENSION,
1721                      "Bases must have compatible quadrature spaces");
1722   // LCOV_EXCL_STOP
1723 
1724   // Coarse to fine basis
1725   CeedElemTopology topo;
1726   ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr);
1727   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
1728   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
1729   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
1730   ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr);
1731   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
1732   CeedChk(ierr);
1733   CeedScalar *q_ref, *q_weight, *grad;
1734   ierr = CeedCalloc(num_nodes_f, &q_ref); CeedChk(ierr);
1735   ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr);
1736   ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr);
1737   CeedBasis basis_c_to_f;
1738   ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f,
1739                            interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
1740   CeedChk(ierr);
1741   ierr = CeedFree(&q_ref); CeedChk(ierr);
1742   ierr = CeedFree(&q_weight); CeedChk(ierr);
1743   ierr = CeedFree(&grad); CeedChk(ierr);
1744 
1745   // Core code
1746   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
1747                                           basis_coarse, basis_c_to_f, op_coarse,
1748                                           op_prolong, op_restrict);
1749   CeedChk(ierr);
1750   return CEED_ERROR_SUCCESS;
1751 }
1752 
1753 /**
1754   @brief Build a FDM based approximate inverse for each element for a
1755            CeedOperator
1756 
1757   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization
1758     Method based approximate inverse. This function obtains the simultaneous
1759     diagonalization for the 1D mass and Laplacian operators,
1760       M = V^T V, K = V^T S V.
1761     The assembled QFunction is used to modify the eigenvalues from simultaneous
1762     diagonalization and obtain an approximate inverse of the form
1763       V^T S^hat V. The CeedOperator must be linear and non-composite. The
1764     associated CeedQFunction must therefore also be linear.
1765 
1766   Note: Calling this function asserts that setup is complete
1767           and sets the CeedOperator as immutable.
1768 
1769   @param op            CeedOperator to create element inverses
1770   @param[out] fdm_inv  CeedOperator to apply the action of a FDM based inverse
1771                          for each element
1772   @param request       Address of CeedRequest for non-blocking completion, else
1773                          @ref CEED_REQUEST_IMMEDIATE
1774 
1775   @return An error code: 0 - success, otherwise - failure
1776 
1777   @ref Backend
1778 **/
1779 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv,
1780                                         CeedRequest *request) {
1781   int ierr;
1782   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1783 
1784   // Use backend version, if available
1785   if (op->CreateFDMElementInverse) {
1786     ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr);
1787     return CEED_ERROR_SUCCESS;
1788   } else {
1789     // Check for valid fallback resource
1790     const char *resource, *fallback_resource;
1791     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1792     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1793     CeedChk(ierr);
1794     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1795       // Fallback to reference Ceed
1796       if (!op->op_fallback) {
1797         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1798       }
1799       // Assemble
1800       ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request);
1801       CeedChk(ierr);
1802       return CEED_ERROR_SUCCESS;
1803     }
1804   }
1805 
1806   // Interface implementation
1807   Ceed ceed, ceed_parent;
1808   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
1809   ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr);
1810   ceed_parent = ceed_parent ? ceed_parent : ceed;
1811   CeedQFunction qf;
1812   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
1813 
1814   // Determine active input basis
1815   bool interp = false, grad = false;
1816   CeedBasis basis = NULL;
1817   CeedElemRestriction rstr = NULL;
1818   CeedOperatorField *op_fields;
1819   CeedQFunctionField *qf_fields;
1820   CeedInt num_input_fields;
1821   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL);
1822   CeedChk(ierr);
1823   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
1824   for (CeedInt i=0; i<num_input_fields; i++) {
1825     CeedVector vec;
1826     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
1827     if (vec == CEED_VECTOR_ACTIVE) {
1828       CeedEvalMode eval_mode;
1829       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
1830       interp = interp || eval_mode == CEED_EVAL_INTERP;
1831       grad = grad || eval_mode == CEED_EVAL_GRAD;
1832       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr);
1833       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
1834     }
1835   }
1836   if (!basis)
1837     // LCOV_EXCL_START
1838     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
1839   // LCOV_EXCL_STOP
1840   CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1,
1841                                                 l_size = 1;
1842   ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr);
1843   ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr);
1844   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr);
1845   ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr);
1846   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
1847   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
1848   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
1849   ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr);
1850 
1851   // Build and diagonalize 1D Mass and Laplacian
1852   bool tensor_basis;
1853   ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr);
1854   if (!tensor_basis)
1855     // LCOV_EXCL_START
1856     return CeedError(ceed, CEED_ERROR_BACKEND,
1857                      "FDMElementInverse only supported for tensor "
1858                      "bases");
1859   // LCOV_EXCL_STOP
1860   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
1861   ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr);
1862   ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr);
1863   ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr);
1864   ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr);
1865   ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr);
1866   // -- Build matrices
1867   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
1868   ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr);
1869   ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr);
1870   ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr);
1871   ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim,
1872                               mass, laplace); CeedChk(ierr);
1873 
1874   // -- Diagonalize
1875   ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d);
1876   CeedChk(ierr);
1877   ierr = CeedFree(&mass); CeedChk(ierr);
1878   ierr = CeedFree(&laplace); CeedChk(ierr);
1879   for (CeedInt i=0; i<P_1d; i++)
1880     for (CeedInt j=0; j<P_1d; j++)
1881       fdm_interp[i+j*P_1d] = x[j+i*P_1d];
1882   ierr = CeedFree(&x); CeedChk(ierr);
1883 
1884   // Assemble QFunction
1885   CeedVector assembled;
1886   CeedElemRestriction rstr_qf;
1887   ierr =  CeedOperatorLinearAssembleQFunction(op, &assembled, &rstr_qf, request);
1888   CeedChk(ierr);
1889   CeedInt layout[3];
1890   ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr);
1891   ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr);
1892   CeedScalar max_norm = 0;
1893   ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr);
1894 
1895   // Calculate element averages
1896   CeedInt num_modes = (interp?1:0) + (grad?dim:0);
1897   CeedScalar *elem_avg;
1898   const CeedScalar *assembled_array, *q_weight_array;
1899   CeedVector q_weight;
1900   ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr);
1901   ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT,
1902                         CEED_VECTOR_NONE, q_weight); CeedChk(ierr);
1903   ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array);
1904   CeedChk(ierr);
1905   ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array);
1906   CeedChk(ierr);
1907   ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr);
1908   const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
1909   for (CeedInt e=0; e<num_elem; e++) {
1910     CeedInt count = 0;
1911     for (CeedInt q=0; q<num_qpts; q++)
1912       for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++)
1913         if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) >
1914             qf_value_bound) {
1915           elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] /
1916                          q_weight_array[q];
1917           count++;
1918         }
1919     if (count) {
1920       elem_avg[e] /= count;
1921     } else {
1922       elem_avg[e] = 1.0;
1923     }
1924   }
1925   ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr);
1926   ierr = CeedVectorDestroy(&assembled); CeedChk(ierr);
1927   ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr);
1928   ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr);
1929 
1930   // Build FDM diagonal
1931   CeedVector q_data;
1932   CeedScalar *q_data_array, *fdm_diagonal;
1933   ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr);
1934   const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON;
1935   for (CeedInt c=0; c<num_comp; c++)
1936     for (CeedInt n=0; n<elem_size; n++) {
1937       if (interp)
1938         fdm_diagonal[c*elem_size + n] = 1.0;
1939       if (grad)
1940         for (CeedInt d=0; d<dim; d++) {
1941           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
1942           fdm_diagonal[c*elem_size + n] += lambda[i];
1943         }
1944       if (fabs(fdm_diagonal[c*elem_size + n]) < fdm_diagonal_bound)
1945         fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound;
1946     }
1947   ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data);
1948   CeedChk(ierr);
1949   ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr);
1950   ierr = CeedVectorGetArray(q_data, CEED_MEM_HOST, &q_data_array); CeedChk(ierr);
1951   for (CeedInt e=0; e<num_elem; e++)
1952     for (CeedInt c=0; c<num_comp; c++)
1953       for (CeedInt n=0; n<elem_size; n++)
1954         q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] *
1955             fdm_diagonal[c*elem_size + n]);
1956   ierr = CeedFree(&elem_avg); CeedChk(ierr);
1957   ierr = CeedFree(&fdm_diagonal); CeedChk(ierr);
1958   ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr);
1959 
1960   // Setup FDM operator
1961   // -- Basis
1962   CeedBasis fdm_basis;
1963   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
1964   ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr);
1965   ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr);
1966   ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr);
1967   ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d,
1968                                  fdm_interp, grad_dummy, q_ref_dummy,
1969                                  q_weight_dummy, &fdm_basis); CeedChk(ierr);
1970   ierr = CeedFree(&fdm_interp); CeedChk(ierr);
1971   ierr = CeedFree(&grad_dummy); CeedChk(ierr);
1972   ierr = CeedFree(&q_ref_dummy); CeedChk(ierr);
1973   ierr = CeedFree(&q_weight_dummy); CeedChk(ierr);
1974   ierr = CeedFree(&lambda); CeedChk(ierr);
1975 
1976   // -- Restriction
1977   CeedElemRestriction rstr_qd_i;
1978   CeedInt strides[3] = {1, elem_size, elem_size*num_comp};
1979   ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size,
1980                                           num_comp, num_elem*num_comp*elem_size,
1981                                           strides, &rstr_qd_i); CeedChk(ierr);
1982   // -- QFunction
1983   CeedQFunction qf_fdm;
1984   ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm);
1985   CeedChk(ierr);
1986   ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP);
1987   CeedChk(ierr);
1988   ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE);
1989   CeedChk(ierr);
1990   ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP);
1991   CeedChk(ierr);
1992   // -- QFunction context
1993   CeedInt *num_comp_data;
1994   ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr);
1995   num_comp_data[0] = num_comp;
1996   CeedQFunctionContext ctx_fdm;
1997   ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr);
1998   ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER,
1999                                      sizeof(*num_comp_data), num_comp_data);
2000   CeedChk(ierr);
2001   ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr);
2002   ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr);
2003   // -- Operator
2004   ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv);
2005   CeedChk(ierr);
2006   CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
2007   CeedChk(ierr);
2008   CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED,
2009                        q_data); CeedChk(ierr);
2010   CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
2011   CeedChk(ierr);
2012 
2013   // Cleanup
2014   ierr = CeedVectorDestroy(&q_data); CeedChk(ierr);
2015   ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr);
2016   ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr);
2017   ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr);
2018 
2019   return CEED_ERROR_SUCCESS;
2020 }
2021 
2022 /// @}
2023