xref: /libCEED/interface/ceed-preconditioning.c (revision 7e7773b5b3fd61233915b5fe058a2730d1639517)
1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3 // reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <ceed-impl.h>
20 #include <math.h>
21 #include <stdbool.h>
22 #include <stdio.h>
23 #include <string.h>
24 
25 /// @file
26 /// Implementation of CeedOperator preconditioning interfaces
27 
28 /// ----------------------------------------------------------------------------
29 /// CeedOperator Library Internal Preconditioning Functions
30 /// ----------------------------------------------------------------------------
31 /// @addtogroup CeedOperatorDeveloper
32 /// @{
33 
34 /**
35   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced
36          CeedOperator functionality
37 
38   @param op  CeedOperator to create fallback for
39 
40   @return An error code: 0 - success, otherwise - failure
41 
42   @ref Developer
43 **/
44 int CeedOperatorCreateFallback(CeedOperator op) {
45   int ierr;
46 
47   // Fallback Ceed
48   const char *resource, *fallback_resource;
49   ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
50   ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
51   CeedChk(ierr);
52   if (!strcmp(resource, fallback_resource))
53     // LCOV_EXCL_START
54     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
55                      "Backend %s cannot create an operator"
56                      "fallback to resource %s", resource, fallback_resource);
57   // LCOV_EXCL_STOP
58 
59   // Fallback Ceed
60   Ceed ceed_ref;
61   if (!op->ceed->op_fallback_ceed) {
62     ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr);
63     ceed_ref->op_fallback_parent = op->ceed;
64     ceed_ref->Error = op->ceed->Error;
65     op->ceed->op_fallback_ceed = ceed_ref;
66   }
67   ceed_ref = op->ceed->op_fallback_ceed;
68 
69   // Clone Op
70   CeedOperator op_ref;
71   ierr = CeedCalloc(1, &op_ref); CeedChk(ierr);
72   memcpy(op_ref, op, sizeof(*op_ref));
73   op_ref->data = NULL;
74   op_ref->interface_setup = false;
75   op_ref->backend_setup = false;
76   op_ref->ceed = ceed_ref;
77   ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr);
78   op->op_fallback = op_ref;
79 
80   // Clone QF
81   CeedQFunction qf_ref;
82   ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr);
83   memcpy(qf_ref, (op->qf), sizeof(*qf_ref));
84   qf_ref->data = NULL;
85   qf_ref->ceed = ceed_ref;
86   ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr);
87   op_ref->qf = qf_ref;
88   op->qf_fallback = qf_ref;
89   return CEED_ERROR_SUCCESS;
90 }
91 
92 /**
93   @brief Select correct basis matrix pointer based on CeedEvalMode
94 
95   @param[in] eval_mode   Current basis evaluation mode
96   @param[in] identity    Pointer to identity matrix
97   @param[in] interp      Pointer to interpolation matrix
98   @param[in] grad        Pointer to gradient matrix
99   @param[out] basis_ptr  Basis pointer to set
100 
101   @return none
102 
103   @ref Developer
104 **/
105 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode,
106     const CeedScalar *identity, const CeedScalar *interp,
107     const CeedScalar *grad, const CeedScalar **basis_ptr) {
108   switch (eval_mode) {
109   case CEED_EVAL_NONE:
110     *basis_ptr = identity;
111     break;
112   case CEED_EVAL_INTERP:
113     *basis_ptr = interp;
114     break;
115   case CEED_EVAL_GRAD:
116     *basis_ptr = grad;
117     break;
118   case CEED_EVAL_WEIGHT:
119   case CEED_EVAL_DIV:
120   case CEED_EVAL_CURL:
121     break; // Caught by QF Assembly
122   }
123 }
124 
125 /**
126   @brief Create point block restriction for active operator field
127 
128   @param[in] rstr              Original CeedElemRestriction for active field
129   @param[out] pointblock_rstr  Address of the variable where the newly created
130                                  CeedElemRestriction will be stored
131 
132   @return An error code: 0 - success, otherwise - failure
133 
134   @ref Developer
135 **/
static int CeedOperatorCreateActivePointBlockRestriction(
  CeedElemRestriction rstr,
  CeedElemRestriction *pointblock_rstr) {
  int ierr;
  Ceed ceed;
  ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr);
  const CeedInt *offsets;
  ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets);
  CeedChk(ierr);

  // Expand offsets
  // Scale each node offset so consecutive entries address an entire
  // num_comp x num_comp point block rather than a single scalar
  CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1,
                                                      *pointblock_offsets;
  ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr);
  // NOTE(review): shift is num_comp when components are interlaced
  // (comp_stride == 1) and num_comp^2 otherwise -- presumably mirrors the
  // L-vector component layout; confirm against CeedElemRestrictionCreate
  // offset semantics
  CeedInt shift = num_comp;
  if (comp_stride != 1)
    shift *= num_comp;
  ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets);
  CeedChk(ierr);
  for (CeedInt i = 0; i < num_elem*elem_size; i++) {
    pointblock_offsets[i] = offsets[i]*shift;
    if (pointblock_offsets[i] > max)
      max = pointblock_offsets[i];
  }

  // Create new restriction
  // num_comp^2 components with unit component stride; the L-vector size is
  // the largest scaled offset plus one full point block.  The new restriction
  // takes ownership of pointblock_offsets (CEED_OWN_POINTER).
  ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp,
                                   1, max + num_comp*num_comp, CEED_MEM_HOST,
                                   CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr);
  CeedChk(ierr);

  // Cleanup
  ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}
175 
176 /**
177   @brief Core logic for assembling operator diagonal or point block diagonal
178 
179   @param[in] op             CeedOperator to assemble point block diagonal
180   @param[in] request        Address of CeedRequest for non-blocking completion, else
181                               CEED_REQUEST_IMMEDIATE
182   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
183   @param[out] assembled     CeedVector to store assembled diagonal
184 
185   @return An error code: 0 - success, otherwise - failure
186 
187   @ref Developer
188 **/
static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op,
    CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);

  // Assemble QFunction
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedInt num_input_fields, num_output_fields;
  ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
  CeedChk(ierr);
  CeedVector assembled_qf;
  CeedElemRestriction rstr;
  ierr = CeedOperatorLinearAssembleQFunction(op,  &assembled_qf, &rstr, request);
  CeedChk(ierr);
  CeedInt layout[3];
  ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
  // Max norm of the assembled QFunction data; used below as a relative drop
  // tolerance for near-zero entries
  CeedScalar max_norm = 0;
  ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr);

  // Determine active input basis
  CeedOperatorField *op_fields;
  CeedQFunctionField *qf_fields;
  ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
  CeedInt num_eval_mode_in = 0, num_comp, dim = 1;
  CeedEvalMode *eval_mode_in = NULL;
  CeedBasis basis_in = NULL;
  CeedElemRestriction rstr_in = NULL;
  for (CeedInt i=0; i<num_input_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr);
      ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr);
      ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      // All active input fields must share one restriction
      if (rstr_in && rstr_in != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_in = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
        eval_mode_in[num_eval_mode_in] = eval_mode;
        num_eval_mode_in += 1;
        break;
      case CEED_EVAL_GRAD:
        // Gradient contributes one eval mode entry per derivative direction
        ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_in[num_eval_mode_in+d] = eval_mode;
        num_eval_mode_in += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Determine active output basis
  ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
  CeedInt num_eval_mode_out = 0;
  CeedEvalMode *eval_mode_out = NULL;
  CeedBasis basis_out = NULL;
  CeedElemRestriction rstr_out = NULL;
  for (CeedInt i=0; i<num_output_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      // All active output fields must share one restriction
      if (rstr_out && rstr_out != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_out = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
        eval_mode_out[num_eval_mode_out] = eval_mode;
        num_eval_mode_out += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_out[num_eval_mode_out+d] = eval_mode;
        num_eval_mode_out += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Assemble point block diagonal restriction, if needed
  CeedElemRestriction diag_rstr = rstr_out;
  if (is_pointblock) {
    ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr);
    CeedChk(ierr);
  }

  // Create diagonal vector
  CeedVector elem_diag;
  ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
  CeedChk(ierr);

  // Assemble element operator diagonals
  CeedScalar *elem_diag_array, *assembled_qf_array;
  ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr);
  ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
  CeedChk(ierr);
  ierr = CeedVectorGetArray(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
  CeedChk(ierr);
  CeedInt num_elem, num_nodes, num_qpts;
  ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
  // Basis matrices
  const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
  CeedScalar *identity = NULL;
  bool evalNone = false;
  for (CeedInt i=0; i<num_eval_mode_in; i++)
    evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE);
  for (CeedInt i=0; i<num_eval_mode_out; i++)
    evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE);
  // CEED_EVAL_NONE is handled by multiplying with an identity-padded
  // num_qpts x num_nodes matrix
  if (evalNone) {
    ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr);
    for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++)
      identity[i*num_nodes+i] = 1.0;
  }
  ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
  ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr);
  // Compute the diagonal of B^T D B
  // Each element
  // Entries within rounding error of zero (relative to max_norm) are skipped
  const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
  for (CeedInt e=0; e<num_elem; e++) {
    CeedInt d_out = -1;
    // Each basis eval mode pair
    for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
      const CeedScalar *bt = NULL;
      // For GRAD modes, advance to the next derivative sub-matrix of grad_out
      if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
        d_out += 1;
      CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out,
                                  &grad_out[d_out*num_qpts*num_nodes], &bt);
      CeedInt d_in = -1;
      for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
        const CeedScalar *b = NULL;
        if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
          d_in += 1;
        CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in,
                                    &grad_in[d_in*num_qpts*num_nodes], &b);
        // Each component
        for (CeedInt c_out=0; c_out<num_comp; c_out++)
          // Each qpoint/node pair
          for (CeedInt q=0; q<num_qpts; q++)
            if (is_pointblock) {
              // Point Block Diagonal
              for (CeedInt c_in=0; c_in<num_comp; c_in++) {
                const CeedScalar qf_value =
                  assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)*
                                                     num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
                if (fabs(qf_value) > qf_value_bound)
                  for (CeedInt n=0; n<num_nodes; n++)
                    elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
                      bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
              }
            } else {
              // Diagonal Only
              const CeedScalar qf_value =
                assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)*
                                                   num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
              if (fabs(qf_value) > qf_value_bound)
                for (CeedInt n=0; n<num_nodes; n++)
                  elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
                    bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
            }
      }
    }
  }
  ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr);
  ierr = CeedVectorRestoreArray(assembled_qf, &assembled_qf_array); CeedChk(ierr);

  // Assemble local operator diagonal
  ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag,
                                  assembled, request); CeedChk(ierr);

  // Cleanup
  if (is_pointblock) {
    ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr);
  }
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
  ierr = CeedFree(&identity); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}
407 
408 /**
409   @brief Core logic for assembling composite operator diagonal
410 
411   @param[in] op             CeedOperator to assemble point block diagonal
412   @param[in] request        Address of CeedRequest for non-blocking completion, else
413                             CEED_REQUEST_IMMEDIATE
414   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
415   @param[out] assembled     CeedVector to store assembled diagonal
416 
417   @return An error code: 0 - success, otherwise - failure
418 
419   @ref Developer
420 **/
421 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(
422   CeedOperator op, CeedRequest *request, const bool is_pointblock,
423   CeedVector assembled) {
424   int ierr;
425   CeedInt num_sub;
426   CeedOperator *suboperators;
427   ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr);
428   ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr);
429   for (CeedInt i = 0; i < num_sub; i++) {
430     ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request,
431            is_pointblock, assembled); CeedChk(ierr);
432   }
433   return CEED_ERROR_SUCCESS;
434 }
435 
436 /**
437   @brief Build nonzero pattern for non-composite operator
438 
439   Users should generally use CeedOperatorLinearAssembleSymbolic()
440 
441   @param[in] op      CeedOperator to assemble nonzero pattern
442   @param[in] offset  Offset for number of entries
443   @param[out] rows   Row number for each entry
444   @param[out] cols   Column number for each entry
445 
446   @return An error code: 0 - success, otherwise - failure
447 
448   @ref Developer
449 **/
450 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset,
451     CeedInt *rows, CeedInt *cols) {
452   int ierr;
453   Ceed ceed = op->ceed;
454   if (op->composite)
455     // LCOV_EXCL_START
456     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
457                      "Composite operator not supported");
458   // LCOV_EXCL_STOP
459 
460   CeedElemRestriction rstr_in;
461   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr);
462   CeedInt num_elem, elem_size, num_nodes, num_comp;
463   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
464   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
465   ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr);
466   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
467   CeedInt layout_er[3];
468   ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr);
469 
470   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
471 
472   // Determine elem_dof relation
473   CeedVector index_vec;
474   ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr);
475   CeedScalar *array;
476   ierr = CeedVectorGetArray(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr);
477   for (CeedInt i = 0; i < num_nodes; ++i) {
478     array[i] = i;
479   }
480   ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr);
481   CeedVector elem_dof;
482   ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof);
483   CeedChk(ierr);
484   ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr);
485   CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec,
486                            elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
487   const CeedScalar *elem_dof_a;
488   ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a);
489   CeedChk(ierr);
490   ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr);
491 
492   // Determine i, j locations for element matrices
493   CeedInt count = 0;
494   for (int e = 0; e < num_elem; ++e) {
495     for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
496       for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
497         for (int i = 0; i < elem_size; ++i) {
498           for (int j = 0; j < elem_size; ++j) {
499             const CeedInt elem_dof_index_row = (i)*layout_er[0] +
500                                                (comp_out)*layout_er[1] + e*layout_er[2];
501             const CeedInt elem_dof_index_col = (j)*layout_er[0] +
502                                                (comp_in)*layout_er[1] + e*layout_er[2];
503 
504             const CeedInt row = elem_dof_a[elem_dof_index_row];
505             const CeedInt col = elem_dof_a[elem_dof_index_col];
506 
507             rows[offset + count] = row;
508             cols[offset + count] = col;
509             count++;
510           }
511         }
512       }
513     }
514   }
515   if (count != local_num_entries)
516     // LCOV_EXCL_START
517     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
518   // LCOV_EXCL_STOP
519   ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr);
520   ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr);
521 
522   return CEED_ERROR_SUCCESS;
523 }
524 
525 /**
526   @brief Assemble nonzero entries for non-composite operator
527 
528   Users should generally use CeedOperatorLinearAssemble()
529 
530   @param[in] op       CeedOperator to assemble
531   @param[in] offset   Offset for number of entries
532   @param[out] values  Values to assemble into matrix
533 
534   @return An error code: 0 - success, otherwise - failure
535 
536   @ref Developer
537 **/
538 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
539                                       CeedVector values) {
540   int ierr;
541   Ceed ceed = op->ceed;
542   if (op->composite)
543     // LCOV_EXCL_START
544     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
545                      "Composite operator not supported");
546   // LCOV_EXCL_STOP
547 
548   // Assemble QFunction
549   CeedQFunction qf;
550   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
551   CeedVector assembled_qf;
552   CeedElemRestriction rstr_q;
553   ierr = CeedOperatorLinearAssembleQFunction(
554            op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
555 
556   CeedInt qf_length;
557   ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr);
558 
559   CeedInt num_input_fields, num_output_fields;
560   CeedOperatorField *input_fields;
561   CeedOperatorField *output_fields;
562   ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields,
563                                &num_output_fields, &output_fields); CeedChk(ierr);
564 
565   // Determine active input basis
566   CeedQFunctionField *qf_fields;
567   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
568   CeedInt num_eval_mode_in = 0, dim = 1;
569   CeedEvalMode *eval_mode_in = NULL;
570   CeedBasis basis_in = NULL;
571   CeedElemRestriction rstr_in = NULL;
572   for (CeedInt i=0; i<num_input_fields; i++) {
573     CeedVector vec;
574     ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr);
575     if (vec == CEED_VECTOR_ACTIVE) {
576       ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in);
577       CeedChk(ierr);
578       ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
579       ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in);
580       CeedChk(ierr);
581       CeedEvalMode eval_mode;
582       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
583       CeedChk(ierr);
584       switch (eval_mode) {
585       case CEED_EVAL_NONE:
586       case CEED_EVAL_INTERP:
587         ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
588         eval_mode_in[num_eval_mode_in] = eval_mode;
589         num_eval_mode_in += 1;
590         break;
591       case CEED_EVAL_GRAD:
592         ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
593         for (CeedInt d=0; d<dim; d++) {
594           eval_mode_in[num_eval_mode_in+d] = eval_mode;
595         }
596         num_eval_mode_in += dim;
597         break;
598       case CEED_EVAL_WEIGHT:
599       case CEED_EVAL_DIV:
600       case CEED_EVAL_CURL:
601         break; // Caught by QF Assembly
602       }
603     }
604   }
605 
606   // Determine active output basis
607   ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
608   CeedInt num_eval_mode_out = 0;
609   CeedEvalMode *eval_mode_out = NULL;
610   CeedBasis basis_out = NULL;
611   CeedElemRestriction rstr_out = NULL;
612   for (CeedInt i=0; i<num_output_fields; i++) {
613     CeedVector vec;
614     ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr);
615     if (vec == CEED_VECTOR_ACTIVE) {
616       ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out);
617       CeedChk(ierr);
618       ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out);
619       CeedChk(ierr);
620       CeedEvalMode eval_mode;
621       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
622       CeedChk(ierr);
623       switch (eval_mode) {
624       case CEED_EVAL_NONE:
625       case CEED_EVAL_INTERP:
626         ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
627         eval_mode_out[num_eval_mode_out] = eval_mode;
628         num_eval_mode_out += 1;
629         break;
630       case CEED_EVAL_GRAD:
631         ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
632         for (CeedInt d=0; d<dim; d++) {
633           eval_mode_out[num_eval_mode_out+d] = eval_mode;
634         }
635         num_eval_mode_out += dim;
636         break;
637       case CEED_EVAL_WEIGHT:
638       case CEED_EVAL_DIV:
639       case CEED_EVAL_CURL:
640         break; // Caught by QF Assembly
641       }
642     }
643   }
644 
645   if (num_eval_mode_in == 0 || num_eval_mode_out == 0)
646     // LCOV_EXCL_START
647     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
648                      "Cannot assemble operator with out inputs/outputs");
649   // LCOV_EXCL_STOP
650 
651   CeedInt num_elem, elem_size, num_qpts, num_comp;
652   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
653   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
654   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
655   ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
656 
657   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
658 
659   // loop over elements and put in data structure
660   const CeedScalar *interp_in, *grad_in;
661   ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
662   ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
663 
664   const CeedScalar *assembled_qf_array;
665   ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
666   CeedChk(ierr);
667 
668   CeedInt layout_qf[3];
669   ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr);
670   ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr);
671 
672   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
673   CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size];
674   CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size];
675   CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in *
676                                      num_qpts]; // logically 3-tensor
677   CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in];
678   CeedScalar elem_mat[elem_size * elem_size];
679   int count = 0;
680   CeedScalar *vals;
681   ierr = CeedVectorGetArray(values, CEED_MEM_HOST, &vals); CeedChk(ierr);
682   for (int e = 0; e < num_elem; ++e) {
683     for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
684       for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
685         for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) {
686           B_mat_in[ell] = 0.0;
687         }
688         for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) {
689           B_mat_out[ell] = 0.0;
690         }
691         // Store block-diagonal D matrix as collection of small dense blocks
692         for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) {
693           D_mat[ell] = 0.0;
694         }
695         // form element matrix itself (for each block component)
696         for (int ell = 0; ell < elem_size*elem_size; ++ell) {
697           elem_mat[ell] = 0.0;
698         }
699         for (int q = 0; q < num_qpts; ++q) {
700           for (int n = 0; n < elem_size; ++n) {
701             CeedInt d_in = -1;
702             for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) {
703               const int qq = num_eval_mode_in*q;
704               if (eval_mode_in[e_in] == CEED_EVAL_INTERP) {
705                 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n];
706               } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) {
707                 d_in += 1;
708                 B_mat_in[(qq+e_in)*elem_size + n] +=
709                   grad_in[(d_in*num_qpts+q) * elem_size + n];
710               } else {
711                 // LCOV_EXCL_START
712                 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
713                 // LCOV_EXCL_STOP
714               }
715             }
716             CeedInt d_out = -1;
717             for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) {
718               const int qq = num_eval_mode_out*q;
719               if (eval_mode_out[e_out] == CEED_EVAL_INTERP) {
720                 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n];
721               } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) {
722                 d_out += 1;
723                 B_mat_out[(qq+e_out)*elem_size + n] +=
724                   grad_in[(d_out*num_qpts+q) * elem_size + n];
725               } else {
726                 // LCOV_EXCL_START
727                 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
728                 // LCOV_EXCL_STOP
729               }
730             }
731           }
732           for (int ei = 0; ei < num_eval_mode_out; ++ei) {
733             for (int ej = 0; ej < num_eval_mode_in; ++ej) {
734               const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp
735                                           +comp_out;
736               const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
737                                 e*layout_qf[2];
738               D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index];
739             }
740           }
741         }
742         // Compute B^T*D
743         for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) {
744           BTD[ell] = 0.0;
745         }
746         for (int j = 0; j<elem_size; ++j) {
747           for (int q = 0; q<num_qpts; ++q) {
748             int qq = num_eval_mode_out*q;
749             for (int ei = 0; ei < num_eval_mode_in; ++ei) {
750               for (int ej = 0; ej < num_eval_mode_out; ++ej) {
751                 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] +=
752                   B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q];
753               }
754             }
755           }
756         }
757 
758         ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size,
759                                   elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);
760 
761         // put element matrix in coordinate data structure
762         for (int i = 0; i < elem_size; ++i) {
763           for (int j = 0; j < elem_size; ++j) {
764             vals[offset + count] = elem_mat[i*elem_size + j];
765             count++;
766           }
767         }
768       }
769     }
770   }
771   if (count != local_num_entries)
772     // LCOV_EXCL_START
773     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
774   // LCOV_EXCL_STOP
775   ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr);
776 
777   ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
778   CeedChk(ierr);
779   ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
780   ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
781   ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
782 
783   return CEED_ERROR_SUCCESS;
784 }
785 
786 /**
787   @brief Count number of entries for assembled CeedOperator
788 
789   @param[in] op            CeedOperator to assemble
790   @param[out] num_entries  Number of entries in assembled representation
791 
792   @return An error code: 0 - success, otherwise - failure
793 
794   @ref Utility
795 **/
796 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op,
797     CeedInt *num_entries) {
798   int ierr;
799   CeedElemRestriction rstr;
800   CeedInt num_elem, elem_size, num_comp;
801 
802   if (op->composite)
803     // LCOV_EXCL_START
804     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
805                      "Composite operator not supported");
806   // LCOV_EXCL_STOP
807   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr);
808   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
809   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
810   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
811   *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
812 
813   return CEED_ERROR_SUCCESS;
814 }
815 
816 /**
817   @brief Common code for creating a multigrid coarse operator and level
818            transfer operators for a CeedOperator
819 
820   @param[in] op_fine       Fine grid operator
821   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
822   @param[in] rstr_coarse   Coarse grid restriction
823   @param[in] basis_coarse  Coarse grid active vector basis
824   @param[in] basis_c_to_f  Basis for coarse to fine interpolation
825   @param[out] op_coarse    Coarse grid operator
826   @param[out] op_prolong   Coarse to fine operator
827   @param[out] op_restrict  Fine to coarse operator
828 
829   @return An error code: 0 - success, otherwise - failure
830 
831   @ref Developer
832 **/
833 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
834     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
835     CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
836     CeedOperator *op_restrict) {
837   int ierr;
838   Ceed ceed;
839   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
840 
841   // Check for composite operator
842   bool is_composite;
843   ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
844   if (is_composite)
845     // LCOV_EXCL_START
846     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
847                      "Automatic multigrid setup for composite operators not supported");
848   // LCOV_EXCL_STOP
849 
850   // Coarse Grid
851   ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
852                             op_coarse); CeedChk(ierr);
853   CeedElemRestriction rstr_fine = NULL;
854   // -- Clone input fields
855   for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
856     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
857       rstr_fine = op_fine->input_fields[i]->elem_restr;
858       ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
859                                   rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
860       CeedChk(ierr);
861     } else {
862       ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
863                                   op_fine->input_fields[i]->elem_restr,
864                                   op_fine->input_fields[i]->basis,
865                                   op_fine->input_fields[i]->vec); CeedChk(ierr);
866     }
867   }
868   // -- Clone output fields
869   for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
870     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
871       ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
872                                   rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
873       CeedChk(ierr);
874     } else {
875       ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
876                                   op_fine->output_fields[i]->elem_restr,
877                                   op_fine->output_fields[i]->basis,
878                                   op_fine->output_fields[i]->vec); CeedChk(ierr);
879     }
880   }
881 
882   // Multiplicity vector
883   CeedVector mult_vec, mult_e_vec;
884   ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
885   CeedChk(ierr);
886   ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
887   ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
888                                   mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
889   ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
890   ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
891                                   CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
892   ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
893   ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);
894 
895   // Restriction
896   CeedInt num_comp;
897   ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
898   CeedQFunction qf_restrict;
899   ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
900   CeedChk(ierr);
901   CeedInt *num_comp_r_data;
902   ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
903   num_comp_r_data[0] = num_comp;
904   CeedQFunctionContext ctx_r;
905   ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
906   ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
907                                      sizeof(*num_comp_r_data), num_comp_r_data);
908   CeedChk(ierr);
909   ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
910   ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
911   ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
912   CeedChk(ierr);
913   ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
914   CeedChk(ierr);
915   ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
916                                 CEED_EVAL_INTERP); CeedChk(ierr);
917 
918   ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
919                             CEED_QFUNCTION_NONE, op_restrict);
920   CeedChk(ierr);
921   ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
922                               CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
923   CeedChk(ierr);
924   ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
925                               CEED_BASIS_COLLOCATED, mult_vec);
926   CeedChk(ierr);
927   ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
928                               CEED_VECTOR_ACTIVE); CeedChk(ierr);
929 
930   // Prolongation
931   CeedQFunction qf_prolong;
932   ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
933   CeedChk(ierr);
934   CeedInt *num_comp_p_data;
935   ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
936   num_comp_p_data[0] = num_comp;
937   CeedQFunctionContext ctx_p;
938   ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
939   ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
940                                      sizeof(*num_comp_p_data), num_comp_p_data);
941   CeedChk(ierr);
942   ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
943   ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
944   ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
945   CeedChk(ierr);
946   ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
947   CeedChk(ierr);
948   ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
949   CeedChk(ierr);
950 
951   ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
952                             CEED_QFUNCTION_NONE, op_prolong);
953   CeedChk(ierr);
954   ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
955                               CEED_VECTOR_ACTIVE); CeedChk(ierr);
956   ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
957                               CEED_BASIS_COLLOCATED, mult_vec);
958   CeedChk(ierr);
959   ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
960                               CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
961   CeedChk(ierr);
962 
963   // Cleanup
964   ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
965   ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
966   ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
967   ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
968   return CEED_ERROR_SUCCESS;
969 }
970 
971 /**
972   @brief Build 1D mass matrix and Laplacian with perturbation
973 
974   @param[in] interp_1d    Interpolation matrix in one dimension
975   @param[in] grad_1d      Gradient matrix in one dimension
976   @param[in] q_weight_1d  Quadrature weights in one dimension
977   @param[in] P_1d         Number of basis nodes in one dimension
978   @param[in] Q_1d         Number of quadrature points in one dimension
979   @param[in] dim          Dimension of basis
980   @param[out] mass        Assembled mass matrix in one dimension
981   @param[out] laplace     Assembled perturbed Laplacian in one dimension
982 
983   @return An error code: 0 - success, otherwise - failure
984 
985   @ref Developer
986 **/
987 CeedPragmaOptimizeOff
988 static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
989                                 const CeedScalar *grad_1d,
990                                 const CeedScalar *q_weight_1d, CeedInt P_1d,
991                                 CeedInt Q_1d, CeedInt dim,
992                                 CeedScalar *mass, CeedScalar *laplace) {
993 
994   for (CeedInt i=0; i<P_1d; i++)
995     for (CeedInt j=0; j<P_1d; j++) {
996       CeedScalar sum = 0.0;
997       for (CeedInt k=0; k<Q_1d; k++)
998         sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
999       mass[i+j*P_1d] = sum;
1000     }
1001   // -- Laplacian
1002   for (CeedInt i=0; i<P_1d; i++)
1003     for (CeedInt j=0; j<P_1d; j++) {
1004       CeedScalar sum = 0.0;
1005       for (CeedInt k=0; k<Q_1d; k++)
1006         sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
1007       laplace[i+j*P_1d] = sum;
1008     }
1009   CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
1010   for (CeedInt i=0; i<P_1d; i++)
1011     laplace[i+P_1d*i] += perturbation;
1012   return CEED_ERROR_SUCCESS;
1013 }
1014 CeedPragmaOptimizeOn
1015 
1016 /// @}
1017 
1018 /// ----------------------------------------------------------------------------
1019 /// CeedOperator Public API
1020 /// ----------------------------------------------------------------------------
1021 /// @addtogroup CeedOperatorUser
1022 /// @{
1023 
1024 /**
1025   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1026 
1027   This returns a CeedVector containing a matrix at each quadrature point
1028     providing the action of the CeedQFunction associated with the CeedOperator.
1029     The vector 'assembled' is of shape
1030       [num_elements, num_input_fields, num_output_fields, num_quad_points]
1031     and contains column-major matrices representing the action of the
1032     CeedQFunction for a corresponding quadrature point on an element. Inputs and
1033     outputs are in the order provided by the user when adding CeedOperator fields.
1034     For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
1035     'v', provided in that order, would result in an assembled QFunction that
1036     consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
1037     on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1038 
1039   @param op              CeedOperator to assemble CeedQFunction
1040   @param[out] assembled  CeedVector to store assembled CeedQFunction at
1041                            quadrature points
1042   @param[out] rstr       CeedElemRestriction for CeedVector containing assembled
1043                            CeedQFunction
1044   @param request         Address of CeedRequest for non-blocking completion, else
1045                            @ref CEED_REQUEST_IMMEDIATE
1046 
1047   @return An error code: 0 - success, otherwise - failure
1048 
1049   @ref User
1050 **/
1051 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
1052                                         CeedElemRestriction *rstr,
1053                                         CeedRequest *request) {
1054   int ierr;
1055   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1056 
1057   // Backend version
1058   if (op->LinearAssembleQFunction) {
1059     ierr = op->LinearAssembleQFunction(op, assembled, rstr, request); CeedChk(ierr);
1060     return CEED_ERROR_SUCCESS;
1061   } else {
1062     // Fallback to reference Ceed
1063     if (!op->op_fallback) {
1064       ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1065     }
1066     // Assemble
1067     ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled,
1068            rstr, request); CeedChk(ierr);
1069     return CEED_ERROR_SUCCESS;
1070   }
1071 }
1072 
1073 /**
1074   @brief Assemble the diagonal of a square linear CeedOperator
1075 
1076   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1077 
1078   Note: Currently only non-composite CeedOperators with a single field and
1079           composite CeedOperators with single field sub-operators are supported.
1080 
  @param op              CeedOperator to assemble diagonal for
1082   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1083   @param request         Address of CeedRequest for non-blocking completion, else
1084                            @ref CEED_REQUEST_IMMEDIATE
1085 
1086   @return An error code: 0 - success, otherwise - failure
1087 
1088   @ref User
1089 **/
1090 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
1091                                        CeedRequest *request) {
1092   int ierr;
1093   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1094 
1095   // Use backend version, if available
1096   if (op->LinearAssembleDiagonal) {
1097     ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
1098     return CEED_ERROR_SUCCESS;
1099   } else if (op->LinearAssembleAddDiagonal) {
1100     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1101     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1102     return CEED_ERROR_SUCCESS;
1103   } else {
1104     // Check for valid fallback resource
1105     const char *resource, *fallback_resource;
1106     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1107     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1108     CeedChk(ierr);
1109     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1110       // Fallback to reference Ceed
1111       if (!op->op_fallback) {
1112         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1113       }
1114       // Assemble
1115       ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
1116       CeedChk(ierr);
1117       return CEED_ERROR_SUCCESS;
1118     }
1119   }
1120 
1121   // Default interface implementation
1122   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1123   ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
1124   CeedChk(ierr);
1125   return CEED_ERROR_SUCCESS;
1126 }
1127 
1128 /**
1129   @brief Assemble the diagonal of a square linear CeedOperator
1130 
1131   This sums into a CeedVector the diagonal of a linear CeedOperator.
1132 
1133   Note: Currently only non-composite CeedOperators with a single field and
1134           composite CeedOperators with single field sub-operators are supported.
1135 
  @param op              CeedOperator to assemble diagonal for
1137   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1138   @param request         Address of CeedRequest for non-blocking completion, else
1139                            @ref CEED_REQUEST_IMMEDIATE
1140 
1141   @return An error code: 0 - success, otherwise - failure
1142 
1143   @ref User
1144 **/
1145 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
1146     CeedRequest *request) {
1147   int ierr;
1148   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1149 
1150   // Use backend version, if available
1151   if (op->LinearAssembleAddDiagonal) {
1152     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1153     return CEED_ERROR_SUCCESS;
1154   } else {
1155     // Check for valid fallback resource
1156     const char *resource, *fallback_resource;
1157     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1158     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1159     CeedChk(ierr);
1160     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1161       // Fallback to reference Ceed
1162       if (!op->op_fallback) {
1163         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1164       }
1165       // Assemble
1166       ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled,
1167              request); CeedChk(ierr);
1168       return CEED_ERROR_SUCCESS;
1169     }
1170   }
1171 
1172   // Default interface implementation
1173   bool is_composite;
1174   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1175   if (is_composite) {
1176     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1177            false, assembled); CeedChk(ierr);
1178     return CEED_ERROR_SUCCESS;
1179   } else {
1180     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
1181     CeedChk(ierr);
1182     return CEED_ERROR_SUCCESS;
1183   }
1184 }
1185 
1186 /**
1187   @brief Assemble the point block diagonal of a square linear CeedOperator
1188 
1189   This overwrites a CeedVector with the point block diagonal of a linear
1190     CeedOperator.
1191 
1192   Note: Currently only non-composite CeedOperators with a single field and
1193           composite CeedOperators with single field sub-operators are supported.
1194 
  @param op              CeedOperator to assemble point block diagonal for
1196   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1197                            diagonal, provided in row-major form with an
1198                            @a num_comp * @a num_comp block at each node. The dimensions
1199                            of this vector are derived from the active vector
1200                            for the CeedOperator. The array has shape
1201                            [nodes, component out, component in].
1202   @param request         Address of CeedRequest for non-blocking completion, else
1203                            CEED_REQUEST_IMMEDIATE
1204 
1205   @return An error code: 0 - success, otherwise - failure
1206 
1207   @ref User
1208 **/
1209 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
1210     CeedVector assembled, CeedRequest *request) {
1211   int ierr;
1212   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1213 
1214   // Use backend version, if available
1215   if (op->LinearAssemblePointBlockDiagonal) {
1216     ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
1217     CeedChk(ierr);
1218     return CEED_ERROR_SUCCESS;
1219   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1220     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1221     ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
1222            request); CeedChk(ierr);
1223     return CEED_ERROR_SUCCESS;
1224   } else {
1225     // Check for valid fallback resource
1226     const char *resource, *fallback_resource;
1227     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1228     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1229     CeedChk(ierr);
1230     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1231       // Fallback to reference Ceed
1232       if (!op->op_fallback) {
1233         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1234       }
1235       // Assemble
1236       ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
1237              assembled, request); CeedChk(ierr);
1238       return CEED_ERROR_SUCCESS;
1239     }
1240   }
1241 
1242   // Default interface implementation
1243   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1244   ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
1245   CeedChk(ierr);
1246   return CEED_ERROR_SUCCESS;
1247 }
1248 
1249 /**
1250   @brief Assemble the point block diagonal of a square linear CeedOperator
1251 
1252   This sums into a CeedVector with the point block diagonal of a linear
1253     CeedOperator.
1254 
1255   Note: Currently only non-composite CeedOperators with a single field and
1256           composite CeedOperators with single field sub-operators are supported.
1257 
  @param op              CeedOperator to assemble point block diagonal for
1259   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1260                            diagonal, provided in row-major form with an
1261                            @a num_comp * @a num_comp block at each node. The dimensions
1262                            of this vector are derived from the active vector
1263                            for the CeedOperator. The array has shape
1264                            [nodes, component out, component in].
1265   @param request         Address of CeedRequest for non-blocking completion, else
1266                            CEED_REQUEST_IMMEDIATE
1267 
1268   @return An error code: 0 - success, otherwise - failure
1269 
1270   @ref User
1271 **/
1272 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
1273     CeedVector assembled, CeedRequest *request) {
1274   int ierr;
1275   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1276 
1277   // Use backend version, if available
1278   if (op->LinearAssembleAddPointBlockDiagonal) {
1279     ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
1280     CeedChk(ierr);
1281     return CEED_ERROR_SUCCESS;
1282   } else {
1283     // Check for valid fallback resource
1284     const char *resource, *fallback_resource;
1285     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1286     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1287     CeedChk(ierr);
1288     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1289       // Fallback to reference Ceed
1290       if (!op->op_fallback) {
1291         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1292       }
1293       // Assemble
1294       ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback,
1295              assembled, request); CeedChk(ierr);
1296       return CEED_ERROR_SUCCESS;
1297     }
1298   }
1299 
1300   // Default interface implemenation
1301   bool is_composite;
1302   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1303   if (is_composite) {
1304     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1305            true, assembled); CeedChk(ierr);
1306     return CEED_ERROR_SUCCESS;
1307   } else {
1308     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
1309     CeedChk(ierr);
1310     return CEED_ERROR_SUCCESS;
1311   }
1312 }
1313 
1314 /**
1315    @brief Fully assemble the nonzero pattern of a linear operator.
1316 
1317    Expected to be used in conjunction with CeedOperatorLinearAssemble()
1318 
1319    The assembly routines use coordinate format, with num_entries tuples of the
1320    form (i, j, value) which indicate that value should be added to the matrix
1321    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1322    This function returns the number of entries and their (i, j) locations,
1323    while CeedOperatorLinearAssemble() provides the values in the same
1324    ordering.
1325 
1326    This will generally be slow unless your operator is low-order.
1327 
1328    @param[in]  op           CeedOperator to assemble
1329    @param[out] num_entries  Number of entries in coordinate nonzero pattern
1330    @param[out] rows         Row number for each entry
1331    @param[out] cols         Column number for each entry
1332 
   @return An error code: 0 - success, otherwise - failure

   @ref User
1334 **/
1335 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedInt *num_entries,
1336                                        CeedInt **rows, CeedInt **cols) {
1337   int ierr;
1338   CeedInt num_suboperators, single_entries;
1339   CeedOperator *sub_operators;
1340   bool is_composite;
1341   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1342 
1343   // Use backend version, if available
1344   if (op->LinearAssembleSymbolic) {
1345     ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
1346     return CEED_ERROR_SUCCESS;
1347   } else {
1348     // Check for valid fallback resource
1349     const char *resource, *fallback_resource;
1350     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1351     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1352     CeedChk(ierr);
1353     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1354       // Fallback to reference Ceed
1355       if (!op->op_fallback) {
1356         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1357       }
1358       // Assemble
1359       ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
1360              cols); CeedChk(ierr);
1361       return CEED_ERROR_SUCCESS;
1362     }
1363   }
1364 
1365   // Default interface implementation
1366 
1367   // count entries and allocate rows, cols arrays
1368   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1369   *num_entries = 0;
1370   if (is_composite) {
1371     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1372     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1373     for (int k = 0; k < num_suboperators; ++k) {
1374       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1375              &single_entries); CeedChk(ierr);
1376       *num_entries += single_entries;
1377     }
1378   } else {
1379     ierr = CeedSingleOperatorAssemblyCountEntries(op,
1380            &single_entries); CeedChk(ierr);
1381     *num_entries += single_entries;
1382   }
1383   ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
1384   ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);
1385 
1386   // assemble nonzero locations
1387   CeedInt offset = 0;
1388   if (is_composite) {
1389     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1390     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1391     for (int k = 0; k < num_suboperators; ++k) {
1392       ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
1393              *cols); CeedChk(ierr);
1394       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1395              &single_entries);
1396       CeedChk(ierr);
1397       offset += single_entries;
1398     }
1399   } else {
1400     ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
1401     CeedChk(ierr);
1402   }
1403 
1404   return CEED_ERROR_SUCCESS;
1405 }
1406 
1407 /**
1408    @brief Fully assemble the nonzero entries of a linear operator.
1409 
1410    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()
1411 
1412    The assembly routines use coordinate format, with num_entries tuples of the
1413    form (i, j, value) which indicate that value should be added to the matrix
1414    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1415    This function returns the values of the nonzero entries to be added, their
1416    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1417 
1418    This will generally be slow unless your operator is low-order.
1419 
1420    @param[in]  op      CeedOperator to assemble
1421    @param[out] values  Values to assemble into matrix
1422 
   @return An error code: 0 - success, otherwise - failure

   @ref User
1424 **/
1425 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1426   int ierr;
1427   CeedInt num_suboperators, single_entries = 0;
1428   CeedOperator *sub_operators;
1429   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1430 
1431   // Use backend version, if available
1432   if (op->LinearAssemble) {
1433     ierr = op->LinearAssemble(op, values); CeedChk(ierr);
1434     return CEED_ERROR_SUCCESS;
1435   } else {
1436     // Check for valid fallback resource
1437     const char *resource, *fallback_resource;
1438     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1439     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1440     CeedChk(ierr);
1441     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1442       // Fallback to reference Ceed
1443       if (!op->op_fallback) {
1444         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1445       }
1446       // Assemble
1447       ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr);
1448       return CEED_ERROR_SUCCESS;
1449     }
1450   }
1451 
1452   // Default interface implementation
1453   bool is_composite;
1454   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1455 
1456   CeedInt offset = 0;
1457   if (is_composite) {
1458     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1459     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1460     for (int k = 0; k < num_suboperators; ++k) {
1461       ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
1462       CeedChk(ierr);
1463       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1464              &single_entries);
1465       CeedChk(ierr);
1466       offset += single_entries;
1467     }
1468   } else {
1469     ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
1470   }
1471 
1472   return CEED_ERROR_SUCCESS;
1473 }
1474 
1475 /**
1476   @brief Create a multigrid coarse operator and level transfer operators
1477            for a CeedOperator, creating the prolongation basis from the
1478            fine and coarse grid interpolation
1479 
1480   @param[in] op_fine       Fine grid operator
1481   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
1482   @param[in] rstr_coarse   Coarse grid restriction
1483   @param[in] basis_coarse  Coarse grid active vector basis
1484   @param[out] op_coarse    Coarse grid operator
1485   @param[out] op_prolong   Coarse to fine operator
1486   @param[out] op_restrict  Fine to coarse operator
1487 
1488   @return An error code: 0 - success, otherwise - failure
1489 
1490   @ref User
1491 **/
int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
                                     CeedVector p_mult_fine,
                                     CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
                                     CeedOperator *op_coarse, CeedOperator *op_prolong,
                                     CeedOperator *op_restrict) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  // Both bases must evaluate at the same number of quadrature points
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  // For tensor bases the interpolation is built from the 1D factors, so the
  // 1D sizes are used; non-tensor bases use the full node counts
  CeedInt P_f, P_c, Q = Q_f;
  bool is_tensor_f, is_tensor_c;
  ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
  ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
  CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
  if (is_tensor_f && is_tensor_c) {
    ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
    ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
  } else if (!is_tensor_f && !is_tensor_c) {
    ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
  } else {
    // Mixing tensor and non-tensor bases between levels is not supported
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MINOR,
                     "Bases must both be tensor or non-tensor");
    // LCOV_EXCL_STOP
  }

  // Work arrays: interp_f/interp_c are Q x P_f / Q x P_c copies of the basis
  // interpolation matrices; interp_c_to_f is the P_f x P_c result; tau holds
  // the Householder scalars from the QR factorization
  ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
  ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
  ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
  ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
  // Copy the interpolation matrices (1D factors for tensor bases); direct
  // struct member access relies on the backend header (ceed-impl.h) layout
  if (is_tensor_f) {
    memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]);
    memcpy(interp_c, basis_coarse->interp_1d,
           Q*P_c*sizeof basis_coarse->interp_1d[0]);
  } else {
    memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]);
    memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]);
  }

  // Solve interp_f * interp_c_to_f = interp_c in the least-squares sense,
  // i.e. interp_c_to_f = pinv(interp_f) * interp_c, via QR of interp_f
  // -- QR Factorization, interp_f = Q R
  ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);

  // -- Apply Qtranspose, interp_c = Qtranspose interp_c
  ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
                               Q, P_c, P_f, P_c, 1); CeedChk(ierr);

  // -- Apply Rinv, interp_c_to_f = Rinv interp_c
  // Back-substitution with the upper-triangular R stored in the top P_f rows
  // of interp_f; matrices are row-major, entry (i, j) at index [j + P_c*i]
  for (CeedInt j=0; j<P_c; j++) { // Column j
    interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
    for (CeedInt i=P_f-2; i>=0; i--) { // Row i
      interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
      for (CeedInt k=i+1; k<P_f; k++)
        interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
      interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
    }
  }
  ierr = CeedFree(&tau); CeedChk(ierr);
  ierr = CeedFree(&interp_c); CeedChk(ierr);
  ierr = CeedFree(&interp_f); CeedChk(ierr);

  // Complete with interp_c_to_f versions of code
  if (is_tensor_f) {
    ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  } else {
    ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  }

  // Cleanup
  ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}
1582 
1583 /**
1584   @brief Create a multigrid coarse operator and level transfer operators
1585            for a CeedOperator with a tensor basis for the active basis
1586 
1587   @param[in] op_fine        Fine grid operator
1588   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
1589   @param[in] rstr_coarse    Coarse grid restriction
1590   @param[in] basis_coarse   Coarse grid active vector basis
1591   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
1592   @param[out] op_coarse     Coarse grid operator
1593   @param[out] op_prolong    Coarse to fine operator
1594   @param[out] op_restrict   Fine to coarse operator
1595 
1596   @return An error code: 0 - success, otherwise - failure
1597 
1598   @ref User
1599 **/
1600 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
1601     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1602     const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
1603     CeedOperator *op_prolong, CeedOperator *op_restrict) {
1604   int ierr;
1605   Ceed ceed;
1606   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
1607 
1608   // Check for compatible quadrature spaces
1609   CeedBasis basis_fine;
1610   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
1611   CeedInt Q_f, Q_c;
1612   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
1613   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
1614   if (Q_f != Q_c)
1615     // LCOV_EXCL_START
1616     return CeedError(ceed, CEED_ERROR_DIMENSION,
1617                      "Bases must have compatible quadrature spaces");
1618   // LCOV_EXCL_STOP
1619 
1620   // Coarse to fine basis
1621   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
1622   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
1623   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
1624   ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr);
1625   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
1626   CeedChk(ierr);
1627   P_1d_c = dim == 1 ? num_nodes_c :
1628            dim == 2 ? sqrt(num_nodes_c) :
1629            cbrt(num_nodes_c);
1630   CeedScalar *q_ref, *q_weight, *grad;
1631   ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr);
1632   ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr);
1633   ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr);
1634   CeedBasis basis_c_to_f;
1635   ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f,
1636                                  interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
1637   CeedChk(ierr);
1638   ierr = CeedFree(&q_ref); CeedChk(ierr);
1639   ierr = CeedFree(&q_weight); CeedChk(ierr);
1640   ierr = CeedFree(&grad); CeedChk(ierr);
1641 
1642   // Core code
1643   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
1644                                           basis_coarse, basis_c_to_f, op_coarse,
1645                                           op_prolong, op_restrict);
1646   CeedChk(ierr);
1647   return CEED_ERROR_SUCCESS;
1648 }
1649 
1650 /**
  @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator with a non-tensor basis for the active vector
           basis
1653 
1654   @param[in] op_fine        Fine grid operator
1655   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
1656   @param[in] rstr_coarse    Coarse grid restriction
1657   @param[in] basis_coarse   Coarse grid active vector basis
1658   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
1659   @param[out] op_coarse     Coarse grid operator
1660   @param[out] op_prolong    Coarse to fine operator
1661   @param[out] op_restrict   Fine to coarse operator
1662 
1663   @return An error code: 0 - success, otherwise - failure
1664 
1665   @ref User
1666 **/
1667 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine,
1668                                        CeedVector p_mult_fine,
1669                                        CeedElemRestriction rstr_coarse,
1670                                        CeedBasis basis_coarse,
1671                                        const CeedScalar *interp_c_to_f,
1672                                        CeedOperator *op_coarse,
1673                                        CeedOperator *op_prolong,
1674                                        CeedOperator *op_restrict) {
1675   int ierr;
1676   Ceed ceed;
1677   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
1678 
1679   // Check for compatible quadrature spaces
1680   CeedBasis basis_fine;
1681   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
1682   CeedInt Q_f, Q_c;
1683   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
1684   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
1685   if (Q_f != Q_c)
1686     // LCOV_EXCL_START
1687     return CeedError(ceed, CEED_ERROR_DIMENSION,
1688                      "Bases must have compatible quadrature spaces");
1689   // LCOV_EXCL_STOP
1690 
1691   // Coarse to fine basis
1692   CeedElemTopology topo;
1693   ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr);
1694   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
1695   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
1696   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
1697   ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr);
1698   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
1699   CeedChk(ierr);
1700   CeedScalar *q_ref, *q_weight, *grad;
1701   ierr = CeedCalloc(num_nodes_f, &q_ref); CeedChk(ierr);
1702   ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr);
1703   ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr);
1704   CeedBasis basis_c_to_f;
1705   ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f,
1706                            interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
1707   CeedChk(ierr);
1708   ierr = CeedFree(&q_ref); CeedChk(ierr);
1709   ierr = CeedFree(&q_weight); CeedChk(ierr);
1710   ierr = CeedFree(&grad); CeedChk(ierr);
1711 
1712   // Core code
1713   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
1714                                           basis_coarse, basis_c_to_f, op_coarse,
1715                                           op_prolong, op_restrict);
1716   CeedChk(ierr);
1717   return CEED_ERROR_SUCCESS;
1718 }
1719 
1720 /**
1721   @brief Build a FDM based approximate inverse for each element for a
1722            CeedOperator
1723 
1724   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization
1725     Method based approximate inverse. This function obtains the simultaneous
1726     diagonalization for the 1D mass and Laplacian operators,
1727       M = V^T V, K = V^T S V.
1728     The assembled QFunction is used to modify the eigenvalues from simultaneous
1729     diagonalization and obtain an approximate inverse of the form
1730       V^T S^hat V. The CeedOperator must be linear and non-composite. The
1731     associated CeedQFunction must therefore also be linear.
1732 
  @param[in] op        CeedOperator to create element inverses
  @param[out] fdm_inv  CeedOperator to apply the action of a FDM based inverse
                         for each element
  @param[in] request   Address of CeedRequest for non-blocking completion, else
                         @ref CEED_REQUEST_IMMEDIATE
1738 
1739   @return An error code: 0 - success, otherwise - failure
1740 
1741   @ref Backend
1742 **/
1743 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv,
1744                                         CeedRequest *request) {
1745   int ierr;
1746   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1747 
1748   // Use backend version, if available
1749   if (op->CreateFDMElementInverse) {
1750     ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr);
1751     return CEED_ERROR_SUCCESS;
1752   } else {
1753     // Check for valid fallback resource
1754     const char *resource, *fallback_resource;
1755     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1756     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1757     CeedChk(ierr);
1758     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1759       // Fallback to reference Ceed
1760       if (!op->op_fallback) {
1761         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1762       }
1763       // Assemble
1764       ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request);
1765       CeedChk(ierr);
1766       return CEED_ERROR_SUCCESS;
1767     }
1768   }
1769 
1770   // Interface implementation
1771   Ceed ceed, ceed_parent;
1772   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
1773   ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr);
1774   ceed_parent = ceed_parent ? ceed_parent : ceed;
1775   CeedQFunction qf;
1776   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
1777 
1778   // Determine active input basis
1779   bool interp = false, grad = false;
1780   CeedBasis basis = NULL;
1781   CeedElemRestriction rstr = NULL;
1782   CeedOperatorField *op_fields;
1783   CeedQFunctionField *qf_fields;
1784   CeedInt num_input_fields;
1785   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL);
1786   CeedChk(ierr);
1787   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
1788   for (CeedInt i=0; i<num_input_fields; i++) {
1789     CeedVector vec;
1790     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
1791     if (vec == CEED_VECTOR_ACTIVE) {
1792       CeedEvalMode eval_mode;
1793       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
1794       interp = interp || eval_mode == CEED_EVAL_INTERP;
1795       grad = grad || eval_mode == CEED_EVAL_GRAD;
1796       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr);
1797       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
1798     }
1799   }
1800   if (!basis)
1801     // LCOV_EXCL_START
1802     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
1803   // LCOV_EXCL_STOP
1804   CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1,
1805                                                 l_size = 1;
1806   ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr);
1807   ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr);
1808   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr);
1809   ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr);
1810   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
1811   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
1812   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
1813   ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr);
1814 
1815   // Build and diagonalize 1D Mass and Laplacian
1816   bool tensor_basis;
1817   ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr);
1818   if (!tensor_basis)
1819     // LCOV_EXCL_START
1820     return CeedError(ceed, CEED_ERROR_BACKEND,
1821                      "FDMElementInverse only supported for tensor "
1822                      "bases");
1823   // LCOV_EXCL_STOP
1824   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
1825   ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr);
1826   ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr);
1827   ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr);
1828   ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr);
1829   ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr);
1830   // -- Build matrices
1831   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
1832   ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr);
1833   ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr);
1834   ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr);
1835   ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim,
1836                               mass, laplace); CeedChk(ierr);
1837 
1838   // -- Diagonalize
1839   ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d);
1840   CeedChk(ierr);
1841   ierr = CeedFree(&mass); CeedChk(ierr);
1842   ierr = CeedFree(&laplace); CeedChk(ierr);
1843   for (CeedInt i=0; i<P_1d; i++)
1844     for (CeedInt j=0; j<P_1d; j++)
1845       fdm_interp[i+j*P_1d] = x[j+i*P_1d];
1846   ierr = CeedFree(&x); CeedChk(ierr);
1847 
1848   // Assemble QFunction
1849   CeedVector assembled;
1850   CeedElemRestriction rstr_qf;
1851   ierr =  CeedOperatorLinearAssembleQFunction(op, &assembled, &rstr_qf, request);
1852   CeedChk(ierr);
1853   CeedInt layout[3];
1854   ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr);
1855   ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr);
1856   CeedScalar max_norm = 0;
1857   ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr);
1858 
1859   // Calculate element averages
1860   CeedInt num_modes = (interp?1:0) + (grad?dim:0);
1861   CeedScalar *elem_avg;
1862   const CeedScalar *assembled_array, *q_weight_array;
1863   CeedVector q_weight;
1864   ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr);
1865   ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT,
1866                         CEED_VECTOR_NONE, q_weight); CeedChk(ierr);
1867   ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array);
1868   CeedChk(ierr);
1869   ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array);
1870   CeedChk(ierr);
1871   ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr);
1872   const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
1873   for (CeedInt e=0; e<num_elem; e++) {
1874     CeedInt count = 0;
1875     for (CeedInt q=0; q<num_qpts; q++)
1876       for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++)
1877         if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) >
1878             qf_value_bound) {
1879           elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] /
1880                          q_weight_array[q];
1881           count++;
1882         }
1883     if (count) {
1884       elem_avg[e] /= count;
1885     } else {
1886       elem_avg[e] = 1.0;
1887     }
1888   }
1889   ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr);
1890   ierr = CeedVectorDestroy(&assembled); CeedChk(ierr);
1891   ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr);
1892   ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr);
1893 
1894   // Build FDM diagonal
1895   CeedVector q_data;
1896   CeedScalar *q_data_array, *fdm_diagonal;
1897   ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr);
1898   const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON;
1899   for (CeedInt c=0; c<num_comp; c++)
1900     for (CeedInt n=0; n<elem_size; n++) {
1901       if (interp)
1902         fdm_diagonal[c*elem_size + n] = 1.0;
1903       if (grad)
1904         for (CeedInt d=0; d<dim; d++) {
1905           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
1906           fdm_diagonal[c*elem_size + n] += lambda[i];
1907         }
1908       if (fabs(fdm_diagonal[c*elem_size + n]) < fdm_diagonal_bound)
1909         fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound;
1910     }
1911   ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data);
1912   CeedChk(ierr);
1913   ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr);
1914   ierr = CeedVectorGetArray(q_data, CEED_MEM_HOST, &q_data_array); CeedChk(ierr);
1915   for (CeedInt e=0; e<num_elem; e++)
1916     for (CeedInt c=0; c<num_comp; c++)
1917       for (CeedInt n=0; n<elem_size; n++)
1918         q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] *
1919             fdm_diagonal[c*elem_size + n]);
1920   ierr = CeedFree(&elem_avg); CeedChk(ierr);
1921   ierr = CeedFree(&fdm_diagonal); CeedChk(ierr);
1922   ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr);
1923 
1924   // Setup FDM operator
1925   // -- Basis
1926   CeedBasis fdm_basis;
1927   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
1928   ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr);
1929   ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr);
1930   ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr);
1931   ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d,
1932                                  fdm_interp, grad_dummy, q_ref_dummy,
1933                                  q_weight_dummy, &fdm_basis); CeedChk(ierr);
1934   ierr = CeedFree(&fdm_interp); CeedChk(ierr);
1935   ierr = CeedFree(&grad_dummy); CeedChk(ierr);
1936   ierr = CeedFree(&q_ref_dummy); CeedChk(ierr);
1937   ierr = CeedFree(&q_weight_dummy); CeedChk(ierr);
1938   ierr = CeedFree(&lambda); CeedChk(ierr);
1939 
1940   // -- Restriction
1941   CeedElemRestriction rstr_qd_i;
1942   CeedInt strides[3] = {1, elem_size, elem_size*num_comp};
1943   ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size,
1944                                           num_comp, num_elem*num_comp*elem_size,
1945                                           strides, &rstr_qd_i); CeedChk(ierr);
1946   // -- QFunction
1947   CeedQFunction qf_fdm;
1948   ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm);
1949   CeedChk(ierr);
1950   ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP);
1951   CeedChk(ierr);
1952   ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE);
1953   CeedChk(ierr);
1954   ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP);
1955   CeedChk(ierr);
1956   // -- QFunction context
1957   CeedInt *num_comp_data;
1958   ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr);
1959   num_comp_data[0] = num_comp;
1960   CeedQFunctionContext ctx_fdm;
1961   ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr);
1962   ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER,
1963                                      sizeof(*num_comp_data), num_comp_data);
1964   CeedChk(ierr);
1965   ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr);
1966   ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr);
1967   // -- Operator
1968   ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv);
1969   CeedChk(ierr);
1970   CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
1971   CeedChk(ierr);
1972   CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED,
1973                        q_data); CeedChk(ierr);
1974   CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
1975   CeedChk(ierr);
1976 
1977   // Cleanup
1978   ierr = CeedVectorDestroy(&q_data); CeedChk(ierr);
1979   ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr);
1980   ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr);
1981   ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr);
1982 
1983   return CEED_ERROR_SUCCESS;
1984 }
1985 
1986 /// @}
1987