xref: /libCEED/interface/ceed-preconditioning.c (revision bf185410af015d53ed6d3b520013ce284095d550)
1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3 // reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <ceed-impl.h>
20 #include <math.h>
21 #include <stdbool.h>
22 #include <stdio.h>
23 #include <string.h>
24 
25 /// @file
26 /// Implementation of CeedOperator preconditioning interfaces
27 
28 /// ----------------------------------------------------------------------------
29 /// CeedOperator Library Internal Preconditioning Functions
30 /// ----------------------------------------------------------------------------
31 /// @addtogroup CeedOperatorDeveloper
32 /// @{
33 
34 /**
35   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced
36          CeedOperator functionality
37 
38   @param op  CeedOperator to create fallback for
39 
40   @return An error code: 0 - success, otherwise - failure
41 
42   @ref Developer
43 **/
44 int CeedOperatorCreateFallback(CeedOperator op) {
45   int ierr;
46 
47   // Fallback Ceed
48   const char *resource, *fallback_resource;
49   ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
50   ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
51   CeedChk(ierr);
52   if (!strcmp(resource, fallback_resource))
53     // LCOV_EXCL_START
54     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
55                      "Backend %s cannot create an operator"
56                      "fallback to resource %s", resource, fallback_resource);
57   // LCOV_EXCL_STOP
58 
59   // Fallback Ceed
60   Ceed ceed_ref;
61   if (!op->ceed->op_fallback_ceed) {
62     ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr);
63     ceed_ref->op_fallback_parent = op->ceed;
64     ceed_ref->Error = op->ceed->Error;
65     op->ceed->op_fallback_ceed = ceed_ref;
66   }
67   ceed_ref = op->ceed->op_fallback_ceed;
68 
69   // Clone Op
70   CeedOperator op_ref;
71   ierr = CeedCalloc(1, &op_ref); CeedChk(ierr);
72   memcpy(op_ref, op, sizeof(*op_ref));
73   op_ref->data = NULL;
74   op_ref->interface_setup = false;
75   op_ref->backend_setup = false;
76   op_ref->ceed = ceed_ref;
77   ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr);
78   op->op_fallback = op_ref;
79 
80   // Clone QF
81   CeedQFunction qf_ref;
82   ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr);
83   memcpy(qf_ref, (op->qf), sizeof(*qf_ref));
84   qf_ref->data = NULL;
85   qf_ref->ceed = ceed_ref;
86   ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr);
87   op_ref->qf = qf_ref;
88   op->qf_fallback = qf_ref;
89   return CEED_ERROR_SUCCESS;
90 }
91 
92 /**
93   @brief Select correct basis matrix pointer based on CeedEvalMode
94 
95   @param[in] eval_mode   Current basis evaluation mode
96   @param[in] identity    Pointer to identity matrix
97   @param[in] interp      Pointer to interpolation matrix
98   @param[in] grad        Pointer to gradient matrix
99   @param[out] basis_ptr  Basis pointer to set
100 
101   @return none
102 
103   @ref Developer
104 **/
105 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode,
106     const CeedScalar *identity, const CeedScalar *interp,
107     const CeedScalar *grad, const CeedScalar **basis_ptr) {
108   switch (eval_mode) {
109   case CEED_EVAL_NONE:
110     *basis_ptr = identity;
111     break;
112   case CEED_EVAL_INTERP:
113     *basis_ptr = interp;
114     break;
115   case CEED_EVAL_GRAD:
116     *basis_ptr = grad;
117     break;
118   case CEED_EVAL_WEIGHT:
119   case CEED_EVAL_DIV:
120   case CEED_EVAL_CURL:
121     break; // Caught by QF Assembly
122   }
123 }
124 
125 /**
126   @brief Create point block restriction for active operator field
127 
128   @param[in] rstr              Original CeedElemRestriction for active field
129   @param[out] pointblock_rstr  Address of the variable where the newly created
130                                  CeedElemRestriction will be stored
131 
132   @return An error code: 0 - success, otherwise - failure
133 
134   @ref Developer
135 **/
static int CeedOperatorCreateActivePointBlockRestriction(
  CeedElemRestriction rstr,
  CeedElemRestriction *pointblock_rstr) {
  int ierr;
  Ceed ceed;
  ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr);
  // Read-only view of the original offsets; restored before returning
  const CeedInt *offsets;
  ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets);
  CeedChk(ierr);

  // Expand offsets
  CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1,
                                                      *pointblock_offsets;
  ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr);
  // Scale each node offset so consecutive L-vector entries hold a
  //   num_comp x num_comp point block per node.
  // NOTE(review): shift is num_comp for interleaved components
  //   (comp_stride == 1) and num_comp^2 otherwise — confirm this matches the
  //   L-vector layout assumed by the diagonal assembly below.
  CeedInt shift = num_comp;
  if (comp_stride != 1)
    shift *= num_comp;
  ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets);
  CeedChk(ierr);
  for (CeedInt i = 0; i < num_elem*elem_size; i++) {
    pointblock_offsets[i] = offsets[i]*shift;
    // Track the largest expanded offset to size the new L-vector
    if (pointblock_offsets[i] > max)
      max = pointblock_offsets[i];
  }

  // Create new restriction; ownership of pointblock_offsets transfers to the
  //   new restriction via CEED_OWN_POINTER (no free here)
  ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp,
                                   1, max + num_comp*num_comp, CEED_MEM_HOST,
                                   CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr);
  CeedChk(ierr);

  // Cleanup
  ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}
175 
176 /**
177   @brief Core logic for assembling operator diagonal or point block diagonal
178 
179   @param[in] op             CeedOperator to assemble point block diagonal
180   @param[in] request        Address of CeedRequest for non-blocking completion, else
181                               CEED_REQUEST_IMMEDIATE
182   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
183   @param[out] assembled     CeedVector to store assembled diagonal
184 
185   @return An error code: 0 - success, otherwise - failure
186 
187   @ref Developer
188 **/
static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op,
    CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);

  // Assemble QFunction
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedInt num_input_fields, num_output_fields;
  ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
  CeedChk(ierr);
  CeedVector assembled_qf;
  CeedElemRestriction rstr;
  ierr = CeedOperatorLinearAssembleQFunction(op,  &assembled_qf, &rstr, request);
  CeedChk(ierr);
  // E-vector layout strides [node, component, element] for assembled_qf
  CeedInt layout[3];
  ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
  // Max norm of the assembled QFunction data; used below to skip
  //   contributions that are negligible relative to the largest entry
  CeedScalar max_norm = 0;
  ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr);

  // Determine active input basis
  // Builds eval_mode_in: one entry per eval mode, with GRAD expanded to one
  //   entry per dimension
  CeedOperatorField *op_fields;
  CeedQFunctionField *qf_fields;
  ierr = CeedOperatorGetFields(op, &op_fields, NULL); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, &qf_fields, NULL); CeedChk(ierr);
  CeedInt num_eval_mode_in = 0, num_comp, dim = 1;
  CeedEvalMode *eval_mode_in = NULL;
  CeedBasis basis_in = NULL;
  CeedElemRestriction rstr_in = NULL;
  for (CeedInt i=0; i<num_input_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr);
      ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr);
      ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      // All active inputs must share one restriction for this algorithm
      if (rstr_in && rstr_in != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_in = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
        eval_mode_in[num_eval_mode_in] = eval_mode;
        num_eval_mode_in += 1;
        break;
      case CEED_EVAL_GRAD:
        // Gradient contributes one eval mode entry per spatial dimension
        ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_in[num_eval_mode_in+d] = eval_mode;
        num_eval_mode_in += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Determine active output basis
  // Mirrors the input loop above for the output fields
  ierr = CeedOperatorGetFields(op, NULL, &op_fields); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields); CeedChk(ierr);
  CeedInt num_eval_mode_out = 0;
  CeedEvalMode *eval_mode_out = NULL;
  CeedBasis basis_out = NULL;
  CeedElemRestriction rstr_out = NULL;
  for (CeedInt i=0; i<num_output_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      if (rstr_out && rstr_out != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_out = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
        eval_mode_out[num_eval_mode_out] = eval_mode;
        num_eval_mode_out += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_out[num_eval_mode_out+d] = eval_mode;
        num_eval_mode_out += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Assemble point block diagonal restriction, if needed
  // Point block mode needs a restriction sized for num_comp^2 blocks per node
  CeedElemRestriction diag_rstr = rstr_out;
  if (is_pointblock) {
    ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr);
    CeedChk(ierr);
  }

  // Create diagonal vector
  CeedVector elem_diag;
  ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
  CeedChk(ierr);

  // Assemble element operator diagonals
  CeedScalar *elem_diag_array, *assembled_qf_array;
  ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr);
  ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
  CeedChk(ierr);
  ierr = CeedVectorGetArray(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
  CeedChk(ierr);
  CeedInt num_elem, num_nodes, num_qpts;
  ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
  // Basis matrices
  const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
  CeedScalar *identity = NULL;
  // CEED_EVAL_NONE uses an identity "basis" matrix built on the fly
  bool evalNone = false;
  for (CeedInt i=0; i<num_eval_mode_in; i++)
    evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE);
  for (CeedInt i=0; i<num_eval_mode_out; i++)
    evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE);
  if (evalNone) {
    // Rectangular identity: ones on the diagonal up to min(num_nodes, num_qpts)
    ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr);
    for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++)
      identity[i*num_nodes+i] = 1.0;
  }
  ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
  ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr);
  // Compute the diagonal of B^T D B
  // Contributions below this bound (relative to the max norm of D) are skipped
  const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
  // Each element
  for (CeedInt e=0; e<num_elem; e++) {
    // d_out/d_in track which dimension slab of the gradient matrix to use;
    //   they advance only on CEED_EVAL_GRAD entries
    CeedInt d_out = -1;
    // Each basis eval mode pair
    for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
      const CeedScalar *bt = NULL;
      if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
        d_out += 1;
      CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out,
                                  &grad_out[d_out*num_qpts*num_nodes], &bt);
      CeedInt d_in = -1;
      for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
        const CeedScalar *b = NULL;
        if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
          d_in += 1;
        CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in,
                                    &grad_in[d_in*num_qpts*num_nodes], &b);
        // Each component
        for (CeedInt c_out=0; c_out<num_comp; c_out++)
          // Each qpoint/node pair
          for (CeedInt q=0; q<num_qpts; q++)
            if (is_pointblock) {
              // Point Block Diagonal
              for (CeedInt c_in=0; c_in<num_comp; c_in++) {
                const CeedScalar qf_value =
                  assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)*
                                                     num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
                if (fabs(qf_value) > qf_value_bound)
                  for (CeedInt n=0; n<num_nodes; n++)
                    elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
                      bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
              }
            } else {
              // Diagonal Only (c_in == c_out)
              const CeedScalar qf_value =
                assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)*
                                                   num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
              if (fabs(qf_value) > qf_value_bound)
                for (CeedInt n=0; n<num_nodes; n++)
                  elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
                    bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
            }
      }
    }
  }
  ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr);
  ierr = CeedVectorRestoreArray(assembled_qf, &assembled_qf_array); CeedChk(ierr);

  // Assemble local operator diagonal
  // Scatter-add the element diagonals into the assembled L-vector
  ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag,
                                  assembled, request); CeedChk(ierr);

  // Cleanup
  if (is_pointblock) {
    ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr);
  }
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
  ierr = CeedFree(&identity); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}
407 
408 /**
409   @brief Core logic for assembling composite operator diagonal
410 
411   @param[in] op             CeedOperator to assemble point block diagonal
412   @param[in] request        Address of CeedRequest for non-blocking completion, else
413                             CEED_REQUEST_IMMEDIATE
414   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
415   @param[out] assembled     CeedVector to store assembled diagonal
416 
417   @return An error code: 0 - success, otherwise - failure
418 
419   @ref Developer
420 **/
421 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(
422   CeedOperator op, CeedRequest *request, const bool is_pointblock,
423   CeedVector assembled) {
424   int ierr;
425   CeedInt num_sub;
426   CeedOperator *suboperators;
427   ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr);
428   ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr);
429   for (CeedInt i = 0; i < num_sub; i++) {
430     ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request,
431            is_pointblock, assembled); CeedChk(ierr);
432   }
433   return CEED_ERROR_SUCCESS;
434 }
435 
436 /**
437   @brief Build nonzero pattern for non-composite operator
438 
439   Users should generally use CeedOperatorLinearAssembleSymbolic()
440 
441   @param[in] op      CeedOperator to assemble nonzero pattern
442   @param[in] offset  Offset for number of entries
443   @param[out] rows   Row number for each entry
444   @param[out] cols   Column number for each entry
445 
446   @return An error code: 0 - success, otherwise - failure
447 
448   @ref Developer
449 **/
450 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset,
451     CeedInt *rows, CeedInt *cols) {
452   int ierr;
453   Ceed ceed = op->ceed;
454   if (op->composite)
455     // LCOV_EXCL_START
456     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
457                      "Composite operator not supported");
458   // LCOV_EXCL_STOP
459 
460   CeedElemRestriction rstr_in;
461   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr);
462   CeedInt num_elem, elem_size, num_nodes, num_comp;
463   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
464   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
465   ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr);
466   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
467   CeedInt layout_er[3];
468   ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr);
469 
470   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
471 
472   // Determine elem_dof relation
473   CeedVector index_vec;
474   ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr);
475   CeedScalar *array;
476   ierr = CeedVectorGetArray(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr);
477   for (CeedInt i = 0; i < num_nodes; ++i) {
478     array[i] = i;
479   }
480   ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr);
481   CeedVector elem_dof;
482   ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof);
483   CeedChk(ierr);
484   ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr);
485   CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec,
486                            elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
487   const CeedScalar *elem_dof_a;
488   ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a);
489   CeedChk(ierr);
490   ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr);
491 
492   // Determine i, j locations for element matrices
493   CeedInt count = 0;
494   for (int e = 0; e < num_elem; ++e) {
495     for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
496       for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
497         for (int i = 0; i < elem_size; ++i) {
498           for (int j = 0; j < elem_size; ++j) {
499             const CeedInt elem_dof_index_row = (i)*layout_er[0] +
500                                                (comp_out)*layout_er[1] + e*layout_er[2];
501             const CeedInt elem_dof_index_col = (j)*layout_er[0] +
502                                                (comp_in)*layout_er[1] + e*layout_er[2];
503 
504             const CeedInt row = elem_dof_a[elem_dof_index_row];
505             const CeedInt col = elem_dof_a[elem_dof_index_col];
506 
507             rows[offset + count] = row;
508             cols[offset + count] = col;
509             count++;
510           }
511         }
512       }
513     }
514   }
515   if (count != local_num_entries)
516     // LCOV_EXCL_START
517     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
518   // LCOV_EXCL_STOP
519   ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr);
520   ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr);
521 
522   return CEED_ERROR_SUCCESS;
523 }
524 
525 /**
526   @brief Assemble nonzero entries for non-composite operator
527 
528   Users should generally use CeedOperatorLinearAssemble()
529 
530   @param[in] op       CeedOperator to assemble
  @param[in] offset   Offset for number of entries
532   @param[out] values  Values to assemble into matrix
533 
534   @return An error code: 0 - success, otherwise - failure
535 
536   @ref Developer
537 **/
538 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
539                                       CeedVector values) {
540   int ierr;
541   Ceed ceed = op->ceed;
542   if (op->composite)
543     // LCOV_EXCL_START
544     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
545                      "Composite operator not supported");
546   // LCOV_EXCL_STOP
547 
548   // Assemble QFunction
549   CeedQFunction qf;
550   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
551   CeedInt num_input_fields, num_output_fields;
552   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
553   CeedChk(ierr);
554   CeedVector assembled_qf;
555   CeedElemRestriction rstr_q;
556   ierr = CeedOperatorLinearAssembleQFunction(
557            op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
558 
559   CeedInt qf_length;
560   ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr);
561 
562   CeedOperatorField *input_fields;
563   CeedOperatorField *output_fields;
564   ierr = CeedOperatorGetFields(op, &input_fields, &output_fields); CeedChk(ierr);
565 
566   // Determine active input basis
567   CeedQFunctionField *qf_fields;
568   ierr = CeedQFunctionGetFields(qf, &qf_fields, NULL); CeedChk(ierr);
569   CeedInt num_eval_mode_in = 0, dim = 1;
570   CeedEvalMode *eval_mode_in = NULL;
571   CeedBasis basis_in = NULL;
572   CeedElemRestriction rstr_in = NULL;
573   for (CeedInt i=0; i<num_input_fields; i++) {
574     CeedVector vec;
575     ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr);
576     if (vec == CEED_VECTOR_ACTIVE) {
577       ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in);
578       CeedChk(ierr);
579       ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
580       ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in);
581       CeedChk(ierr);
582       CeedEvalMode eval_mode;
583       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
584       CeedChk(ierr);
585       switch (eval_mode) {
586       case CEED_EVAL_NONE:
587       case CEED_EVAL_INTERP:
588         ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
589         eval_mode_in[num_eval_mode_in] = eval_mode;
590         num_eval_mode_in += 1;
591         break;
592       case CEED_EVAL_GRAD:
593         ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
594         for (CeedInt d=0; d<dim; d++) {
595           eval_mode_in[num_eval_mode_in+d] = eval_mode;
596         }
597         num_eval_mode_in += dim;
598         break;
599       case CEED_EVAL_WEIGHT:
600       case CEED_EVAL_DIV:
601       case CEED_EVAL_CURL:
602         break; // Caught by QF Assembly
603       }
604     }
605   }
606 
607   // Determine active output basis
608   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields); CeedChk(ierr);
609   CeedInt num_eval_mode_out = 0;
610   CeedEvalMode *eval_mode_out = NULL;
611   CeedBasis basis_out = NULL;
612   CeedElemRestriction rstr_out = NULL;
613   for (CeedInt i=0; i<num_output_fields; i++) {
614     CeedVector vec;
615     ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr);
616     if (vec == CEED_VECTOR_ACTIVE) {
617       ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out);
618       CeedChk(ierr);
619       ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out);
620       CeedChk(ierr);
621       CeedEvalMode eval_mode;
622       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
623       CeedChk(ierr);
624       switch (eval_mode) {
625       case CEED_EVAL_NONE:
626       case CEED_EVAL_INTERP:
627         ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
628         eval_mode_out[num_eval_mode_out] = eval_mode;
629         num_eval_mode_out += 1;
630         break;
631       case CEED_EVAL_GRAD:
632         ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
633         for (CeedInt d=0; d<dim; d++) {
634           eval_mode_out[num_eval_mode_out+d] = eval_mode;
635         }
636         num_eval_mode_out += dim;
637         break;
638       case CEED_EVAL_WEIGHT:
639       case CEED_EVAL_DIV:
640       case CEED_EVAL_CURL:
641         break; // Caught by QF Assembly
642       }
643     }
644   }
645 
646   if (num_eval_mode_in == 0 || num_eval_mode_out == 0)
647     // LCOV_EXCL_START
648     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
649                      "Cannot assemble operator with out inputs/outputs");
650   // LCOV_EXCL_STOP
651 
652   CeedInt num_elem, elem_size, num_qpts, num_comp;
653   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
654   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
655   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
656   ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
657 
658   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
659 
660   // loop over elements and put in data structure
661   const CeedScalar *interp_in, *grad_in;
662   ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
663   ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
664 
665   const CeedScalar *assembled_qf_array;
666   ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
667   CeedChk(ierr);
668 
669   CeedInt layout_qf[3];
670   ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr);
671   ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr);
672 
673   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
674   CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size];
675   CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size];
676   CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in *
677                                      num_qpts]; // logically 3-tensor
678   CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in];
679   CeedScalar elem_mat[elem_size * elem_size];
680   int count = 0;
681   CeedScalar *vals;
682   ierr = CeedVectorGetArray(values, CEED_MEM_HOST, &vals); CeedChk(ierr);
683   for (int e = 0; e < num_elem; ++e) {
684     for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
685       for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
686         for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) {
687           B_mat_in[ell] = 0.0;
688         }
689         for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) {
690           B_mat_out[ell] = 0.0;
691         }
692         // Store block-diagonal D matrix as collection of small dense blocks
693         for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) {
694           D_mat[ell] = 0.0;
695         }
696         // form element matrix itself (for each block component)
697         for (int ell = 0; ell < elem_size*elem_size; ++ell) {
698           elem_mat[ell] = 0.0;
699         }
700         for (int q = 0; q < num_qpts; ++q) {
701           for (int n = 0; n < elem_size; ++n) {
702             CeedInt d_in = -1;
703             for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) {
704               const int qq = num_eval_mode_in*q;
705               if (eval_mode_in[e_in] == CEED_EVAL_INTERP) {
706                 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n];
707               } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) {
708                 d_in += 1;
709                 B_mat_in[(qq+e_in)*elem_size + n] +=
710                   grad_in[(d_in*num_qpts+q) * elem_size + n];
711               } else {
712                 // LCOV_EXCL_START
713                 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
714                 // LCOV_EXCL_STOP
715               }
716             }
717             CeedInt d_out = -1;
718             for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) {
719               const int qq = num_eval_mode_out*q;
720               if (eval_mode_out[e_out] == CEED_EVAL_INTERP) {
721                 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n];
722               } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) {
723                 d_out += 1;
724                 B_mat_out[(qq+e_out)*elem_size + n] +=
725                   grad_in[(d_out*num_qpts+q) * elem_size + n];
726               } else {
727                 // LCOV_EXCL_START
728                 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
729                 // LCOV_EXCL_STOP
730               }
731             }
732           }
733           for (int ei = 0; ei < num_eval_mode_out; ++ei) {
734             for (int ej = 0; ej < num_eval_mode_in; ++ej) {
735               const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp
736                                           +comp_out;
737               const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
738                                 e*layout_qf[2];
739               D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index];
740             }
741           }
742         }
743         // Compute B^T*D
744         for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) {
745           BTD[ell] = 0.0;
746         }
747         for (int j = 0; j<elem_size; ++j) {
748           for (int q = 0; q<num_qpts; ++q) {
749             int qq = num_eval_mode_out*q;
750             for (int ei = 0; ei < num_eval_mode_in; ++ei) {
751               for (int ej = 0; ej < num_eval_mode_out; ++ej) {
752                 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] +=
753                   B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q];
754               }
755             }
756           }
757         }
758 
759         ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size,
760                                   elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);
761 
762         // put element matrix in coordinate data structure
763         for (int i = 0; i < elem_size; ++i) {
764           for (int j = 0; j < elem_size; ++j) {
765             vals[offset + count] = elem_mat[i*elem_size + j];
766             count++;
767           }
768         }
769       }
770     }
771   }
772   if (count != local_num_entries)
773     // LCOV_EXCL_START
774     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
775   // LCOV_EXCL_STOP
776   ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr);
777 
778   ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
779   CeedChk(ierr);
780   ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
781   ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
782   ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
783 
784   return CEED_ERROR_SUCCESS;
785 }
786 
787 /**
788   @brief Count number of entries for assembled CeedOperator
789 
790   @param[in] op            CeedOperator to assemble
791   @param[out] num_entries  Number of entries in assembled representation
792 
793   @return An error code: 0 - success, otherwise - failure
794 
795   @ref Utility
796 **/
797 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op,
798     CeedInt *num_entries) {
799   int ierr;
800   CeedElemRestriction rstr;
801   CeedInt num_elem, elem_size, num_comp;
802 
803   if (op->composite)
804     // LCOV_EXCL_START
805     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
806                      "Composite operator not supported");
807   // LCOV_EXCL_STOP
808   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr);
809   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
810   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
811   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
812   *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
813 
814   return CEED_ERROR_SUCCESS;
815 }
816 
817 /**
818   @brief Common code for creating a multigrid coarse operator and level
819            transfer operators for a CeedOperator
820 
821   @param[in] op_fine       Fine grid operator
822   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
823   @param[in] rstr_coarse   Coarse grid restriction
824   @param[in] basis_coarse  Coarse grid active vector basis
825   @param[in] basis_c_to_f  Basis for coarse to fine interpolation
826   @param[out] op_coarse    Coarse grid operator
827   @param[out] op_prolong   Coarse to fine operator
828   @param[out] op_restrict  Fine to coarse operator
829 
830   @return An error code: 0 - success, otherwise - failure
831 
832   @ref Developer
833 **/
834 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
835     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
836     CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
837     CeedOperator *op_restrict) {
838   int ierr;
839   Ceed ceed;
840   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
841 
842   // Check for composite operator
843   bool is_composite;
844   ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
845   if (is_composite)
846     // LCOV_EXCL_START
847     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
848                      "Automatic multigrid setup for composite operators not supported");
849   // LCOV_EXCL_STOP
850 
851   // Coarse Grid
852   ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
853                             op_coarse); CeedChk(ierr);
854   CeedElemRestriction rstr_fine = NULL;
855   // -- Clone input fields
856   for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
857     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
858       rstr_fine = op_fine->input_fields[i]->elem_restr;
859       ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
860                                   rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
861       CeedChk(ierr);
862     } else {
863       ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
864                                   op_fine->input_fields[i]->elem_restr,
865                                   op_fine->input_fields[i]->basis,
866                                   op_fine->input_fields[i]->vec); CeedChk(ierr);
867     }
868   }
869   // -- Clone output fields
870   for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
871     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
872       ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
873                                   rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
874       CeedChk(ierr);
875     } else {
876       ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
877                                   op_fine->output_fields[i]->elem_restr,
878                                   op_fine->output_fields[i]->basis,
879                                   op_fine->output_fields[i]->vec); CeedChk(ierr);
880     }
881   }
882 
883   // Multiplicity vector
884   CeedVector mult_vec, mult_e_vec;
885   ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
886   CeedChk(ierr);
887   ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
888   ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
889                                   mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
890   ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
891   ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
892                                   CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
893   ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
894   ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);
895 
896   // Restriction
897   CeedInt num_comp;
898   ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
899   CeedQFunction qf_restrict;
900   ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
901   CeedChk(ierr);
902   CeedInt *num_comp_r_data;
903   ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
904   num_comp_r_data[0] = num_comp;
905   CeedQFunctionContext ctx_r;
906   ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
907   ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
908                                      sizeof(*num_comp_r_data), num_comp_r_data);
909   CeedChk(ierr);
910   ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
911   ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
912   ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
913   CeedChk(ierr);
914   ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
915   CeedChk(ierr);
916   ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
917                                 CEED_EVAL_INTERP); CeedChk(ierr);
918 
919   ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
920                             CEED_QFUNCTION_NONE, op_restrict);
921   CeedChk(ierr);
922   ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
923                               CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
924   CeedChk(ierr);
925   ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
926                               CEED_BASIS_COLLOCATED, mult_vec);
927   CeedChk(ierr);
928   ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
929                               CEED_VECTOR_ACTIVE); CeedChk(ierr);
930 
931   // Prolongation
932   CeedQFunction qf_prolong;
933   ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
934   CeedChk(ierr);
935   CeedInt *num_comp_p_data;
936   ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
937   num_comp_p_data[0] = num_comp;
938   CeedQFunctionContext ctx_p;
939   ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
940   ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
941                                      sizeof(*num_comp_p_data), num_comp_p_data);
942   CeedChk(ierr);
943   ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
944   ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
945   ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
946   CeedChk(ierr);
947   ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
948   CeedChk(ierr);
949   ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
950   CeedChk(ierr);
951 
952   ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
953                             CEED_QFUNCTION_NONE, op_prolong);
954   CeedChk(ierr);
955   ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
956                               CEED_VECTOR_ACTIVE); CeedChk(ierr);
957   ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
958                               CEED_BASIS_COLLOCATED, mult_vec);
959   CeedChk(ierr);
960   ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
961                               CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
962   CeedChk(ierr);
963 
964   // Cleanup
965   ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
966   ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
967   ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
968   ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
969   return CEED_ERROR_SUCCESS;
970 }
971 
972 /**
973   @brief Build 1D mass matrix and Laplacian with perturbation
974 
975   @param[in] interp_1d    Interpolation matrix in one dimension
976   @param[in] grad_1d      Gradient matrix in one dimension
977   @param[in] q_weight_1d  Quadrature weights in one dimension
978   @param[in] P_1d         Number of basis nodes in one dimension
979   @param[in] Q_1d         Number of quadrature points in one dimension
980   @param[in] dim          Dimension of basis
981   @param[out] mass        Assembled mass matrix in one dimension
982   @param[out] laplace     Assembled perturbed Laplacian in one dimension
983 
984   @return An error code: 0 - success, otherwise - failure
985 
986   @ref Developer
987 **/
988 CeedPragmaOptimizeOff
989 static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
990                                 const CeedScalar *grad_1d,
991                                 const CeedScalar *q_weight_1d, CeedInt P_1d,
992                                 CeedInt Q_1d, CeedInt dim,
993                                 CeedScalar *mass, CeedScalar *laplace) {
994 
995   for (CeedInt i=0; i<P_1d; i++)
996     for (CeedInt j=0; j<P_1d; j++) {
997       CeedScalar sum = 0.0;
998       for (CeedInt k=0; k<Q_1d; k++)
999         sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
1000       mass[i+j*P_1d] = sum;
1001     }
1002   // -- Laplacian
1003   for (CeedInt i=0; i<P_1d; i++)
1004     for (CeedInt j=0; j<P_1d; j++) {
1005       CeedScalar sum = 0.0;
1006       for (CeedInt k=0; k<Q_1d; k++)
1007         sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
1008       laplace[i+j*P_1d] = sum;
1009     }
1010   CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
1011   for (CeedInt i=0; i<P_1d; i++)
1012     laplace[i+P_1d*i] += perturbation;
1013   return CEED_ERROR_SUCCESS;
1014 }
1015 CeedPragmaOptimizeOn
1016 
1017 /// @}
1018 
1019 /// ----------------------------------------------------------------------------
1020 /// CeedOperator Public API
1021 /// ----------------------------------------------------------------------------
1022 /// @addtogroup CeedOperatorUser
1023 /// @{
1024 
1025 /**
1026   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1027 
1028   This returns a CeedVector containing a matrix at each quadrature point
1029     providing the action of the CeedQFunction associated with the CeedOperator.
1030     The vector 'assembled' is of shape
1031       [num_elements, num_input_fields, num_output_fields, num_quad_points]
1032     and contains column-major matrices representing the action of the
1033     CeedQFunction for a corresponding quadrature point on an element. Inputs and
1034     outputs are in the order provided by the user when adding CeedOperator fields.
1035     For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
1036     'v', provided in that order, would result in an assembled QFunction that
1037     consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
1038     on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1039 
1040   @param op              CeedOperator to assemble CeedQFunction
1041   @param[out] assembled  CeedVector to store assembled CeedQFunction at
1042                            quadrature points
1043   @param[out] rstr       CeedElemRestriction for CeedVector containing assembled
1044                            CeedQFunction
1045   @param request         Address of CeedRequest for non-blocking completion, else
1046                            @ref CEED_REQUEST_IMMEDIATE
1047 
1048   @return An error code: 0 - success, otherwise - failure
1049 
1050   @ref User
1051 **/
1052 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
1053                                         CeedElemRestriction *rstr,
1054                                         CeedRequest *request) {
1055   int ierr;
1056   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1057 
1058   // Backend version
1059   if (op->LinearAssembleQFunction) {
1060     ierr = op->LinearAssembleQFunction(op, assembled, rstr, request); CeedChk(ierr);
1061     return CEED_ERROR_SUCCESS;
1062   } else {
1063     // Fallback to reference Ceed
1064     if (!op->op_fallback) {
1065       ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1066     }
1067     // Assemble
1068     ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled,
1069            rstr, request); CeedChk(ierr);
1070     return CEED_ERROR_SUCCESS;
1071   }
1072 }
1073 
1074 /**
1075   @brief Assemble the diagonal of a square linear CeedOperator
1076 
1077   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1078 
1079   Note: Currently only non-composite CeedOperators with a single field and
1080           composite CeedOperators with single field sub-operators are supported.
1081 
1082   @param op              CeedOperator to assemble CeedQFunction
1083   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1084   @param request         Address of CeedRequest for non-blocking completion, else
1085                            @ref CEED_REQUEST_IMMEDIATE
1086 
1087   @return An error code: 0 - success, otherwise - failure
1088 
1089   @ref User
1090 **/
1091 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
1092                                        CeedRequest *request) {
1093   int ierr;
1094   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1095 
1096   // Use backend version, if available
1097   if (op->LinearAssembleDiagonal) {
1098     ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
1099     return CEED_ERROR_SUCCESS;
1100   } else if (op->LinearAssembleAddDiagonal) {
1101     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1102     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1103     return CEED_ERROR_SUCCESS;
1104   } else {
1105     // Check for valid fallback resource
1106     const char *resource, *fallback_resource;
1107     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1108     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1109     CeedChk(ierr);
1110     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1111       // Fallback to reference Ceed
1112       if (!op->op_fallback) {
1113         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1114       }
1115       // Assemble
1116       ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
1117       CeedChk(ierr);
1118       return CEED_ERROR_SUCCESS;
1119     }
1120   }
1121 
1122   // Default interface implementation
1123   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1124   ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
1125   CeedChk(ierr);
1126   return CEED_ERROR_SUCCESS;
1127 }
1128 
1129 /**
1130   @brief Assemble the diagonal of a square linear CeedOperator
1131 
1132   This sums into a CeedVector the diagonal of a linear CeedOperator.
1133 
1134   Note: Currently only non-composite CeedOperators with a single field and
1135           composite CeedOperators with single field sub-operators are supported.
1136 
1137   @param op              CeedOperator to assemble CeedQFunction
1138   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1139   @param request         Address of CeedRequest for non-blocking completion, else
1140                            @ref CEED_REQUEST_IMMEDIATE
1141 
1142   @return An error code: 0 - success, otherwise - failure
1143 
1144   @ref User
1145 **/
1146 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
1147     CeedRequest *request) {
1148   int ierr;
1149   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1150 
1151   // Use backend version, if available
1152   if (op->LinearAssembleAddDiagonal) {
1153     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1154     return CEED_ERROR_SUCCESS;
1155   } else {
1156     // Check for valid fallback resource
1157     const char *resource, *fallback_resource;
1158     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1159     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1160     CeedChk(ierr);
1161     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1162       // Fallback to reference Ceed
1163       if (!op->op_fallback) {
1164         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1165       }
1166       // Assemble
1167       ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled,
1168              request); CeedChk(ierr);
1169       return CEED_ERROR_SUCCESS;
1170     }
1171   }
1172 
1173   // Default interface implementation
1174   bool is_composite;
1175   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1176   if (is_composite) {
1177     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1178            false, assembled); CeedChk(ierr);
1179     return CEED_ERROR_SUCCESS;
1180   } else {
1181     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
1182     CeedChk(ierr);
1183     return CEED_ERROR_SUCCESS;
1184   }
1185 }
1186 
1187 /**
1188   @brief Assemble the point block diagonal of a square linear CeedOperator
1189 
1190   This overwrites a CeedVector with the point block diagonal of a linear
1191     CeedOperator.
1192 
1193   Note: Currently only non-composite CeedOperators with a single field and
1194           composite CeedOperators with single field sub-operators are supported.
1195 
1196   @param op              CeedOperator to assemble CeedQFunction
1197   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1198                            diagonal, provided in row-major form with an
1199                            @a num_comp * @a num_comp block at each node. The dimensions
1200                            of this vector are derived from the active vector
1201                            for the CeedOperator. The array has shape
1202                            [nodes, component out, component in].
1203   @param request         Address of CeedRequest for non-blocking completion, else
1204                            CEED_REQUEST_IMMEDIATE
1205 
1206   @return An error code: 0 - success, otherwise - failure
1207 
1208   @ref User
1209 **/
1210 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
1211     CeedVector assembled, CeedRequest *request) {
1212   int ierr;
1213   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1214 
1215   // Use backend version, if available
1216   if (op->LinearAssemblePointBlockDiagonal) {
1217     ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
1218     CeedChk(ierr);
1219     return CEED_ERROR_SUCCESS;
1220   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1221     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1222     ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
1223            request); CeedChk(ierr);
1224     return CEED_ERROR_SUCCESS;
1225   } else {
1226     // Check for valid fallback resource
1227     const char *resource, *fallback_resource;
1228     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1229     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1230     CeedChk(ierr);
1231     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1232       // Fallback to reference Ceed
1233       if (!op->op_fallback) {
1234         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1235       }
1236       // Assemble
1237       ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
1238              assembled, request); CeedChk(ierr);
1239       return CEED_ERROR_SUCCESS;
1240     }
1241   }
1242 
1243   // Default interface implementation
1244   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1245   ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
1246   CeedChk(ierr);
1247   return CEED_ERROR_SUCCESS;
1248 }
1249 
1250 /**
1251   @brief Assemble the point block diagonal of a square linear CeedOperator
1252 
1253   This sums into a CeedVector with the point block diagonal of a linear
1254     CeedOperator.
1255 
1256   Note: Currently only non-composite CeedOperators with a single field and
1257           composite CeedOperators with single field sub-operators are supported.
1258 
1259   @param op              CeedOperator to assemble CeedQFunction
1260   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1261                            diagonal, provided in row-major form with an
1262                            @a num_comp * @a num_comp block at each node. The dimensions
1263                            of this vector are derived from the active vector
1264                            for the CeedOperator. The array has shape
1265                            [nodes, component out, component in].
1266   @param request         Address of CeedRequest for non-blocking completion, else
1267                            CEED_REQUEST_IMMEDIATE
1268 
1269   @return An error code: 0 - success, otherwise - failure
1270 
1271   @ref User
1272 **/
1273 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
1274     CeedVector assembled, CeedRequest *request) {
1275   int ierr;
1276   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1277 
1278   // Use backend version, if available
1279   if (op->LinearAssembleAddPointBlockDiagonal) {
1280     ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
1281     CeedChk(ierr);
1282     return CEED_ERROR_SUCCESS;
1283   } else {
1284     // Check for valid fallback resource
1285     const char *resource, *fallback_resource;
1286     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1287     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1288     CeedChk(ierr);
1289     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1290       // Fallback to reference Ceed
1291       if (!op->op_fallback) {
1292         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1293       }
1294       // Assemble
1295       ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback,
1296              assembled, request); CeedChk(ierr);
1297       return CEED_ERROR_SUCCESS;
1298     }
1299   }
1300 
1301   // Default interface implemenation
1302   bool is_composite;
1303   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1304   if (is_composite) {
1305     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1306            true, assembled); CeedChk(ierr);
1307     return CEED_ERROR_SUCCESS;
1308   } else {
1309     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
1310     CeedChk(ierr);
1311     return CEED_ERROR_SUCCESS;
1312   }
1313 }
1314 
1315 /**
1316    @brief Fully assemble the nonzero pattern of a linear operator.
1317 
1318    Expected to be used in conjunction with CeedOperatorLinearAssemble()
1319 
1320    The assembly routines use coordinate format, with num_entries tuples of the
1321    form (i, j, value) which indicate that value should be added to the matrix
1322    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1323    This function returns the number of entries and their (i, j) locations,
1324    while CeedOperatorLinearAssemble() provides the values in the same
1325    ordering.
1326 
1327    This will generally be slow unless your operator is low-order.
1328 
1329    @param[in]  op           CeedOperator to assemble
1330    @param[out] num_entries  Number of entries in coordinate nonzero pattern
1331    @param[out] rows         Row number for each entry
1332    @param[out] cols         Column number for each entry
1333 
1334    @ref User
1335 **/
1336 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedInt *num_entries,
1337                                        CeedInt **rows, CeedInt **cols) {
1338   int ierr;
1339   CeedInt num_suboperators, single_entries;
1340   CeedOperator *sub_operators;
1341   bool is_composite;
1342   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1343 
1344   // Use backend version, if available
1345   if (op->LinearAssembleSymbolic) {
1346     ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
1347     return CEED_ERROR_SUCCESS;
1348   } else {
1349     // Check for valid fallback resource
1350     const char *resource, *fallback_resource;
1351     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1352     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1353     CeedChk(ierr);
1354     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1355       // Fallback to reference Ceed
1356       if (!op->op_fallback) {
1357         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1358       }
1359       // Assemble
1360       ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
1361              cols); CeedChk(ierr);
1362       return CEED_ERROR_SUCCESS;
1363     }
1364   }
1365 
1366   // Default interface implementation
1367 
1368   // count entries and allocate rows, cols arrays
1369   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1370   *num_entries = 0;
1371   if (is_composite) {
1372     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1373     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1374     for (int k = 0; k < num_suboperators; ++k) {
1375       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1376              &single_entries); CeedChk(ierr);
1377       *num_entries += single_entries;
1378     }
1379   } else {
1380     ierr = CeedSingleOperatorAssemblyCountEntries(op,
1381            &single_entries); CeedChk(ierr);
1382     *num_entries += single_entries;
1383   }
1384   ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
1385   ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);
1386 
1387   // assemble nonzero locations
1388   CeedInt offset = 0;
1389   if (is_composite) {
1390     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1391     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1392     for (int k = 0; k < num_suboperators; ++k) {
1393       ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
1394              *cols); CeedChk(ierr);
1395       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1396              &single_entries);
1397       CeedChk(ierr);
1398       offset += single_entries;
1399     }
1400   } else {
1401     ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
1402     CeedChk(ierr);
1403   }
1404 
1405   return CEED_ERROR_SUCCESS;
1406 }
1407 
1408 /**
1409    @brief Fully assemble the nonzero entries of a linear operator.
1410 
1411    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()
1412 
1413    The assembly routines use coordinate format, with num_entries tuples of the
1414    form (i, j, value) which indicate that value should be added to the matrix
1415    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1416    This function returns the values of the nonzero entries to be added, their
1417    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1418 
1419    This will generally be slow unless your operator is low-order.
1420 
1421    @param[in]  op      CeedOperator to assemble
1422    @param[out] values  Values to assemble into matrix
1423 
1424    @ref User
1425 **/
1426 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1427   int ierr;
1428   CeedInt num_suboperators, single_entries = 0;
1429   CeedOperator *sub_operators;
1430   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1431 
1432   // Use backend version, if available
1433   if (op->LinearAssemble) {
1434     ierr = op->LinearAssemble(op, values); CeedChk(ierr);
1435     return CEED_ERROR_SUCCESS;
1436   } else {
1437     // Check for valid fallback resource
1438     const char *resource, *fallback_resource;
1439     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1440     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1441     CeedChk(ierr);
1442     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1443       // Fallback to reference Ceed
1444       if (!op->op_fallback) {
1445         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1446       }
1447       // Assemble
1448       ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr);
1449       return CEED_ERROR_SUCCESS;
1450     }
1451   }
1452 
1453   // Default interface implementation
1454   bool is_composite;
1455   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1456 
1457   CeedInt offset = 0;
1458   if (is_composite) {
1459     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1460     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1461     for (int k = 0; k < num_suboperators; ++k) {
1462       ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
1463       CeedChk(ierr);
1464       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1465              &single_entries);
1466       CeedChk(ierr);
1467       offset += single_entries;
1468     }
1469   } else {
1470     ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
1471   }
1472 
1473   return CEED_ERROR_SUCCESS;
1474 }
1475 
1476 /**
1477   @brief Create a multigrid coarse operator and level transfer operators
1478            for a CeedOperator, creating the prolongation basis from the
1479            fine and coarse grid interpolation
1480 
1481   @param[in] op_fine       Fine grid operator
1482   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
1483   @param[in] rstr_coarse   Coarse grid restriction
1484   @param[in] basis_coarse  Coarse grid active vector basis
1485   @param[out] op_coarse    Coarse grid operator
1486   @param[out] op_prolong   Coarse to fine operator
1487   @param[out] op_restrict  Fine to coarse operator
1488 
1489   @return An error code: 0 - success, otherwise - failure
1490 
1491   @ref User
1492 **/
int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
                                     CeedVector p_mult_fine,
                                     CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
                                     CeedOperator *op_coarse, CeedOperator *op_prolong,
                                     CeedOperator *op_restrict) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  // (both levels must use the same number of quadrature points so the fine
  //  operator's QFunction data is reusable on the coarse level)
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  // For tensor bases the transfer is built from the 1D interpolation matrices;
  // for non-tensor bases the full (num_nodes x num_qpts) matrices are used.
  // Mixing tensor and non-tensor levels is not supported.
  CeedInt P_f, P_c, Q = Q_f;
  bool is_tensor_f, is_tensor_c;
  ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
  ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
  CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
  if (is_tensor_f && is_tensor_c) {
    ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
    ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
  } else if (!is_tensor_f && !is_tensor_c) {
    ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
  } else {
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MINOR,
                     "Bases must both be tensor or non-tensor");
    // LCOV_EXCL_STOP
  }

  // Working copies: the factorization below overwrites interp_f and interp_c,
  // so the basis-owned matrices are copied first
  ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
  ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
  ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
  ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
  if (is_tensor_f) {
    memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]);
    memcpy(interp_c, basis_coarse->interp_1d,
           Q*P_c*sizeof basis_coarse->interp_1d[0]);
  } else {
    memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]);
    memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]);
  }

  // Least-squares solve of interp_f * interp_c_to_f = interp_c,
  // i.e. interp_c_to_f = R^-1 Q^T interp_c, so that applying the fine basis
  // after the coarse-to-fine map reproduces the coarse interpolation

  // -- QR Factorization, interp_f = Q R
  ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);

  // -- Apply Qtranspose, interp_c = Qtranspose interp_c
  ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
                               Q, P_c, P_f, P_c, 1); CeedChk(ierr);

  // -- Apply Rinv, interp_c_to_f = Rinv interp_c
  // Back substitution, column by column; R lives in the upper triangle of the
  // factored interp_f (row-major, row length P_f), and interp_c_to_f is a
  // P_f x P_c matrix stored row-major with row length P_c
  for (CeedInt j=0; j<P_c; j++) { // Column j
    interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
    for (CeedInt i=P_f-2; i>=0; i--) { // Row i
      interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
      for (CeedInt k=i+1; k<P_f; k++)
        interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
      interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
    }
  }
  ierr = CeedFree(&tau); CeedChk(ierr);
  ierr = CeedFree(&interp_c); CeedChk(ierr);
  ierr = CeedFree(&interp_f); CeedChk(ierr);

  // Complete with interp_c_to_f versions of code
  // The tensor/non-tensor specific creators build the transfer basis and the
  // coarse, prolongation, and restriction operators from interp_c_to_f
  if (is_tensor_f) {
    ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  } else {
    ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  }

  // Cleanup
  ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}
1583 
1584 /**
1585   @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator with a tensor basis for the active vector
1587 
1588   @param[in] op_fine        Fine grid operator
1589   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
1590   @param[in] rstr_coarse    Coarse grid restriction
1591   @param[in] basis_coarse   Coarse grid active vector basis
1592   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
1593   @param[out] op_coarse     Coarse grid operator
1594   @param[out] op_prolong    Coarse to fine operator
1595   @param[out] op_restrict   Fine to coarse operator
1596 
1597   @return An error code: 0 - success, otherwise - failure
1598 
1599   @ref User
1600 **/
1601 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
1602     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1603     const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
1604     CeedOperator *op_prolong, CeedOperator *op_restrict) {
1605   int ierr;
1606   Ceed ceed;
1607   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
1608 
1609   // Check for compatible quadrature spaces
1610   CeedBasis basis_fine;
1611   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
1612   CeedInt Q_f, Q_c;
1613   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
1614   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
1615   if (Q_f != Q_c)
1616     // LCOV_EXCL_START
1617     return CeedError(ceed, CEED_ERROR_DIMENSION,
1618                      "Bases must have compatible quadrature spaces");
1619   // LCOV_EXCL_STOP
1620 
1621   // Coarse to fine basis
1622   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
1623   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
1624   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
1625   ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr);
1626   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
1627   CeedChk(ierr);
1628   P_1d_c = dim == 1 ? num_nodes_c :
1629            dim == 2 ? sqrt(num_nodes_c) :
1630            cbrt(num_nodes_c);
1631   CeedScalar *q_ref, *q_weight, *grad;
1632   ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr);
1633   ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr);
1634   ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr);
1635   CeedBasis basis_c_to_f;
1636   ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f,
1637                                  interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
1638   CeedChk(ierr);
1639   ierr = CeedFree(&q_ref); CeedChk(ierr);
1640   ierr = CeedFree(&q_weight); CeedChk(ierr);
1641   ierr = CeedFree(&grad); CeedChk(ierr);
1642 
1643   // Core code
1644   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
1645                                           basis_coarse, basis_c_to_f, op_coarse,
1646                                           op_prolong, op_restrict);
1647   CeedChk(ierr);
1648   return CEED_ERROR_SUCCESS;
1649 }
1650 
1651 /**
1652   @brief Create a multigrid coarse operator and level transfer operators
1653            for a CeedOperator with a non-tensor basis for the active vector
1654 
1655   @param[in] op_fine        Fine grid operator
1656   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
1657   @param[in] rstr_coarse    Coarse grid restriction
1658   @param[in] basis_coarse   Coarse grid active vector basis
1659   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
1660   @param[out] op_coarse     Coarse grid operator
1661   @param[out] op_prolong    Coarse to fine operator
1662   @param[out] op_restrict   Fine to coarse operator
1663 
1664   @return An error code: 0 - success, otherwise - failure
1665 
1666   @ref User
1667 **/
1668 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine,
1669                                        CeedVector p_mult_fine,
1670                                        CeedElemRestriction rstr_coarse,
1671                                        CeedBasis basis_coarse,
1672                                        const CeedScalar *interp_c_to_f,
1673                                        CeedOperator *op_coarse,
1674                                        CeedOperator *op_prolong,
1675                                        CeedOperator *op_restrict) {
1676   int ierr;
1677   Ceed ceed;
1678   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
1679 
1680   // Check for compatible quadrature spaces
1681   CeedBasis basis_fine;
1682   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
1683   CeedInt Q_f, Q_c;
1684   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
1685   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
1686   if (Q_f != Q_c)
1687     // LCOV_EXCL_START
1688     return CeedError(ceed, CEED_ERROR_DIMENSION,
1689                      "Bases must have compatible quadrature spaces");
1690   // LCOV_EXCL_STOP
1691 
1692   // Coarse to fine basis
1693   CeedElemTopology topo;
1694   ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr);
1695   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
1696   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
1697   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
1698   ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr);
1699   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
1700   CeedChk(ierr);
1701   CeedScalar *q_ref, *q_weight, *grad;
1702   ierr = CeedCalloc(num_nodes_f, &q_ref); CeedChk(ierr);
1703   ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr);
1704   ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr);
1705   CeedBasis basis_c_to_f;
1706   ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f,
1707                            interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
1708   CeedChk(ierr);
1709   ierr = CeedFree(&q_ref); CeedChk(ierr);
1710   ierr = CeedFree(&q_weight); CeedChk(ierr);
1711   ierr = CeedFree(&grad); CeedChk(ierr);
1712 
1713   // Core code
1714   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
1715                                           basis_coarse, basis_c_to_f, op_coarse,
1716                                           op_prolong, op_restrict);
1717   CeedChk(ierr);
1718   return CEED_ERROR_SUCCESS;
1719 }
1720 
1721 /**
1722   @brief Build a FDM based approximate inverse for each element for a
1723            CeedOperator
1724 
1725   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization
1726     Method based approximate inverse. This function obtains the simultaneous
1727     diagonalization for the 1D mass and Laplacian operators,
1728       M = V^T V, K = V^T S V.
1729     The assembled QFunction is used to modify the eigenvalues from simultaneous
1730     diagonalization and obtain an approximate inverse of the form
1731       V^T S^hat V. The CeedOperator must be linear and non-composite. The
1732     associated CeedQFunction must therefore also be linear.
1733 
1734   @param op            CeedOperator to create element inverses
1735   @param[out] fdm_inv  CeedOperator to apply the action of a FDM based inverse
1736                          for each element
1737   @param request       Address of CeedRequest for non-blocking completion, else
1738                          @ref CEED_REQUEST_IMMEDIATE
1739 
1740   @return An error code: 0 - success, otherwise - failure
1741 
1742   @ref Backend
1743 **/
1744 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv,
1745                                         CeedRequest *request) {
1746   int ierr;
1747   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1748 
1749   // Use backend version, if available
1750   if (op->CreateFDMElementInverse) {
1751     ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr);
1752     return CEED_ERROR_SUCCESS;
1753   } else {
1754     // Check for valid fallback resource
1755     const char *resource, *fallback_resource;
1756     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1757     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1758     CeedChk(ierr);
1759     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1760       // Fallback to reference Ceed
1761       if (!op->op_fallback) {
1762         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1763       }
1764       // Assemble
1765       ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request);
1766       CeedChk(ierr);
1767       return CEED_ERROR_SUCCESS;
1768     }
1769   }
1770 
1771   // Interface implementation
1772   Ceed ceed, ceed_parent;
1773   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
1774   ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr);
1775   ceed_parent = ceed_parent ? ceed_parent : ceed;
1776   CeedQFunction qf;
1777   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
1778 
1779   // Determine active input basis
1780   bool interp = false, grad = false;
1781   CeedBasis basis = NULL;
1782   CeedElemRestriction rstr = NULL;
1783   CeedOperatorField *op_fields;
1784   CeedQFunctionField *qf_fields;
1785   ierr = CeedOperatorGetFields(op, &op_fields, NULL); CeedChk(ierr);
1786   ierr = CeedQFunctionGetFields(qf, &qf_fields, NULL); CeedChk(ierr);
1787   CeedInt num_input_fields;
1788   ierr = CeedQFunctionGetNumArgs(qf, &num_input_fields, NULL); CeedChk(ierr);
1789   for (CeedInt i=0; i<num_input_fields; i++) {
1790     CeedVector vec;
1791     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
1792     if (vec == CEED_VECTOR_ACTIVE) {
1793       CeedEvalMode eval_mode;
1794       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
1795       interp = interp || eval_mode == CEED_EVAL_INTERP;
1796       grad = grad || eval_mode == CEED_EVAL_GRAD;
1797       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr);
1798       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
1799     }
1800   }
1801   if (!basis)
1802     // LCOV_EXCL_START
1803     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
1804   // LCOV_EXCL_STOP
1805   CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1,
1806                                                 l_size = 1;
1807   ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr);
1808   ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr);
1809   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr);
1810   ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr);
1811   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
1812   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
1813   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
1814   ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr);
1815 
1816   // Build and diagonalize 1D Mass and Laplacian
1817   bool tensor_basis;
1818   ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr);
1819   if (!tensor_basis)
1820     // LCOV_EXCL_START
1821     return CeedError(ceed, CEED_ERROR_BACKEND,
1822                      "FDMElementInverse only supported for tensor "
1823                      "bases");
1824   // LCOV_EXCL_STOP
1825   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
1826   ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr);
1827   ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr);
1828   ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr);
1829   ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr);
1830   ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr);
1831   // -- Build matrices
1832   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
1833   ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr);
1834   ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr);
1835   ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr);
1836   ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim,
1837                               mass, laplace); CeedChk(ierr);
1838 
1839   // -- Diagonalize
1840   ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d);
1841   CeedChk(ierr);
1842   ierr = CeedFree(&mass); CeedChk(ierr);
1843   ierr = CeedFree(&laplace); CeedChk(ierr);
1844   for (CeedInt i=0; i<P_1d; i++)
1845     for (CeedInt j=0; j<P_1d; j++)
1846       fdm_interp[i+j*P_1d] = x[j+i*P_1d];
1847   ierr = CeedFree(&x); CeedChk(ierr);
1848 
1849   // Assemble QFunction
1850   CeedVector assembled;
1851   CeedElemRestriction rstr_qf;
1852   ierr =  CeedOperatorLinearAssembleQFunction(op, &assembled, &rstr_qf, request);
1853   CeedChk(ierr);
1854   CeedInt layout[3];
1855   ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr);
1856   ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr);
1857   CeedScalar max_norm = 0;
1858   ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr);
1859 
1860   // Calculate element averages
1861   CeedInt num_modes = (interp?1:0) + (grad?dim:0);
1862   CeedScalar *elem_avg;
1863   const CeedScalar *assembled_array, *q_weight_array;
1864   CeedVector q_weight;
1865   ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr);
1866   ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT,
1867                         CEED_VECTOR_NONE, q_weight); CeedChk(ierr);
1868   ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array);
1869   CeedChk(ierr);
1870   ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array);
1871   CeedChk(ierr);
1872   ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr);
1873   const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
1874   for (CeedInt e=0; e<num_elem; e++) {
1875     CeedInt count = 0;
1876     for (CeedInt q=0; q<num_qpts; q++)
1877       for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++)
1878         if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) >
1879             qf_value_bound) {
1880           elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] /
1881                          q_weight_array[q];
1882           count++;
1883         }
1884     if (count) {
1885       elem_avg[e] /= count;
1886     } else {
1887       elem_avg[e] = 1.0;
1888     }
1889   }
1890   ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr);
1891   ierr = CeedVectorDestroy(&assembled); CeedChk(ierr);
1892   ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr);
1893   ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr);
1894 
1895   // Build FDM diagonal
1896   CeedVector q_data;
1897   CeedScalar *q_data_array, *fdm_diagonal;
1898   ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr);
1899   const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON;
1900   for (CeedInt c=0; c<num_comp; c++)
1901     for (CeedInt n=0; n<elem_size; n++) {
1902       if (interp)
1903         fdm_diagonal[c*elem_size + n] = 1.0;
1904       if (grad)
1905         for (CeedInt d=0; d<dim; d++) {
1906           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
1907           fdm_diagonal[c*elem_size + n] += lambda[i];
1908         }
1909       if (fabs(fdm_diagonal[c*elem_size + n]) < fdm_diagonal_bound)
1910         fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound;
1911     }
1912   ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data);
1913   CeedChk(ierr);
1914   ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr);
1915   ierr = CeedVectorGetArray(q_data, CEED_MEM_HOST, &q_data_array); CeedChk(ierr);
1916   for (CeedInt e=0; e<num_elem; e++)
1917     for (CeedInt c=0; c<num_comp; c++)
1918       for (CeedInt n=0; n<elem_size; n++)
1919         q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] *
1920             fdm_diagonal[c*elem_size + n]);
1921   ierr = CeedFree(&elem_avg); CeedChk(ierr);
1922   ierr = CeedFree(&fdm_diagonal); CeedChk(ierr);
1923   ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr);
1924 
1925   // Setup FDM operator
1926   // -- Basis
1927   CeedBasis fdm_basis;
1928   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
1929   ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr);
1930   ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr);
1931   ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr);
1932   ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d,
1933                                  fdm_interp, grad_dummy, q_ref_dummy,
1934                                  q_weight_dummy, &fdm_basis); CeedChk(ierr);
1935   ierr = CeedFree(&fdm_interp); CeedChk(ierr);
1936   ierr = CeedFree(&grad_dummy); CeedChk(ierr);
1937   ierr = CeedFree(&q_ref_dummy); CeedChk(ierr);
1938   ierr = CeedFree(&q_weight_dummy); CeedChk(ierr);
1939   ierr = CeedFree(&lambda); CeedChk(ierr);
1940 
1941   // -- Restriction
1942   CeedElemRestriction rstr_qd_i;
1943   CeedInt strides[3] = {1, elem_size, elem_size*num_comp};
1944   ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size,
1945                                           num_comp, num_elem*num_comp*elem_size,
1946                                           strides, &rstr_qd_i); CeedChk(ierr);
1947   // -- QFunction
1948   CeedQFunction qf_fdm;
1949   ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm);
1950   CeedChk(ierr);
1951   ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP);
1952   CeedChk(ierr);
1953   ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE);
1954   CeedChk(ierr);
1955   ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP);
1956   CeedChk(ierr);
1957   // -- QFunction context
1958   CeedInt *num_comp_data;
1959   ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr);
1960   num_comp_data[0] = num_comp;
1961   CeedQFunctionContext ctx_fdm;
1962   ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr);
1963   ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER,
1964                                      sizeof(*num_comp_data), num_comp_data);
1965   CeedChk(ierr);
1966   ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr);
1967   ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr);
1968   // -- Operator
1969   ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv);
1970   CeedChk(ierr);
1971   CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
1972   CeedChk(ierr);
1973   CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED,
1974                        q_data); CeedChk(ierr);
1975   CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
1976   CeedChk(ierr);
1977 
1978   // Cleanup
1979   ierr = CeedVectorDestroy(&q_data); CeedChk(ierr);
1980   ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr);
1981   ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr);
1982   ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr);
1983 
1984   return CEED_ERROR_SUCCESS;
1985 }
1986 
1987 /// @}
1988