xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision 428b7a1209da14c7168a290c625ec4fa7d71aa51)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed/ceed.h>
9 #include <ceed/backend.h>
10 #include <ceed-impl.h>
11 #include <math.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <string.h>
15 
16 /// @file
17 /// Implementation of CeedOperator preconditioning interfaces
18 
19 /// ----------------------------------------------------------------------------
20 /// CeedOperator Library Internal Preconditioning Functions
21 /// ----------------------------------------------------------------------------
22 /// @addtogroup CeedOperatorDeveloper
23 /// @{
24 
25 /**
26   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced
27          CeedOperator functionality
28 
29   @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in] qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback  fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf,
38                                        CeedQFunction *qf_fallback) {
39   int ierr;
40 
41   // Check if NULL qf passed in
42   if (!qf) return CEED_ERROR_SUCCESS;
43 
44   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
45   CeedDebug256(qf->ceed, 255, "Creating fallback CeedQFunction\n");
46 
47   char *source_path_with_name = "";
48   if (qf->source_path) {
49     size_t path_len = strlen(qf->source_path),
50            name_len = strlen(qf->kernel_name);
51     ierr = CeedCalloc(path_len + name_len + 2, &source_path_with_name);
52     CeedChk(ierr);
53     memcpy(source_path_with_name, qf->source_path, path_len);
54     memcpy(&source_path_with_name[path_len], ":", 1);
55     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
56   } else {
57     ierr = CeedCalloc(1, &source_path_with_name); CeedChk(ierr);
58   }
59 
60   ierr = CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length,
61                                      qf->function, source_path_with_name,
62                                      qf_fallback); CeedChk(ierr);
63   {
64     CeedQFunctionContext ctx;
65 
66     ierr = CeedQFunctionGetContext(qf, &ctx); CeedChk(ierr);
67     ierr = CeedQFunctionSetContext(*qf_fallback, ctx); CeedChk(ierr);
68   }
69   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
70     ierr = CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name,
71                                  qf->input_fields[i]->size,
72                                  qf->input_fields[i]->eval_mode); CeedChk(ierr);
73   }
74   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
75     ierr = CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name,
76                                   qf->output_fields[i]->size,
77                                   qf->output_fields[i]->eval_mode); CeedChk(ierr);
78   }
79   ierr = CeedFree(&source_path_with_name); CeedChk(ierr);
80 
81   return CEED_ERROR_SUCCESS;
82 }
83 
84 /**
85   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced
86          CeedOperator functionality
87 
88   @param op  CeedOperator to create fallback for
89 
90   @return An error code: 0 - success, otherwise - failure
91 
92   @ref Developer
93 **/
94 static int CeedOperatorCreateFallback(CeedOperator op) {
95   int ierr;
96   Ceed ceed_fallback;
97 
98   // Check not already created
99   if (op->op_fallback) return CEED_ERROR_SUCCESS;
100 
101   // Fallback Ceed
102   ierr = CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback); CeedChk(ierr);
103   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
104 
105   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
106   CeedDebug256(op->ceed, 255, "Creating fallback CeedOperator\n");
107 
108   // Clone Op
109   CeedOperator op_fallback;
110   if (op->is_composite) {
111     ierr = CeedCompositeOperatorCreate(ceed_fallback, &op_fallback);
112     CeedChk(ierr);
113     for (CeedInt i = 0; i < op->num_suboperators; i++) {
114       CeedOperator op_sub_fallback;
115 
116       ierr = CeedOperatorGetFallback(op->sub_operators[i], &op_sub_fallback);
117       CeedChk(ierr);
118       ierr = CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback); CeedChk(ierr);
119     }
120   } else {
121     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
122     ierr = CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback);
123     CeedChk(ierr);
124     ierr = CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback);
125     CeedChk(ierr);
126     ierr = CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback);
127     CeedChk(ierr);
128     ierr = CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback,
129                               dqfT_fallback, &op_fallback); CeedChk(ierr);
130     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
131       ierr = CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name,
132                                   op->input_fields[i]->elem_restr,
133                                   op->input_fields[i]->basis,
134                                   op->input_fields[i]->vec); CeedChk(ierr);
135     }
136     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
137       ierr = CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name,
138                                   op->output_fields[i]->elem_restr,
139                                   op->output_fields[i]->basis,
140                                   op->output_fields[i]->vec); CeedChk(ierr);
141     }
142     ierr = CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled,
143            &op_fallback->qf_assembled); CeedChk(ierr);
144     if (op_fallback->num_qpts == 0) {
145       ierr = CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts);
146       CeedChk(ierr);
147     }
148     // Cleanup
149     ierr = CeedQFunctionDestroy(&qf_fallback); CeedChk(ierr);
150     ierr = CeedQFunctionDestroy(&dqf_fallback); CeedChk(ierr);
151     ierr = CeedQFunctionDestroy(&dqfT_fallback); CeedChk(ierr);
152   }
153   ierr = CeedOperatorSetName(op_fallback, op->name); CeedChk(ierr);
154   ierr = CeedOperatorCheckReady(op_fallback); CeedChk(ierr);
155   op->op_fallback = op_fallback;
156 
157   return CEED_ERROR_SUCCESS;
158 }
159 
160 /**
161   @brief Retreive fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
162 
163   @param[in] op            CeedOperator to retrieve fallback for
164   @param[out] op_fallback  Fallback CeedOperator
165 
166   @return An error code: 0 - success, otherwise - failure
167 
168   @ref Developer
169 **/
170 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
171   int ierr;
172 
173   // Create if needed
174   if (!op->op_fallback) {
175     ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
176   }
177   if (op->op_fallback) {
178     bool is_debug;
179 
180     ierr = CeedIsDebug(op->ceed, &is_debug); CeedChk(ierr);
181     if (is_debug) {
182       Ceed ceed_fallback;
183       const char *resource, *resource_fallback;
184 
185       ierr = CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback); CeedChk(ierr);
186       ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
187       ierr = CeedGetResource(ceed_fallback, &resource_fallback); CeedChk(ierr);
188 
189       CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
190       CeedDebug256(op->ceed, 255,
191                    "Falling back from %s operator at address %ld to %s operator at address %ld\n",
192                    resource, op, resource_fallback, op->op_fallback);
193     }
194   }
195   *op_fallback = op->op_fallback;
196 
197   return CEED_ERROR_SUCCESS;
198 }
199 
200 /**
201   @brief Select correct basis matrix pointer based on CeedEvalMode
202 
203   @param[in] eval_mode   Current basis evaluation mode
204   @param[in] identity    Pointer to identity matrix
205   @param[in] interp      Pointer to interpolation matrix
206   @param[in] grad        Pointer to gradient matrix
207   @param[out] basis_ptr  Basis pointer to set
208 
209   @ref Developer
210 **/
211 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode,
212     const CeedScalar *identity, const CeedScalar *interp,
213     const CeedScalar *grad, const CeedScalar **basis_ptr) {
214   switch (eval_mode) {
215   case CEED_EVAL_NONE:
216     *basis_ptr = identity;
217     break;
218   case CEED_EVAL_INTERP:
219     *basis_ptr = interp;
220     break;
221   case CEED_EVAL_GRAD:
222     *basis_ptr = grad;
223     break;
224   case CEED_EVAL_WEIGHT:
225   case CEED_EVAL_DIV:
226   case CEED_EVAL_CURL:
227     break; // Caught by QF Assembly
228   }
229 }
230 
231 /**
232   @brief Create point block restriction for active operator field
233 
234   @param[in] rstr              Original CeedElemRestriction for active field
235   @param[out] pointblock_rstr  Address of the variable where the newly created
236                                  CeedElemRestriction will be stored
237 
238   @return An error code: 0 - success, otherwise - failure
239 
240   @ref Developer
241 **/
242 static int CeedOperatorCreateActivePointBlockRestriction(
243   CeedElemRestriction rstr,
244   CeedElemRestriction *pointblock_rstr) {
245   int ierr;
246   Ceed ceed;
247   ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr);
248   const CeedInt *offsets;
249   ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets);
250   CeedChk(ierr);
251 
252   // Expand offsets
253   CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1,
254                                                       *pointblock_offsets;
255   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
256   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
257   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
258   ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr);
259   CeedInt shift = num_comp;
260   if (comp_stride != 1)
261     shift *= num_comp;
262   ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets);
263   CeedChk(ierr);
264   for (CeedInt i = 0; i < num_elem*elem_size; i++) {
265     pointblock_offsets[i] = offsets[i]*shift;
266     if (pointblock_offsets[i] > max)
267       max = pointblock_offsets[i];
268   }
269 
270   // Create new restriction
271   ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp,
272                                    1, max + num_comp*num_comp, CEED_MEM_HOST,
273                                    CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr);
274   CeedChk(ierr);
275 
276   // Cleanup
277   ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr);
278 
279   return CEED_ERROR_SUCCESS;
280 }
281 
282 /**
283   @brief Core logic for assembling operator diagonal or point block diagonal
284 
285   @param[in] op             CeedOperator to assemble point block diagonal
286   @param[in] request        Address of CeedRequest for non-blocking completion, else
287                               CEED_REQUEST_IMMEDIATE
288   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
289   @param[out] assembled     CeedVector to store assembled diagonal
290 
291   @return An error code: 0 - success, otherwise - failure
292 
293   @ref Developer
294 **/
295 static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op,
296     CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
297   int ierr;
298   Ceed ceed;
299   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
300 
301   // Assemble QFunction
302   CeedQFunction qf;
303   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
304   CeedInt num_input_fields, num_output_fields;
305   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
306   CeedChk(ierr);
307   CeedVector assembled_qf;
308   CeedElemRestriction rstr;
309   ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op,  &assembled_qf,
310          &rstr, request); CeedChk(ierr);
311   CeedInt layout[3];
312   ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr);
313   ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
314   CeedScalar max_norm = 0;
315   ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr);
316 
317   // Determine active input basis
318   CeedOperatorField *op_fields;
319   CeedQFunctionField *qf_fields;
320   ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr);
321   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
322   CeedInt num_eval_mode_in = 0, num_comp, dim = 1;
323   CeedEvalMode *eval_mode_in = NULL;
324   CeedBasis basis_in = NULL;
325   CeedElemRestriction rstr_in = NULL;
326   for (CeedInt i=0; i<num_input_fields; i++) {
327     CeedVector vec;
328     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
329     if (vec == CEED_VECTOR_ACTIVE) {
330       CeedElemRestriction rstr;
331       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr);
332       ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr);
333       ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
334       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
335       if (rstr_in && rstr_in != rstr)
336         // LCOV_EXCL_START
337         return CeedError(ceed, CEED_ERROR_BACKEND,
338                          "Multi-field non-composite operator diagonal assembly not supported");
339       // LCOV_EXCL_STOP
340       rstr_in = rstr;
341       CeedEvalMode eval_mode;
342       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
343       switch (eval_mode) {
344       case CEED_EVAL_NONE:
345       case CEED_EVAL_INTERP:
346         ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
347         eval_mode_in[num_eval_mode_in] = eval_mode;
348         num_eval_mode_in += 1;
349         break;
350       case CEED_EVAL_GRAD:
351         ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
352         for (CeedInt d=0; d<dim; d++)
353           eval_mode_in[num_eval_mode_in+d] = eval_mode;
354         num_eval_mode_in += dim;
355         break;
356       case CEED_EVAL_WEIGHT:
357       case CEED_EVAL_DIV:
358       case CEED_EVAL_CURL:
359         break; // Caught by QF Assembly
360       }
361     }
362   }
363 
364   // Determine active output basis
365   ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr);
366   ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
367   CeedInt num_eval_mode_out = 0;
368   CeedEvalMode *eval_mode_out = NULL;
369   CeedBasis basis_out = NULL;
370   CeedElemRestriction rstr_out = NULL;
371   for (CeedInt i=0; i<num_output_fields; i++) {
372     CeedVector vec;
373     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
374     if (vec == CEED_VECTOR_ACTIVE) {
375       CeedElemRestriction rstr;
376       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr);
377       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
378       if (rstr_out && rstr_out != rstr)
379         // LCOV_EXCL_START
380         return CeedError(ceed, CEED_ERROR_BACKEND,
381                          "Multi-field non-composite operator diagonal assembly not supported");
382       // LCOV_EXCL_STOP
383       rstr_out = rstr;
384       CeedEvalMode eval_mode;
385       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
386       switch (eval_mode) {
387       case CEED_EVAL_NONE:
388       case CEED_EVAL_INTERP:
389         ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
390         eval_mode_out[num_eval_mode_out] = eval_mode;
391         num_eval_mode_out += 1;
392         break;
393       case CEED_EVAL_GRAD:
394         ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
395         for (CeedInt d=0; d<dim; d++)
396           eval_mode_out[num_eval_mode_out+d] = eval_mode;
397         num_eval_mode_out += dim;
398         break;
399       case CEED_EVAL_WEIGHT:
400       case CEED_EVAL_DIV:
401       case CEED_EVAL_CURL:
402         break; // Caught by QF Assembly
403       }
404     }
405   }
406 
407   // Assemble point block diagonal restriction, if needed
408   CeedElemRestriction diag_rstr = rstr_out;
409   if (is_pointblock) {
410     ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr);
411     CeedChk(ierr);
412   }
413 
414   // Create diagonal vector
415   CeedVector elem_diag;
416   ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
417   CeedChk(ierr);
418 
419   // Assemble element operator diagonals
420   CeedScalar *elem_diag_array;
421   const CeedScalar *assembled_qf_array;
422   ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr);
423   ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
424   CeedChk(ierr);
425   ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
426   CeedChk(ierr);
427   CeedInt num_elem, num_nodes, num_qpts;
428   ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr);
429   ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr);
430   ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
431   // Basis matrices
432   const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
433   CeedScalar *identity = NULL;
434   bool evalNone = false;
435   for (CeedInt i=0; i<num_eval_mode_in; i++)
436     evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE);
437   for (CeedInt i=0; i<num_eval_mode_out; i++)
438     evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE);
439   if (evalNone) {
440     ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr);
441     for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++)
442       identity[i*num_nodes+i] = 1.0;
443   }
444   ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
445   ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr);
446   ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
447   ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr);
448   // Compute the diagonal of B^T D B
449   // Each element
450   const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
451   for (CeedInt e=0; e<num_elem; e++) {
452     CeedInt d_out = -1;
453     // Each basis eval mode pair
454     for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
455       const CeedScalar *bt = NULL;
456       if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
457         d_out += 1;
458       CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out,
459                                   &grad_out[d_out*num_qpts*num_nodes], &bt);
460       CeedInt d_in = -1;
461       for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
462         const CeedScalar *b = NULL;
463         if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
464           d_in += 1;
465         CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in,
466                                     &grad_in[d_in*num_qpts*num_nodes], &b);
467         // Each component
468         for (CeedInt c_out=0; c_out<num_comp; c_out++)
469           // Each qpoint/node pair
470           for (CeedInt q=0; q<num_qpts; q++)
471             if (is_pointblock) {
472               // Point Block Diagonal
473               for (CeedInt c_in=0; c_in<num_comp; c_in++) {
474                 const CeedScalar qf_value =
475                   assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)*
476                                                      num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
477                 if (fabs(qf_value) > qf_value_bound)
478                   for (CeedInt n=0; n<num_nodes; n++)
479                     elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
480                       bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
481               }
482             } else {
483               // Diagonal Only
484               const CeedScalar qf_value =
485                 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)*
486                                                    num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
487               if (fabs(qf_value) > qf_value_bound)
488                 for (CeedInt n=0; n<num_nodes; n++)
489                   elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
490                     bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
491             }
492       }
493     }
494   }
495   ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr);
496   ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
497   CeedChk(ierr);
498 
499   // Assemble local operator diagonal
500   ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag,
501                                   assembled, request); CeedChk(ierr);
502 
503   // Cleanup
504   if (is_pointblock) {
505     ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr);
506   }
507   ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
508   ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr);
509   ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
510   ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
511   ierr = CeedFree(&identity); CeedChk(ierr);
512 
513   return CEED_ERROR_SUCCESS;
514 }
515 
516 /**
517   @brief Core logic for assembling composite operator diagonal
518 
519   @param[in] op             CeedOperator to assemble point block diagonal
520   @param[in] request        Address of CeedRequest for non-blocking completion, else
521                             CEED_REQUEST_IMMEDIATE
522   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
523   @param[out] assembled     CeedVector to store assembled diagonal
524 
525   @return An error code: 0 - success, otherwise - failure
526 
527   @ref Developer
528 **/
529 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(
530   CeedOperator op, CeedRequest *request, const bool is_pointblock,
531   CeedVector assembled) {
532   int ierr;
533   CeedInt num_sub;
534   CeedOperator *suboperators;
535   ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr);
536   ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr);
537   for (CeedInt i = 0; i < num_sub; i++) {
538     if (is_pointblock) {
539       ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i],
540              assembled, request); CeedChk(ierr);
541     } else {
542       ierr = CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled,
543              request); CeedChk(ierr);
544     }
545   }
546   return CEED_ERROR_SUCCESS;
547 }
548 
549 /**
550   @brief Build nonzero pattern for non-composite operator
551 
552   Users should generally use CeedOperatorLinearAssembleSymbolic()
553 
554   @param[in] op      CeedOperator to assemble nonzero pattern
555   @param[in] offset  Offset for number of entries
556   @param[out] rows   Row number for each entry
557   @param[out] cols   Column number for each entry
558 
559   @return An error code: 0 - success, otherwise - failure
560 
561   @ref Developer
562 **/
563 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset,
564     CeedInt *rows, CeedInt *cols) {
565   int ierr;
566   Ceed ceed = op->ceed;
567   if (op->is_composite)
568     // LCOV_EXCL_START
569     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
570                      "Composite operator not supported");
571   // LCOV_EXCL_STOP
572 
573   CeedSize num_nodes;
574   ierr = CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL); CeedChk(ierr);
575   CeedElemRestriction rstr_in;
576   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr);
577   CeedInt num_elem, elem_size, num_comp;
578   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
579   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
580   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
581   CeedInt layout_er[3];
582   ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr);
583 
584   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
585 
586   // Determine elem_dof relation
587   CeedVector index_vec;
588   ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr);
589   CeedScalar *array;
590   ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr);
591   for (CeedInt i = 0; i < num_nodes; ++i) {
592     array[i] = i;
593   }
594   ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr);
595   CeedVector elem_dof;
596   ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof);
597   CeedChk(ierr);
598   ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr);
599   CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec,
600                            elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
601   const CeedScalar *elem_dof_a;
602   ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a);
603   CeedChk(ierr);
604   ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr);
605 
606   // Determine i, j locations for element matrices
607   CeedInt count = 0;
608   for (CeedInt e = 0; e < num_elem; ++e) {
609     for (CeedInt comp_in = 0; comp_in < num_comp; ++comp_in) {
610       for (CeedInt comp_out = 0; comp_out < num_comp; ++comp_out) {
611         for (CeedInt i = 0; i < elem_size; ++i) {
612           for (CeedInt j = 0; j < elem_size; ++j) {
613             const CeedInt elem_dof_index_row = (i)*layout_er[0] +
614                                                (comp_out)*layout_er[1] + e*layout_er[2];
615             const CeedInt elem_dof_index_col = (j)*layout_er[0] +
616                                                (comp_in)*layout_er[1] + e*layout_er[2];
617 
618             const CeedInt row = elem_dof_a[elem_dof_index_row];
619             const CeedInt col = elem_dof_a[elem_dof_index_col];
620 
621             rows[offset + count] = row;
622             cols[offset + count] = col;
623             count++;
624           }
625         }
626       }
627     }
628   }
629   if (count != local_num_entries)
630     // LCOV_EXCL_START
631     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
632   // LCOV_EXCL_STOP
633   ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr);
634   ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr);
635 
636   return CEED_ERROR_SUCCESS;
637 }
638 
639 /**
640   @brief Assemble nonzero entries for non-composite operator
641 
642   Users should generally use CeedOperatorLinearAssemble()
643 
644   @param[in] op       CeedOperator to assemble
645   @param[out] offset  Offest for number of entries
646   @param[out] values  Values to assemble into matrix
647 
648   @return An error code: 0 - success, otherwise - failure
649 
650   @ref Developer
651 **/
652 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
653                                       CeedVector values) {
654   int ierr;
655   Ceed ceed = op->ceed;
656   if (op->is_composite)
657     // LCOV_EXCL_START
658     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
659                      "Composite operator not supported");
660   // LCOV_EXCL_STOP
661 
662   // Assemble QFunction
663   CeedQFunction qf;
664   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
665   CeedVector assembled_qf;
666   CeedElemRestriction rstr_q;
667   ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(
668            op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
669 
670   CeedSize qf_length;
671   ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr);
672 
673   CeedInt num_input_fields, num_output_fields;
674   CeedOperatorField *input_fields;
675   CeedOperatorField *output_fields;
676   ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields,
677                                &num_output_fields, &output_fields); CeedChk(ierr);
678 
679   // Determine active input basis
680   CeedQFunctionField *qf_fields;
681   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
682   CeedInt num_eval_mode_in = 0, dim = 1;
683   CeedEvalMode *eval_mode_in = NULL;
684   CeedBasis basis_in = NULL;
685   CeedElemRestriction rstr_in = NULL;
686   for (CeedInt i=0; i<num_input_fields; i++) {
687     CeedVector vec;
688     ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr);
689     if (vec == CEED_VECTOR_ACTIVE) {
690       ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in);
691       CeedChk(ierr);
692       ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
693       ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in);
694       CeedChk(ierr);
695       CeedEvalMode eval_mode;
696       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
697       CeedChk(ierr);
698       switch (eval_mode) {
699       case CEED_EVAL_NONE:
700       case CEED_EVAL_INTERP:
701         ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
702         eval_mode_in[num_eval_mode_in] = eval_mode;
703         num_eval_mode_in += 1;
704         break;
705       case CEED_EVAL_GRAD:
706         ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
707         for (CeedInt d=0; d<dim; d++) {
708           eval_mode_in[num_eval_mode_in+d] = eval_mode;
709         }
710         num_eval_mode_in += dim;
711         break;
712       case CEED_EVAL_WEIGHT:
713       case CEED_EVAL_DIV:
714       case CEED_EVAL_CURL:
715         break; // Caught by QF Assembly
716       }
717     }
718   }
719 
720   // Determine active output basis
721   ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
722   CeedInt num_eval_mode_out = 0;
723   CeedEvalMode *eval_mode_out = NULL;
724   CeedBasis basis_out = NULL;
725   CeedElemRestriction rstr_out = NULL;
726   for (CeedInt i=0; i<num_output_fields; i++) {
727     CeedVector vec;
728     ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr);
729     if (vec == CEED_VECTOR_ACTIVE) {
730       ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out);
731       CeedChk(ierr);
732       ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out);
733       CeedChk(ierr);
734       CeedEvalMode eval_mode;
735       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
736       CeedChk(ierr);
737       switch (eval_mode) {
738       case CEED_EVAL_NONE:
739       case CEED_EVAL_INTERP:
740         ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
741         eval_mode_out[num_eval_mode_out] = eval_mode;
742         num_eval_mode_out += 1;
743         break;
744       case CEED_EVAL_GRAD:
745         ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
746         for (CeedInt d=0; d<dim; d++) {
747           eval_mode_out[num_eval_mode_out+d] = eval_mode;
748         }
749         num_eval_mode_out += dim;
750         break;
751       case CEED_EVAL_WEIGHT:
752       case CEED_EVAL_DIV:
753       case CEED_EVAL_CURL:
754         break; // Caught by QF Assembly
755       }
756     }
757   }
758 
759   if (num_eval_mode_in == 0 || num_eval_mode_out == 0)
760     // LCOV_EXCL_START
761     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
762                      "Cannot assemble operator with out inputs/outputs");
763   // LCOV_EXCL_STOP
764 
765   CeedInt num_elem, elem_size, num_qpts, num_comp;
766   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
767   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
768   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
769   ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
770 
771   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
772 
773   // loop over elements and put in data structure
774   const CeedScalar *interp_in, *grad_in;
775   ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
776   ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
777 
778   const CeedScalar *assembled_qf_array;
779   ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
780   CeedChk(ierr);
781 
782   CeedInt layout_qf[3];
783   ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr);
784   ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr);
785 
786   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
787   CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size];
788   CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size];
789   CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in *
790                                      num_qpts]; // logically 3-tensor
791   CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in];
792   CeedScalar elem_mat[elem_size * elem_size];
793   CeedInt count = 0;
794   CeedScalar *vals;
795   ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr);
796   for (CeedInt e = 0; e < num_elem; ++e) {
797     for (CeedInt comp_in = 0; comp_in < num_comp; ++comp_in) {
798       for (CeedInt comp_out = 0; comp_out < num_comp; ++comp_out) {
799         for (CeedInt ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) {
800           B_mat_in[ell] = 0.0;
801         }
802         for (CeedInt ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) {
803           B_mat_out[ell] = 0.0;
804         }
805         // Store block-diagonal D matrix as collection of small dense blocks
806         for (CeedInt ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts;
807              ++ell) {
808           D_mat[ell] = 0.0;
809         }
810         // form element matrix itself (for each block component)
811         for (CeedInt ell = 0; ell < elem_size*elem_size; ++ell) {
812           elem_mat[ell] = 0.0;
813         }
814         for (CeedInt q = 0; q < num_qpts; ++q) {
815           for (CeedInt n = 0; n < elem_size; ++n) {
816             CeedInt d_in = -1;
817             for (CeedInt e_in = 0; e_in < num_eval_mode_in; ++e_in) {
818               const CeedInt qq = num_eval_mode_in*q;
819               if (eval_mode_in[e_in] == CEED_EVAL_INTERP) {
820                 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n];
821               } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) {
822                 d_in += 1;
823                 B_mat_in[(qq+e_in)*elem_size + n] +=
824                   grad_in[(d_in*num_qpts+q) * elem_size + n];
825               } else {
826                 // LCOV_EXCL_START
827                 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
828                 // LCOV_EXCL_STOP
829               }
830             }
831             CeedInt d_out = -1;
832             for (CeedInt e_out = 0; e_out < num_eval_mode_out; ++e_out) {
833               const CeedInt qq = num_eval_mode_out*q;
834               if (eval_mode_out[e_out] == CEED_EVAL_INTERP) {
835                 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n];
836               } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) {
837                 d_out += 1;
838                 B_mat_out[(qq+e_out)*elem_size + n] +=
839                   grad_in[(d_out*num_qpts+q) * elem_size + n];
840               } else {
841                 // LCOV_EXCL_START
842                 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
843                 // LCOV_EXCL_STOP
844               }
845             }
846           }
847           for (CeedInt ei = 0; ei < num_eval_mode_out; ++ei) {
848             for (CeedInt ej = 0; ej < num_eval_mode_in; ++ej) {
849               const CeedInt eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)
850                                               *num_comp
851                                               +comp_out;
852               const CeedInt index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
853                                     e*layout_qf[2];
854               D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index];
855             }
856           }
857         }
858         // Compute B^T*D
859         for (CeedInt ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) {
860           BTD[ell] = 0.0;
861         }
862         for (CeedInt j = 0; j<elem_size; ++j) {
863           for (CeedInt q = 0; q<num_qpts; ++q) {
864             const CeedInt qq = num_eval_mode_out*q;
865             for (CeedInt ei = 0; ei < num_eval_mode_in; ++ei) {
866               for (CeedInt ej = 0; ej < num_eval_mode_out; ++ej) {
867                 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] +=
868                   B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q];
869               }
870             }
871           }
872         }
873 
874         ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size,
875                                   elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);
876 
877         // put element matrix in coordinate data structure
878         for (CeedInt i = 0; i < elem_size; ++i) {
879           for (CeedInt j = 0; j < elem_size; ++j) {
880             vals[offset + count] = elem_mat[i*elem_size + j];
881             count++;
882           }
883         }
884       }
885     }
886   }
887   if (count != local_num_entries)
888     // LCOV_EXCL_START
889     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
890   // LCOV_EXCL_STOP
891   ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr);
892 
893   ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
894   CeedChk(ierr);
895   ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
896   ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
897   ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
898 
899   return CEED_ERROR_SUCCESS;
900 }
901 
902 /**
903   @brief Count number of entries for assembled CeedOperator
904 
905   @param[in] op            CeedOperator to assemble
906   @param[out] num_entries  Number of entries in assembled representation
907 
908   @return An error code: 0 - success, otherwise - failure
909 
910   @ref Utility
911 **/
912 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op,
913     CeedInt *num_entries) {
914   int ierr;
915   CeedElemRestriction rstr;
916   CeedInt num_elem, elem_size, num_comp;
917 
918   if (op->is_composite)
919     // LCOV_EXCL_START
920     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
921                      "Composite operator not supported");
922   // LCOV_EXCL_STOP
923   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr);
924   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
925   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
926   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
927   *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
928 
929   return CEED_ERROR_SUCCESS;
930 }
931 
932 /**
933   @brief Common code for creating a multigrid coarse operator and level
934            transfer operators for a CeedOperator
935 
936   @param[in] op_fine       Fine grid operator
937   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
938   @param[in] rstr_coarse   Coarse grid restriction
939   @param[in] basis_coarse  Coarse grid active vector basis
940   @param[in] basis_c_to_f  Basis for coarse to fine interpolation
941   @param[out] op_coarse    Coarse grid operator
942   @param[out] op_prolong   Coarse to fine operator
943   @param[out] op_restrict  Fine to coarse operator
944 
945   @return An error code: 0 - success, otherwise - failure
946 
947   @ref Developer
948 **/
949 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
950     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
951     CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
952     CeedOperator *op_restrict) {
953   int ierr;
954   Ceed ceed;
955   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
956 
957   // Check for composite operator
958   bool is_composite;
959   ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
960   if (is_composite)
961     // LCOV_EXCL_START
962     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
963                      "Automatic multigrid setup for composite operators not supported");
964   // LCOV_EXCL_STOP
965 
966   // Coarse Grid
967   ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
968                             op_coarse); CeedChk(ierr);
969   CeedElemRestriction rstr_fine = NULL;
970   // -- Clone input fields
971   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
972     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
973       rstr_fine = op_fine->input_fields[i]->elem_restr;
974       ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
975                                   rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
976       CeedChk(ierr);
977     } else {
978       ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
979                                   op_fine->input_fields[i]->elem_restr,
980                                   op_fine->input_fields[i]->basis,
981                                   op_fine->input_fields[i]->vec); CeedChk(ierr);
982     }
983   }
984   // -- Clone output fields
985   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
986     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
987       ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
988                                   rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
989       CeedChk(ierr);
990     } else {
991       ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
992                                   op_fine->output_fields[i]->elem_restr,
993                                   op_fine->output_fields[i]->basis,
994                                   op_fine->output_fields[i]->vec); CeedChk(ierr);
995     }
996   }
997   // -- Clone QFunctionAssemblyData
998   ierr = CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled,
999          &(*op_coarse)->qf_assembled); CeedChk(ierr);
1000 
1001   // Multiplicity vector
1002   CeedVector mult_vec, mult_e_vec;
1003   ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
1004   CeedChk(ierr);
1005   ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
1006   ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
1007                                   mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
1008   ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
1009   ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
1010                                   CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
1011   ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
1012   ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);
1013 
1014   // Restriction
1015   CeedInt num_comp;
1016   ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
1017   CeedQFunction qf_restrict;
1018   ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
1019   CeedChk(ierr);
1020   CeedInt *num_comp_r_data;
1021   ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
1022   num_comp_r_data[0] = num_comp;
1023   CeedQFunctionContext ctx_r;
1024   ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
1025   ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
1026                                      sizeof(*num_comp_r_data), num_comp_r_data);
1027   CeedChk(ierr);
1028   ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
1029   ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
1030   ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
1031   CeedChk(ierr);
1032   ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
1033   CeedChk(ierr);
1034   ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
1035                                 CEED_EVAL_INTERP); CeedChk(ierr);
1036   ierr = CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp); CeedChk(ierr);
1037 
1038   ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
1039                             CEED_QFUNCTION_NONE, op_restrict);
1040   CeedChk(ierr);
1041   ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
1042                               CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
1043   CeedChk(ierr);
1044   ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
1045                               CEED_BASIS_COLLOCATED, mult_vec);
1046   CeedChk(ierr);
1047   ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
1048                               CEED_VECTOR_ACTIVE); CeedChk(ierr);
1049 
1050   // Prolongation
1051   CeedQFunction qf_prolong;
1052   ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
1053   CeedChk(ierr);
1054   CeedInt *num_comp_p_data;
1055   ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
1056   num_comp_p_data[0] = num_comp;
1057   CeedQFunctionContext ctx_p;
1058   ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
1059   ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
1060                                      sizeof(*num_comp_p_data), num_comp_p_data);
1061   CeedChk(ierr);
1062   ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
1063   ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
1064   ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
1065   CeedChk(ierr);
1066   ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
1067   CeedChk(ierr);
1068   ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
1069   CeedChk(ierr);
1070   ierr = CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp); CeedChk(ierr);
1071 
1072   ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
1073                             CEED_QFUNCTION_NONE, op_prolong);
1074   CeedChk(ierr);
1075   ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
1076                               CEED_VECTOR_ACTIVE); CeedChk(ierr);
1077   ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
1078                               CEED_BASIS_COLLOCATED, mult_vec);
1079   CeedChk(ierr);
1080   ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
1081                               CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
1082   CeedChk(ierr);
1083 
1084   // Clone name
1085   bool has_name = op_fine->name;
1086   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
1087   ierr = CeedOperatorSetName(*op_coarse, op_fine->name); CeedChk(ierr);
1088   {
1089     char *prolongation_name;
1090     ierr = CeedCalloc(18 + name_len, &prolongation_name); CeedChk(ierr);
1091     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "",
1092             op_fine->name);
1093     ierr = CeedOperatorSetName(*op_prolong, prolongation_name); CeedChk(ierr);
1094     ierr = CeedFree(&prolongation_name); CeedChk(ierr);
1095   }
1096   {
1097     char *restriction_name;
1098     ierr = CeedCalloc(17 + name_len, &restriction_name); CeedChk(ierr);
1099     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "",
1100             op_fine->name);
1101     ierr = CeedOperatorSetName(*op_restrict, restriction_name); CeedChk(ierr);
1102     ierr = CeedFree(&restriction_name); CeedChk(ierr);
1103   }
1104 
1105   // Cleanup
1106   ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
1107   ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
1108   ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
1109   ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
1110 
1111   return CEED_ERROR_SUCCESS;
1112 }
1113 
1114 /**
1115   @brief Build 1D mass matrix and Laplacian with perturbation
1116 
1117   @param[in] interp_1d    Interpolation matrix in one dimension
1118   @param[in] grad_1d      Gradient matrix in one dimension
1119   @param[in] q_weight_1d  Quadrature weights in one dimension
1120   @param[in] P_1d         Number of basis nodes in one dimension
1121   @param[in] Q_1d         Number of quadrature points in one dimension
1122   @param[in] dim          Dimension of basis
1123   @param[out] mass        Assembled mass matrix in one dimension
1124   @param[out] laplace     Assembled perturbed Laplacian in one dimension
1125 
1126   @return An error code: 0 - success, otherwise - failure
1127 
1128   @ref Developer
1129 **/
1130 CeedPragmaOptimizeOff
1131 static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
1132                                 const CeedScalar *grad_1d,
1133                                 const CeedScalar *q_weight_1d, CeedInt P_1d,
1134                                 CeedInt Q_1d, CeedInt dim,
1135                                 CeedScalar *mass, CeedScalar *laplace) {
1136 
1137   for (CeedInt i=0; i<P_1d; i++)
1138     for (CeedInt j=0; j<P_1d; j++) {
1139       CeedScalar sum = 0.0;
1140       for (CeedInt k=0; k<Q_1d; k++)
1141         sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
1142       mass[i+j*P_1d] = sum;
1143     }
1144   // -- Laplacian
1145   for (CeedInt i=0; i<P_1d; i++)
1146     for (CeedInt j=0; j<P_1d; j++) {
1147       CeedScalar sum = 0.0;
1148       for (CeedInt k=0; k<Q_1d; k++)
1149         sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
1150       laplace[i+j*P_1d] = sum;
1151     }
1152   CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
1153   for (CeedInt i=0; i<P_1d; i++)
1154     laplace[i+P_1d*i] += perturbation;
1155   return CEED_ERROR_SUCCESS;
1156 }
1157 CeedPragmaOptimizeOn
1158 
1159 /// @}
1160 
1161 /// ----------------------------------------------------------------------------
1162 /// CeedOperator Backend API
1163 /// ----------------------------------------------------------------------------
1164 /// @addtogroup CeedOperatorBackend
1165 /// @{
1166 
1167 /**
1168   @brief Create object holding CeedQFunction assembly data for CeedOperator
1169 
1170   @param[in] ceed  A Ceed object where the CeedQFunctionAssemblyData will be created
1171   @param[out] data Address of the variable where the newly created
1172                      CeedQFunctionAssemblyData will be stored
1173 
1174   @return An error code: 0 - success, otherwise - failure
1175 
1176   @ref Backend
1177 **/
1178 int CeedQFunctionAssemblyDataCreate(Ceed ceed,
1179                                     CeedQFunctionAssemblyData *data) {
1180   int ierr;
1181 
1182   ierr = CeedCalloc(1, data); CeedChk(ierr);
1183   (*data)->ref_count = 1;
1184   (*data)->ceed = ceed;
1185   ierr = CeedReference(ceed); CeedChk(ierr);
1186 
1187   return CEED_ERROR_SUCCESS;
1188 }
1189 
1190 /**
1191   @brief Increment the reference counter for a CeedQFunctionAssemblyData
1192 
1193   @param data  CeedQFunctionAssemblyData to increment the reference counter
1194 
1195   @return An error code: 0 - success, otherwise - failure
1196 
1197   @ref Backend
1198 **/
1199 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
1200   data->ref_count++;
1201   return CEED_ERROR_SUCCESS;
1202 }
1203 
1204 /**
1205   @brief Set re-use of CeedQFunctionAssemblyData
1206 
1207   @param data       CeedQFunctionAssemblyData to mark for reuse
1208   @param reuse_data Boolean flag indicating data re-use
1209 
1210   @return An error code: 0 - success, otherwise - failure
1211 
1212   @ref Backend
1213 **/
1214 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data,
1215                                       bool reuse_data) {
1216   data->reuse_data = reuse_data;
1217   data->needs_data_update = true;
1218   return CEED_ERROR_SUCCESS;
1219 }
1220 
1221 /**
1222   @brief Mark QFunctionAssemblyData as stale
1223 
1224   @param data              CeedQFunctionAssemblyData to mark as stale
1225   @param needs_data_update Boolean flag indicating if update is needed or completed
1226 
1227   @return An error code: 0 - success, otherwise - failure
1228 
1229   @ref Backend
1230 **/
1231 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data,
1232     bool needs_data_update) {
1233   data->needs_data_update = needs_data_update;
1234   return CEED_ERROR_SUCCESS;
1235 }
1236 
1237 /**
1238   @brief Determine if QFunctionAssemblyData needs update
1239 
1240   @param[in] data              CeedQFunctionAssemblyData to mark as stale
1241   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1242 
1243   @return An error code: 0 - success, otherwise - failure
1244 
1245   @ref Backend
1246 **/
1247 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data,
1248     bool *is_update_needed) {
1249   *is_update_needed = !data->reuse_data || data->needs_data_update;
1250   return CEED_ERROR_SUCCESS;
1251 }
1252 
1253 /**
1254   @brief Copy the pointer to a CeedQFunctionAssemblyData. Both pointers should
1255            be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`;
1256            Note: If `*data_copy` is non-NULL, then it is assumed that
1257            `*data_copy` is a pointer to a CeedQFunctionAssemblyData. This
1258            CeedQFunctionAssemblyData will be destroyed if `*data_copy` is
1259            the only reference to this CeedQFunctionAssemblyData.
1260 
1261   @param data            CeedQFunctionAssemblyData to copy reference to
1262   @param[out] data_copy  Variable to store copied reference
1263 
1264   @return An error code: 0 - success, otherwise - failure
1265 
1266   @ref Backend
1267 **/
1268 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data,
1269     CeedQFunctionAssemblyData *data_copy) {
1270   int ierr;
1271 
1272   ierr = CeedQFunctionAssemblyDataReference(data); CeedChk(ierr);
1273   ierr = CeedQFunctionAssemblyDataDestroy(data_copy); CeedChk(ierr);
1274   *data_copy = data;
1275   return CEED_ERROR_SUCCESS;
1276 }
1277 
1278 /**
1279   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1280 
1281   @param[in] data      CeedQFunctionAssemblyData to retreive status
1282   @param[out] is_setup Boolean flag for setup status
1283 
1284   @return An error code: 0 - success, otherwise - failure
1285 
1286   @ref Backend
1287 **/
1288 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data,
1289                                      bool *is_setup) {
1290   *is_setup = data->is_setup;
1291   return CEED_ERROR_SUCCESS;
1292 }
1293 
1294 /**
1295   @brief Set internal objects for CeedQFunctionAssemblyData
1296 
1297   @param[in] data  CeedQFunctionAssemblyData to set objects
1298   @param[in] vec   CeedVector to store assembled CeedQFunction at quadrature points
1299   @param[in] rstr  CeedElemRestriction for CeedVector containing assembled CeedQFunction
1300 
1301   @return An error code: 0 - success, otherwise - failure
1302 
1303   @ref Backend
1304 **/
1305 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data,
1306                                         CeedVector vec, CeedElemRestriction rstr) {
1307   int ierr;
1308 
1309   ierr = CeedVectorReferenceCopy(vec, &data->vec); CeedChk(ierr);
1310   ierr = CeedElemRestrictionReferenceCopy(rstr, &data->rstr); CeedChk(ierr);
1311 
1312   data->is_setup = true;
1313   return CEED_ERROR_SUCCESS;
1314 }
1315 
1316 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data,
1317                                         CeedVector *vec, CeedElemRestriction *rstr) {
1318   int ierr;
1319 
1320   if (!data->is_setup)
1321     // LCOV_EXCL_START
1322     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE,
1323                      "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1324   // LCOV_EXCL_STOP
1325 
1326   ierr = CeedVectorReferenceCopy(data->vec, vec); CeedChk(ierr);
1327   ierr = CeedElemRestrictionReferenceCopy(data->rstr, rstr); CeedChk(ierr);
1328 
1329   return CEED_ERROR_SUCCESS;
1330 }
1331 
1332 /**
1333   @brief Destroy CeedQFunctionAssemblyData
1334 
1335   @param[out] data  CeedQFunctionAssemblyData to destroy
1336 
1337   @return An error code: 0 - success, otherwise - failure
1338 
1339   @ref Backend
1340 **/
1341 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1342   int ierr;
1343 
1344   if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;
1345 
1346   ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr);
1347   ierr = CeedVectorDestroy(&(*data)->vec); CeedChk(ierr);
1348   ierr = CeedElemRestrictionDestroy(&(*data)->rstr); CeedChk(ierr);
1349 
1350   ierr = CeedFree(data); CeedChk(ierr);
1351   return CEED_ERROR_SUCCESS;
1352 }
1353 
1354 /// @}
1355 
1356 /// ----------------------------------------------------------------------------
1357 /// CeedOperator Public API
1358 /// ----------------------------------------------------------------------------
1359 /// @addtogroup CeedOperatorUser
1360 /// @{
1361 
1362 /**
1363   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1364 
1365   This returns a CeedVector containing a matrix at each quadrature point
1366     providing the action of the CeedQFunction associated with the CeedOperator.
1367     The vector 'assembled' is of shape
1368       [num_elements, num_input_fields, num_output_fields, num_quad_points]
1369     and contains column-major matrices representing the action of the
1370     CeedQFunction for a corresponding quadrature point on an element. Inputs and
1371     outputs are in the order provided by the user when adding CeedOperator fields.
1372     For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
1373     'v', provided in that order, would result in an assembled QFunction that
1374     consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
1375     on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1376 
1377   Note: Calling this function asserts that setup is complete
1378           and sets the CeedOperator as immutable.
1379 
1380   @param op              CeedOperator to assemble CeedQFunction
1381   @param[out] assembled  CeedVector to store assembled CeedQFunction at
1382                            quadrature points
1383   @param[out] rstr       CeedElemRestriction for CeedVector containing assembled
1384                            CeedQFunction
1385   @param request         Address of CeedRequest for non-blocking completion, else
1386                            @ref CEED_REQUEST_IMMEDIATE
1387 
1388   @return An error code: 0 - success, otherwise - failure
1389 
1390   @ref User
1391 **/
1392 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
1393                                         CeedElemRestriction *rstr,
1394                                         CeedRequest *request) {
1395   int ierr;
1396   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1397 
1398   if (op->LinearAssembleQFunction) {
1399     // Backend version
1400     ierr = op->LinearAssembleQFunction(op, assembled, rstr, request);
1401     CeedChk(ierr);
1402   } else {
1403     // Operator fallback
1404     CeedOperator op_fallback;
1405 
1406     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1407     if (op_fallback) {
1408       ierr = CeedOperatorLinearAssembleQFunction(op_fallback, assembled,
1409              rstr, request); CeedChk(ierr);
1410     } else {
1411       // LCOV_EXCL_START
1412       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
1413                        "Backend does not support CeedOperatorLinearAssembleQFunction");
1414       // LCOV_EXCL_STOP
1415     }
1416   }
1417   return CEED_ERROR_SUCCESS;
1418 }
1419 
1420 /**
1421   @brief Assemble CeedQFunction and store result internall. Return copied
1422            references of stored data to the caller. Caller is responsible for
1423            ownership and destruction of the copied references. See also
1424            @ref CeedOperatorLinearAssembleQFunction
1425 
1426   @param op              CeedOperator to assemble CeedQFunction
1427   @param assembled       CeedVector to store assembled CeedQFunction at
1428                            quadrature points
1429   @param rstr            CeedElemRestriction for CeedVector containing assembled
1430                            CeedQFunction
1431   @param request         Address of CeedRequest for non-blocking completion, else
1432                            @ref CEED_REQUEST_IMMEDIATE
1433 
1434   @return An error code: 0 - success, otherwise - failure
1435 
1436   @ref User
1437 **/
1438 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op,
1439     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1440   int ierr;
1441   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1442 
1443   if (op->LinearAssembleQFunctionUpdate) {
1444     // Backend version
1445     bool qf_assembled_is_setup;
1446     CeedVector assembled_vec = NULL;
1447     CeedElemRestriction assembled_rstr = NULL;
1448 
1449     ierr = CeedQFunctionAssemblyDataIsSetup(op->qf_assembled,
1450                                             &qf_assembled_is_setup); CeedChk(ierr);
1451     if (qf_assembled_is_setup) {
1452       bool update_needed;
1453 
1454       ierr = CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec,
1455              &assembled_rstr); CeedChk(ierr);
1456       ierr = CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled,
1457              &update_needed); CeedChk(ierr);
1458       if (update_needed) {
1459         ierr = op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr,
1460                request); CeedChk(ierr);
1461       }
1462     } else {
1463       ierr = op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr,
1464                                          request); CeedChk(ierr);
1465       ierr = CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec,
1466              assembled_rstr); CeedChk(ierr);
1467     }
1468     ierr = CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false);
1469     CeedChk(ierr);
1470 
1471     // Copy reference from internally held copy
1472     *assembled = NULL;
1473     *rstr = NULL;
1474     ierr = CeedVectorReferenceCopy(assembled_vec, assembled); CeedChk(ierr);
1475     ierr = CeedVectorDestroy(&assembled_vec); CeedChk(ierr);
1476     ierr = CeedElemRestrictionReferenceCopy(assembled_rstr, rstr); CeedChk(ierr);
1477     ierr = CeedElemRestrictionDestroy(&assembled_rstr); CeedChk(ierr);
1478   } else {
1479     // Operator fallback
1480     CeedOperator op_fallback;
1481 
1482     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1483     if (op_fallback) {
1484       ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled,
1485              rstr, request); CeedChk(ierr);
1486     } else {
1487       // LCOV_EXCL_START
1488       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
1489                        "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1490       // LCOV_EXCL_STOP
1491     }
1492   }
1493 
1494   return CEED_ERROR_SUCCESS;
1495 }
1496 
1497 /**
1498   @brief Assemble the diagonal of a square linear CeedOperator
1499 
1500   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1501 
1502   Note: Currently only non-composite CeedOperators with a single field and
1503           composite CeedOperators with single field sub-operators are supported.
1504 
1505   Note: Calling this function asserts that setup is complete
1506           and sets the CeedOperator as immutable.
1507 
1508   @param op              CeedOperator to assemble CeedQFunction
1509   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1510   @param request         Address of CeedRequest for non-blocking completion, else
1511                            @ref CEED_REQUEST_IMMEDIATE
1512 
1513   @return An error code: 0 - success, otherwise - failure
1514 
1515   @ref User
1516 **/
1517 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
1518                                        CeedRequest *request) {
1519   int ierr;
1520   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1521 
1522   CeedSize input_size = 0, output_size = 0;
1523   ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
1524   CeedChk(ierr);
1525   if (input_size != output_size)
1526     // LCOV_EXCL_START
1527     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1528   // LCOV_EXCL_STOP
1529 
1530   if (op->LinearAssembleDiagonal) {
1531     // Backend version
1532     ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
1533     return CEED_ERROR_SUCCESS;
1534   } else if (op->LinearAssembleAddDiagonal) {
1535     // Backend version with zeroing first
1536     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1537     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1538     return CEED_ERROR_SUCCESS;
1539   } else {
1540     // Operator fallback
1541     CeedOperator op_fallback;
1542 
1543     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1544     if (op_fallback) {
1545       ierr = CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request);
1546       CeedChk(ierr);
1547       return CEED_ERROR_SUCCESS;
1548     }
1549   }
1550   // Default interface implementation
1551   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1552   ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
1553   CeedChk(ierr);
1554 
1555   return CEED_ERROR_SUCCESS;
1556 }
1557 
1558 /**
1559   @brief Assemble the diagonal of a square linear CeedOperator
1560 
1561   This sums into a CeedVector the diagonal of a linear CeedOperator.
1562 
1563   Note: Currently only non-composite CeedOperators with a single field and
1564           composite CeedOperators with single field sub-operators are supported.
1565 
1566   Note: Calling this function asserts that setup is complete
1567           and sets the CeedOperator as immutable.
1568 
1569   @param op              CeedOperator to assemble CeedQFunction
1570   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1571   @param request         Address of CeedRequest for non-blocking completion, else
1572                            @ref CEED_REQUEST_IMMEDIATE
1573 
1574   @return An error code: 0 - success, otherwise - failure
1575 
1576   @ref User
1577 **/
1578 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
1579     CeedRequest *request) {
1580   int ierr;
1581   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1582 
1583   CeedSize input_size = 0, output_size = 0;
1584   ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
1585   CeedChk(ierr);
1586   if (input_size != output_size)
1587     // LCOV_EXCL_START
1588     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1589   // LCOV_EXCL_STOP
1590 
1591   if (op->LinearAssembleAddDiagonal) {
1592     // Backend version
1593     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1594     return CEED_ERROR_SUCCESS;
1595   } else {
1596     // Operator fallback
1597     CeedOperator op_fallback;
1598 
1599     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1600     if (op_fallback) {
1601       ierr = CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request);
1602       CeedChk(ierr);
1603       return CEED_ERROR_SUCCESS;
1604     }
1605   }
1606   // Default interface implementation
1607   bool is_composite;
1608   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1609   if (is_composite) {
1610     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1611            false, assembled); CeedChk(ierr);
1612   } else {
1613     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
1614     CeedChk(ierr);
1615   }
1616 
1617   return CEED_ERROR_SUCCESS;
1618 }
1619 
1620 /**
1621   @brief Assemble the point block diagonal of a square linear CeedOperator
1622 
1623   This overwrites a CeedVector with the point block diagonal of a linear
1624     CeedOperator.
1625 
1626   Note: Currently only non-composite CeedOperators with a single field and
1627           composite CeedOperators with single field sub-operators are supported.
1628 
1629   Note: Calling this function asserts that setup is complete
1630           and sets the CeedOperator as immutable.
1631 
1632   @param op              CeedOperator to assemble CeedQFunction
1633   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1634                            diagonal, provided in row-major form with an
1635                            @a num_comp * @a num_comp block at each node. The dimensions
1636                            of this vector are derived from the active vector
1637                            for the CeedOperator. The array has shape
1638                            [nodes, component out, component in].
1639   @param request         Address of CeedRequest for non-blocking completion, else
1640                            CEED_REQUEST_IMMEDIATE
1641 
1642   @return An error code: 0 - success, otherwise - failure
1643 
1644   @ref User
1645 **/
1646 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
1647     CeedVector assembled, CeedRequest *request) {
1648   int ierr;
1649   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1650 
1651   CeedSize input_size = 0, output_size = 0;
1652   ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
1653   CeedChk(ierr);
1654   if (input_size != output_size)
1655     // LCOV_EXCL_START
1656     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1657   // LCOV_EXCL_STOP
1658 
1659   if (op->LinearAssemblePointBlockDiagonal) {
1660     // Backend version
1661     ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
1662     CeedChk(ierr);
1663     return CEED_ERROR_SUCCESS;
1664   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1665     // Backend version with zeroing first
1666     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1667     ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
1668            request); CeedChk(ierr);
1669     return CEED_ERROR_SUCCESS;
1670   } else {
1671     // Operator fallback
1672     CeedOperator op_fallback;
1673 
1674     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1675     if (op_fallback) {
1676       ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled,
1677              request); CeedChk(ierr);
1678       return CEED_ERROR_SUCCESS;
1679     }
1680   }
1681   // Default interface implementation
1682   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1683   ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
1684   CeedChk(ierr);
1685 
1686   return CEED_ERROR_SUCCESS;
1687 }
1688 
1689 /**
1690   @brief Assemble the point block diagonal of a square linear CeedOperator
1691 
1692   This sums into a CeedVector with the point block diagonal of a linear
1693     CeedOperator.
1694 
1695   Note: Currently only non-composite CeedOperators with a single field and
1696           composite CeedOperators with single field sub-operators are supported.
1697 
1698   Note: Calling this function asserts that setup is complete
1699           and sets the CeedOperator as immutable.
1700 
1701   @param op              CeedOperator to assemble CeedQFunction
1702   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1703                            diagonal, provided in row-major form with an
1704                            @a num_comp * @a num_comp block at each node. The dimensions
1705                            of this vector are derived from the active vector
1706                            for the CeedOperator. The array has shape
1707                            [nodes, component out, component in].
1708   @param request         Address of CeedRequest for non-blocking completion, else
1709                            CEED_REQUEST_IMMEDIATE
1710 
1711   @return An error code: 0 - success, otherwise - failure
1712 
1713   @ref User
1714 **/
1715 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
1716     CeedVector assembled, CeedRequest *request) {
1717   int ierr;
1718   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1719 
1720   CeedSize input_size = 0, output_size = 0;
1721   ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
1722   CeedChk(ierr);
1723   if (input_size != output_size)
1724     // LCOV_EXCL_START
1725     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1726   // LCOV_EXCL_STOP
1727 
1728   if (op->LinearAssembleAddPointBlockDiagonal) {
1729     // Backend version
1730     ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
1731     CeedChk(ierr);
1732     return CEED_ERROR_SUCCESS;
1733   } else {
1734     // Operator fallback
1735     CeedOperator op_fallback;
1736 
1737     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1738     if (op_fallback) {
1739       ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled,
1740              request); CeedChk(ierr);
1741       return CEED_ERROR_SUCCESS;
1742     }
1743   }
1744   // Default interface implemenation
1745   bool is_composite;
1746   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1747   if (is_composite) {
1748     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1749            true, assembled); CeedChk(ierr);
1750   } else {
1751     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
1752     CeedChk(ierr);
1753   }
1754 
1755   return CEED_ERROR_SUCCESS;
1756 }
1757 
1758 /**
1759    @brief Fully assemble the nonzero pattern of a linear operator.
1760 
1761    Expected to be used in conjunction with CeedOperatorLinearAssemble()
1762 
1763    The assembly routines use coordinate format, with num_entries tuples of the
1764    form (i, j, value) which indicate that value should be added to the matrix
1765    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1766    This function returns the number of entries and their (i, j) locations,
1767    while CeedOperatorLinearAssemble() provides the values in the same
1768    ordering.
1769 
1770    This will generally be slow unless your operator is low-order.
1771 
1772   Note: Calling this function asserts that setup is complete
1773           and sets the CeedOperator as immutable.
1774 
1775    @param[in]  op           CeedOperator to assemble
1776    @param[out] num_entries  Number of entries in coordinate nonzero pattern
1777    @param[out] rows         Row number for each entry
1778    @param[out] cols         Column number for each entry
1779 
1780    @ref User
1781 **/
1782 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries,
1783                                        CeedInt **rows, CeedInt **cols) {
1784   int ierr;
1785   CeedInt num_suboperators, single_entries;
1786   CeedOperator *sub_operators;
1787   bool is_composite;
1788   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1789 
1790   if (op->LinearAssembleSymbolic) {
1791     // Backend version
1792     ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
1793     return CEED_ERROR_SUCCESS;
1794   } else {
1795     // Operator fallback
1796     CeedOperator op_fallback;
1797 
1798     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1799     if (op_fallback) {
1800       ierr = CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols);
1801       CeedChk(ierr);
1802       return CEED_ERROR_SUCCESS;
1803     }
1804   }
1805 
1806   // Default interface implementation
1807 
1808   // count entries and allocate rows, cols arrays
1809   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1810   *num_entries = 0;
1811   if (is_composite) {
1812     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1813     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1814     for (CeedInt k = 0; k < num_suboperators; ++k) {
1815       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1816              &single_entries); CeedChk(ierr);
1817       *num_entries += single_entries;
1818     }
1819   } else {
1820     ierr = CeedSingleOperatorAssemblyCountEntries(op,
1821            &single_entries); CeedChk(ierr);
1822     *num_entries += single_entries;
1823   }
1824   ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
1825   ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);
1826 
1827   // assemble nonzero locations
1828   CeedInt offset = 0;
1829   if (is_composite) {
1830     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1831     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1832     for (CeedInt k = 0; k < num_suboperators; ++k) {
1833       ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
1834              *cols); CeedChk(ierr);
1835       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1836              &single_entries);
1837       CeedChk(ierr);
1838       offset += single_entries;
1839     }
1840   } else {
1841     ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
1842     CeedChk(ierr);
1843   }
1844 
1845   return CEED_ERROR_SUCCESS;
1846 }
1847 
1848 /**
1849    @brief Fully assemble the nonzero entries of a linear operator.
1850 
1851    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()
1852 
1853    The assembly routines use coordinate format, with num_entries tuples of the
1854    form (i, j, value) which indicate that value should be added to the matrix
1855    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1856    This function returns the values of the nonzero entries to be added, their
1857    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1858 
1859    This will generally be slow unless your operator is low-order.
1860 
1861   Note: Calling this function asserts that setup is complete
1862           and sets the CeedOperator as immutable.
1863 
1864    @param[in]  op      CeedOperator to assemble
1865    @param[out] values  Values to assemble into matrix
1866 
1867    @ref User
1868 **/
1869 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1870   int ierr;
1871   CeedInt num_suboperators, single_entries = 0;
1872   CeedOperator *sub_operators;
1873   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1874 
1875   if (op->LinearAssemble) {
1876     // Backend version
1877     ierr = op->LinearAssemble(op, values); CeedChk(ierr);
1878     return CEED_ERROR_SUCCESS;
1879   } else {
1880     // Operator fallback
1881     CeedOperator op_fallback;
1882 
1883     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1884     if (op_fallback) {
1885       ierr = CeedOperatorLinearAssemble(op_fallback, values); CeedChk(ierr);
1886       return CEED_ERROR_SUCCESS;
1887     }
1888   }
1889 
1890   // Default interface implementation
1891   bool is_composite;
1892   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1893 
1894   CeedInt offset = 0;
1895   if (is_composite) {
1896     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1897     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1898     for (CeedInt k = 0; k < num_suboperators; ++k) {
1899       ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
1900       CeedChk(ierr);
1901       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1902              &single_entries);
1903       CeedChk(ierr);
1904       offset += single_entries;
1905     }
1906   } else {
1907     ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
1908   }
1909 
1910   return CEED_ERROR_SUCCESS;
1911 }
1912 
1913 /**
1914   @brief Create a multigrid coarse operator and level transfer operators
1915            for a CeedOperator, creating the prolongation basis from the
1916            fine and coarse grid interpolation
1917 
1918   Note: Calling this function asserts that setup is complete
1919           and sets the CeedOperator as immutable.
1920 
1921   @param[in] op_fine       Fine grid operator
1922   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
1923   @param[in] rstr_coarse   Coarse grid restriction
1924   @param[in] basis_coarse  Coarse grid active vector basis
1925   @param[out] op_coarse    Coarse grid operator
1926   @param[out] op_prolong   Coarse to fine operator
1927   @param[out] op_restrict  Fine to coarse operator
1928 
1929   @return An error code: 0 - success, otherwise - failure
1930 
1931   @ref User
1932 **/
1933 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
1934                                      CeedVector p_mult_fine,
1935                                      CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1936                                      CeedOperator *op_coarse, CeedOperator *op_prolong,
1937                                      CeedOperator *op_restrict) {
1938   int ierr;
1939   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
1940   Ceed ceed;
1941   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
1942 
1943   // Check for compatible quadrature spaces
1944   CeedBasis basis_fine;
1945   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
1946   CeedInt Q_f, Q_c;
1947   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
1948   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
1949   if (Q_f != Q_c)
1950     // LCOV_EXCL_START
1951     return CeedError(ceed, CEED_ERROR_DIMENSION,
1952                      "Bases must have compatible quadrature spaces");
1953   // LCOV_EXCL_STOP
1954 
1955   // Coarse to fine basis
1956   CeedInt P_f, P_c, Q = Q_f;
1957   bool is_tensor_f, is_tensor_c;
1958   ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
1959   ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
1960   CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
1961   if (is_tensor_f && is_tensor_c) {
1962     ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
1963     ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
1964     ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
1965   } else if (!is_tensor_f && !is_tensor_c) {
1966     ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
1967     ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
1968   } else {
1969     // LCOV_EXCL_START
1970     return CeedError(ceed, CEED_ERROR_MINOR,
1971                      "Bases must both be tensor or non-tensor");
1972     // LCOV_EXCL_STOP
1973   }
1974 
1975   ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
1976   ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
1977   ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
1978   ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
1979   const CeedScalar *interp_f_source = NULL, *interp_c_source = NULL;
1980   if (is_tensor_f) {
1981     ierr = CeedBasisGetInterp1D(basis_fine, &interp_f_source); CeedChk(ierr);
1982     ierr = CeedBasisGetInterp1D(basis_coarse, &interp_c_source); CeedChk(ierr);
1983   } else {
1984     ierr = CeedBasisGetInterp(basis_fine, &interp_f_source); CeedChk(ierr);
1985     ierr = CeedBasisGetInterp(basis_coarse, &interp_c_source); CeedChk(ierr);
1986   }
1987   memcpy(interp_f, interp_f_source, Q*P_f*sizeof interp_f_source[0]);
1988   memcpy(interp_c, interp_c_source, Q*P_c*sizeof interp_c_source[0]);
1989 
1990   // -- QR Factorization, interp_f = Q R
1991   ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);
1992 
1993   // -- Apply Qtranspose, interp_c = Qtranspose interp_c
1994   ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
1995                                Q, P_c, P_f, P_c, 1); CeedChk(ierr);
1996 
1997   // -- Apply Rinv, interp_c_to_f = Rinv interp_c
1998   for (CeedInt j=0; j<P_c; j++) { // Column j
1999     interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
2000     for (CeedInt i=P_f-2; i>=0; i--) { // Row i
2001       interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
2002       for (CeedInt k=i+1; k<P_f; k++)
2003         interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
2004       interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
2005     }
2006   }
2007   ierr = CeedFree(&tau); CeedChk(ierr);
2008   ierr = CeedFree(&interp_c); CeedChk(ierr);
2009   ierr = CeedFree(&interp_f); CeedChk(ierr);
2010 
2011   // Complete with interp_c_to_f versions of code
2012   if (is_tensor_f) {
2013     ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
2014            rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
2015     CeedChk(ierr);
2016   } else {
2017     ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
2018            rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
2019     CeedChk(ierr);
2020   }
2021 
2022   // Cleanup
2023   ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
2024   return CEED_ERROR_SUCCESS;
2025 }
2026 
2027 /**
2028   @brief Create a multigrid coarse operator and level transfer operators
2029            for a CeedOperator with a tensor basis for the active basis
2030 
2031   Note: Calling this function asserts that setup is complete
2032           and sets the CeedOperator as immutable.
2033 
2034   @param[in] op_fine        Fine grid operator
2035   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
2036   @param[in] rstr_coarse    Coarse grid restriction
2037   @param[in] basis_coarse   Coarse grid active vector basis
2038   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
2039   @param[out] op_coarse     Coarse grid operator
2040   @param[out] op_prolong    Coarse to fine operator
2041   @param[out] op_restrict   Fine to coarse operator
2042 
2043   @return An error code: 0 - success, otherwise - failure
2044 
2045   @ref User
2046 **/
2047 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
2048     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2049     const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
2050     CeedOperator *op_prolong, CeedOperator *op_restrict) {
2051   int ierr;
2052   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
2053   Ceed ceed;
2054   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
2055 
2056   // Check for compatible quadrature spaces
2057   CeedBasis basis_fine;
2058   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
2059   CeedInt Q_f, Q_c;
2060   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
2061   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
2062   if (Q_f != Q_c)
2063     // LCOV_EXCL_START
2064     return CeedError(ceed, CEED_ERROR_DIMENSION,
2065                      "Bases must have compatible quadrature spaces");
2066   // LCOV_EXCL_STOP
2067 
2068   // Coarse to fine basis
2069   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2070   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
2071   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
2072   ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr);
2073   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
2074   CeedChk(ierr);
2075   P_1d_c = dim == 1 ? num_nodes_c :
2076            dim == 2 ? sqrt(num_nodes_c) :
2077            cbrt(num_nodes_c);
2078   CeedScalar *q_ref, *q_weight, *grad;
2079   ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr);
2080   ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr);
2081   ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr);
2082   CeedBasis basis_c_to_f;
2083   ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f,
2084                                  interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
2085   CeedChk(ierr);
2086   ierr = CeedFree(&q_ref); CeedChk(ierr);
2087   ierr = CeedFree(&q_weight); CeedChk(ierr);
2088   ierr = CeedFree(&grad); CeedChk(ierr);
2089 
2090   // Core code
2091   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
2092                                           basis_coarse, basis_c_to_f, op_coarse,
2093                                           op_prolong, op_restrict);
2094   CeedChk(ierr);
2095   return CEED_ERROR_SUCCESS;
2096 }
2097 
2098 /**
2099   @brief Create a multigrid coarse operator and level transfer operators
2100            for a CeedOperator with a non-tensor basis for the active vector
2101 
2102   Note: Calling this function asserts that setup is complete
2103           and sets the CeedOperator as immutable.
2104 
2105   @param[in] op_fine        Fine grid operator
2106   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
2107   @param[in] rstr_coarse    Coarse grid restriction
2108   @param[in] basis_coarse   Coarse grid active vector basis
2109   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
2110   @param[out] op_coarse     Coarse grid operator
2111   @param[out] op_prolong    Coarse to fine operator
2112   @param[out] op_restrict   Fine to coarse operator
2113 
2114   @return An error code: 0 - success, otherwise - failure
2115 
2116   @ref User
2117 **/
2118 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine,
2119                                        CeedVector p_mult_fine,
2120                                        CeedElemRestriction rstr_coarse,
2121                                        CeedBasis basis_coarse,
2122                                        const CeedScalar *interp_c_to_f,
2123                                        CeedOperator *op_coarse,
2124                                        CeedOperator *op_prolong,
2125                                        CeedOperator *op_restrict) {
2126   int ierr;
2127   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
2128   Ceed ceed;
2129   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
2130 
2131   // Check for compatible quadrature spaces
2132   CeedBasis basis_fine;
2133   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
2134   CeedInt Q_f, Q_c;
2135   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
2136   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
2137   if (Q_f != Q_c)
2138     // LCOV_EXCL_START
2139     return CeedError(ceed, CEED_ERROR_DIMENSION,
2140                      "Bases must have compatible quadrature spaces");
2141   // LCOV_EXCL_STOP
2142 
2143   // Coarse to fine basis
2144   CeedElemTopology topo;
2145   ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr);
2146   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2147   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
2148   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
2149   ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr);
2150   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
2151   CeedChk(ierr);
2152   CeedScalar *q_ref, *q_weight, *grad;
2153   ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr);
2154   ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr);
2155   ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr);
2156   CeedBasis basis_c_to_f;
2157   ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f,
2158                            interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
2159   CeedChk(ierr);
2160   ierr = CeedFree(&q_ref); CeedChk(ierr);
2161   ierr = CeedFree(&q_weight); CeedChk(ierr);
2162   ierr = CeedFree(&grad); CeedChk(ierr);
2163 
2164   // Core code
2165   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
2166                                           basis_coarse, basis_c_to_f, op_coarse,
2167                                           op_prolong, op_restrict);
2168   CeedChk(ierr);
2169   return CEED_ERROR_SUCCESS;
2170 }
2171 
2172 /**
2173   @brief Build a FDM based approximate inverse for each element for a
2174            CeedOperator
2175 
2176   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization
2177     Method based approximate inverse. This function obtains the simultaneous
2178     diagonalization for the 1D mass and Laplacian operators,
2179       M = V^T V, K = V^T S V.
2180     The assembled QFunction is used to modify the eigenvalues from simultaneous
2181     diagonalization and obtain an approximate inverse of the form
2182       V^T S^hat V. The CeedOperator must be linear and non-composite. The
2183     associated CeedQFunction must therefore also be linear.
2184 
2185   Note: Calling this function asserts that setup is complete
2186           and sets the CeedOperator as immutable.
2187 
2188   @param op            CeedOperator to create element inverses
2189   @param[out] fdm_inv  CeedOperator to apply the action of a FDM based inverse
2190                          for each element
2191   @param request       Address of CeedRequest for non-blocking completion, else
2192                          @ref CEED_REQUEST_IMMEDIATE
2193 
2194   @return An error code: 0 - success, otherwise - failure
2195 
2196   @ref User
2197 **/
2198 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv,
2199                                         CeedRequest *request) {
2200   int ierr;
2201   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
2202 
2203   if (op->CreateFDMElementInverse) {
2204     // Backend version
2205     ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr);
2206     return CEED_ERROR_SUCCESS;
2207   } else {
2208     // Operator fallback
2209     CeedOperator op_fallback;
2210 
2211     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
2212     if (op_fallback) {
2213       ierr = CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request);
2214       CeedChk(ierr);
2215       return CEED_ERROR_SUCCESS;
2216     }
2217   }
2218 
2219   // Default interface implementation
2220   Ceed ceed, ceed_parent;
2221   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
2222   ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr);
2223   ceed_parent = ceed_parent ? ceed_parent : ceed;
2224   CeedQFunction qf;
2225   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
2226 
2227   // Determine active input basis
2228   bool interp = false, grad = false;
2229   CeedBasis basis = NULL;
2230   CeedElemRestriction rstr = NULL;
2231   CeedOperatorField *op_fields;
2232   CeedQFunctionField *qf_fields;
2233   CeedInt num_input_fields;
2234   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL);
2235   CeedChk(ierr);
2236   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
2237   for (CeedInt i=0; i<num_input_fields; i++) {
2238     CeedVector vec;
2239     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
2240     if (vec == CEED_VECTOR_ACTIVE) {
2241       CeedEvalMode eval_mode;
2242       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
2243       interp = interp || eval_mode == CEED_EVAL_INTERP;
2244       grad = grad || eval_mode == CEED_EVAL_GRAD;
2245       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr);
2246       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
2247     }
2248   }
2249   if (!basis)
2250     // LCOV_EXCL_START
2251     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2252   // LCOV_EXCL_STOP
2253   CeedSize l_size = 1;
2254   CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
2255   ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr);
2256   ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr);
2257   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr);
2258   ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr);
2259   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
2260   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
2261   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
2262   ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr);
2263 
2264   // Build and diagonalize 1D Mass and Laplacian
2265   bool tensor_basis;
2266   ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr);
2267   if (!tensor_basis)
2268     // LCOV_EXCL_START
2269     return CeedError(ceed, CEED_ERROR_BACKEND,
2270                      "FDMElementInverse only supported for tensor "
2271                      "bases");
2272   // LCOV_EXCL_STOP
2273   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2274   ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr);
2275   ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr);
2276   ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr);
2277   ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr);
2278   ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr);
2279   // -- Build matrices
2280   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2281   ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr);
2282   ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr);
2283   ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr);
2284   ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim,
2285                               mass, laplace); CeedChk(ierr);
2286 
2287   // -- Diagonalize
2288   ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d);
2289   CeedChk(ierr);
2290   ierr = CeedFree(&mass); CeedChk(ierr);
2291   ierr = CeedFree(&laplace); CeedChk(ierr);
2292   for (CeedInt i=0; i<P_1d; i++)
2293     for (CeedInt j=0; j<P_1d; j++)
2294       fdm_interp[i+j*P_1d] = x[j+i*P_1d];
2295   ierr = CeedFree(&x); CeedChk(ierr);
2296 
2297   // Assemble QFunction
2298   CeedVector assembled;
2299   CeedElemRestriction rstr_qf;
2300   ierr =  CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled,
2301           &rstr_qf, request); CeedChk(ierr);
2302   CeedInt layout[3];
2303   ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr);
2304   ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr);
2305   CeedScalar max_norm = 0;
2306   ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr);
2307 
2308   // Calculate element averages
2309   CeedInt num_modes = (interp?1:0) + (grad?dim:0);
2310   CeedScalar *elem_avg;
2311   const CeedScalar *assembled_array, *q_weight_array;
2312   CeedVector q_weight;
2313   ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr);
2314   ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT,
2315                         CEED_VECTOR_NONE, q_weight); CeedChk(ierr);
2316   ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array);
2317   CeedChk(ierr);
2318   ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array);
2319   CeedChk(ierr);
2320   ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr);
2321   const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
2322   for (CeedInt e=0; e<num_elem; e++) {
2323     CeedInt count = 0;
2324     for (CeedInt q=0; q<num_qpts; q++)
2325       for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++)
2326         if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) >
2327             qf_value_bound) {
2328           elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] /
2329                          q_weight_array[q];
2330           count++;
2331         }
2332     if (count) {
2333       elem_avg[e] /= count;
2334     } else {
2335       elem_avg[e] = 1.0;
2336     }
2337   }
2338   ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr);
2339   ierr = CeedVectorDestroy(&assembled); CeedChk(ierr);
2340   ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr);
2341   ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr);
2342 
2343   // Build FDM diagonal
2344   CeedVector q_data;
2345   CeedScalar *q_data_array, *fdm_diagonal;
2346   ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr);
2347   const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON;
2348   for (CeedInt c=0; c<num_comp; c++)
2349     for (CeedInt n=0; n<elem_size; n++) {
2350       if (interp)
2351         fdm_diagonal[c*elem_size + n] = 1.0;
2352       if (grad)
2353         for (CeedInt d=0; d<dim; d++) {
2354           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2355           fdm_diagonal[c*elem_size + n] += lambda[i];
2356         }
2357       if (fabs(fdm_diagonal[c*elem_size + n]) < fdm_diagonal_bound)
2358         fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound;
2359     }
2360   ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data);
2361   CeedChk(ierr);
2362   ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr);
2363   ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array);
2364   CeedChk(ierr);
2365   for (CeedInt e=0; e<num_elem; e++)
2366     for (CeedInt c=0; c<num_comp; c++)
2367       for (CeedInt n=0; n<elem_size; n++)
2368         q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] *
2369             fdm_diagonal[c*elem_size + n]);
2370   ierr = CeedFree(&elem_avg); CeedChk(ierr);
2371   ierr = CeedFree(&fdm_diagonal); CeedChk(ierr);
2372   ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr);
2373 
2374   // Setup FDM operator
2375   // -- Basis
2376   CeedBasis fdm_basis;
2377   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2378   ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr);
2379   ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr);
2380   ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr);
2381   ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d,
2382                                  fdm_interp, grad_dummy, q_ref_dummy,
2383                                  q_weight_dummy, &fdm_basis); CeedChk(ierr);
2384   ierr = CeedFree(&fdm_interp); CeedChk(ierr);
2385   ierr = CeedFree(&grad_dummy); CeedChk(ierr);
2386   ierr = CeedFree(&q_ref_dummy); CeedChk(ierr);
2387   ierr = CeedFree(&q_weight_dummy); CeedChk(ierr);
2388   ierr = CeedFree(&lambda); CeedChk(ierr);
2389 
2390   // -- Restriction
2391   CeedElemRestriction rstr_qd_i;
2392   CeedInt strides[3] = {1, elem_size, elem_size*num_comp};
2393   ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size,
2394                                           num_comp, num_elem*num_comp*elem_size,
2395                                           strides, &rstr_qd_i); CeedChk(ierr);
2396   // -- QFunction
2397   CeedQFunction qf_fdm;
2398   ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm);
2399   CeedChk(ierr);
2400   ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP);
2401   CeedChk(ierr);
2402   ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE);
2403   CeedChk(ierr);
2404   ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP);
2405   CeedChk(ierr);
2406   ierr = CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp); CeedChk(ierr);
2407   // -- QFunction context
2408   CeedInt *num_comp_data;
2409   ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr);
2410   num_comp_data[0] = num_comp;
2411   CeedQFunctionContext ctx_fdm;
2412   ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr);
2413   ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER,
2414                                      sizeof(*num_comp_data), num_comp_data);
2415   CeedChk(ierr);
2416   ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr);
2417   ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr);
2418   // -- Operator
2419   ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv);
2420   CeedChk(ierr);
2421   CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
2422   CeedChk(ierr);
2423   CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED,
2424                        q_data); CeedChk(ierr);
2425   CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
2426   CeedChk(ierr);
2427 
2428   // Cleanup
2429   ierr = CeedVectorDestroy(&q_data); CeedChk(ierr);
2430   ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr);
2431   ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr);
2432   ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr);
2433 
2434   return CEED_ERROR_SUCCESS;
2435 }
2436 
2437 /// @}
2438