xref: /libCEED/interface/ceed-preconditioning.c (revision c082eca03950350b023f271dc47b90089562a0c5)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed/ceed.h>
9 #include <ceed/backend.h>
10 #include <ceed-impl.h>
11 #include <math.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <string.h>
15 
16 /// @file
17 /// Implementation of CeedOperator preconditioning interfaces
18 
19 /// ----------------------------------------------------------------------------
20 /// CeedOperator Library Internal Preconditioning Functions
21 /// ----------------------------------------------------------------------------
22 /// @addtogroup CeedOperatorDeveloper
23 /// @{
24 
25 /**
26   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced
27          CeedOperator functionality
28 
29   @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in] qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback  fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf,
38                                        CeedQFunction *qf_fallback) {
39   int ierr;
40 
41   // Check if NULL qf passed in
42   if (!qf) return CEED_ERROR_SUCCESS;
43 
44   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
45   CeedDebug256(qf->ceed, 255, "Creating fallback CeedQFunction\n");
46 
47   char *source_path_with_name = "";
48   if (qf->source_path) {
49     size_t path_len = strlen(qf->source_path),
50            name_len = strlen(qf->kernel_name);
51     ierr = CeedCalloc(path_len + name_len + 2, &source_path_with_name);
52     CeedChk(ierr);
53     memcpy(source_path_with_name, qf->source_path, path_len);
54     memcpy(&source_path_with_name[path_len], ":", 1);
55     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
56   } else {
57     ierr = CeedCalloc(1, &source_path_with_name); CeedChk(ierr);
58   }
59 
60   ierr = CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length,
61                                      qf->function, source_path_with_name,
62                                      qf_fallback); CeedChk(ierr);
63   {
64     CeedQFunctionContext ctx;
65 
66     ierr = CeedQFunctionGetContext(qf, &ctx); CeedChk(ierr);
67     ierr = CeedQFunctionSetContext(*qf_fallback, ctx); CeedChk(ierr);
68   }
69   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
70     ierr = CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name,
71                                  qf->input_fields[i]->size,
72                                  qf->input_fields[i]->eval_mode); CeedChk(ierr);
73   }
74   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
75     ierr = CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name,
76                                   qf->output_fields[i]->size,
77                                   qf->output_fields[i]->eval_mode); CeedChk(ierr);
78   }
79   ierr = CeedFree(&source_path_with_name); CeedChk(ierr);
80 
81   return CEED_ERROR_SUCCESS;
82 }
83 
84 /**
85   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced
86          CeedOperator functionality
87 
88   @param op  CeedOperator to create fallback for
89 
90   @return An error code: 0 - success, otherwise - failure
91 
92   @ref Developer
93 **/
94 static int CeedOperatorCreateFallback(CeedOperator op) {
95   int ierr;
96   Ceed ceed_fallback;
97 
98   // Check not already created
99   if (op->op_fallback) return CEED_ERROR_SUCCESS;
100 
101   // Fallback Ceed
102   ierr = CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback); CeedChk(ierr);
103   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
104 
105   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
106   CeedDebug256(op->ceed, 255, "Creating fallback CeedOperator\n");
107 
108   // Clone Op
109   CeedOperator op_fallback;
110   if (op->is_composite) {
111     ierr = CeedCompositeOperatorCreate(ceed_fallback, &op_fallback);
112     CeedChk(ierr);
113     for (CeedInt i = 0; i < op->num_suboperators; i++) {
114       CeedOperator op_sub_fallback;
115 
116       ierr = CeedOperatorGetFallback(op->sub_operators[i], &op_sub_fallback);
117       CeedChk(ierr);
118       ierr = CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback); CeedChk(ierr);
119     }
120   } else {
121     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
122     ierr = CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback);
123     CeedChk(ierr);
124     ierr = CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback);
125     CeedChk(ierr);
126     ierr = CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback);
127     CeedChk(ierr);
128     ierr = CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback,
129                               dqfT_fallback, &op_fallback); CeedChk(ierr);
130     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
131       ierr = CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name,
132                                   op->input_fields[i]->elem_restr,
133                                   op->input_fields[i]->basis,
134                                   op->input_fields[i]->vec); CeedChk(ierr);
135     }
136     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
137       ierr = CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name,
138                                   op->output_fields[i]->elem_restr,
139                                   op->output_fields[i]->basis,
140                                   op->output_fields[i]->vec); CeedChk(ierr);
141     }
142     ierr = CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled,
143            &op_fallback->qf_assembled); CeedChk(ierr);
144     if (op_fallback->num_qpts == 0) {
145       ierr = CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts);
146       CeedChk(ierr);
147     }
148     // Cleanup
149     ierr = CeedQFunctionDestroy(&qf_fallback); CeedChk(ierr);
150     ierr = CeedQFunctionDestroy(&dqf_fallback); CeedChk(ierr);
151     ierr = CeedQFunctionDestroy(&dqfT_fallback); CeedChk(ierr);
152   }
153   ierr = CeedOperatorSetName(op_fallback, op->name); CeedChk(ierr);
154   ierr = CeedOperatorCheckReady(op_fallback); CeedChk(ierr);
155   op->op_fallback = op_fallback;
156 
157   return CEED_ERROR_SUCCESS;
158 }
159 
160 /**
161   @brief Retreive fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
162 
163   @param[in] op            CeedOperator to retrieve fallback for
164   @param[out] op_fallback  Fallback CeedOperator
165 
166   @return An error code: 0 - success, otherwise - failure
167 
168   @ref Developer
169 **/
170 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
171   int ierr;
172 
173   // Create if needed
174   if (!op->op_fallback) {
175     ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
176   }
177   if (op->op_fallback) {
178     bool is_debug;
179 
180     ierr = CeedIsDebug(op->ceed, &is_debug); CeedChk(ierr);
181     if (is_debug) {
182       Ceed ceed_fallback;
183       const char *resource, *resource_fallback;
184 
185       ierr = CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback); CeedChk(ierr);
186       ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
187       ierr = CeedGetResource(ceed_fallback, &resource_fallback); CeedChk(ierr);
188 
189       CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
190       CeedDebug256(op->ceed, 255,
191                    "Falling back from %s operator at address %ld to %s operator at address %ld\n",
192                    resource, op, resource_fallback, op->op_fallback);
193     }
194   }
195   *op_fallback = op->op_fallback;
196 
197   return CEED_ERROR_SUCCESS;
198 }
199 
200 /**
201   @brief Select correct basis matrix pointer based on CeedEvalMode
202 
203   @param[in] eval_mode   Current basis evaluation mode
204   @param[in] identity    Pointer to identity matrix
205   @param[in] interp      Pointer to interpolation matrix
206   @param[in] grad        Pointer to gradient matrix
207   @param[out] basis_ptr  Basis pointer to set
208 
209   @ref Developer
210 **/
211 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode,
212     const CeedScalar *identity, const CeedScalar *interp,
213     const CeedScalar *grad, const CeedScalar **basis_ptr) {
214   switch (eval_mode) {
215   case CEED_EVAL_NONE:
216     *basis_ptr = identity;
217     break;
218   case CEED_EVAL_INTERP:
219     *basis_ptr = interp;
220     break;
221   case CEED_EVAL_GRAD:
222     *basis_ptr = grad;
223     break;
224   case CEED_EVAL_WEIGHT:
225   case CEED_EVAL_DIV:
226   case CEED_EVAL_CURL:
227     break; // Caught by QF Assembly
228   }
229 }
230 
231 /**
232   @brief Create point block restriction for active operator field
233 
234   @param[in] rstr              Original CeedElemRestriction for active field
235   @param[out] pointblock_rstr  Address of the variable where the newly created
236                                  CeedElemRestriction will be stored
237 
238   @return An error code: 0 - success, otherwise - failure
239 
240   @ref Developer
241 **/
242 static int CeedOperatorCreateActivePointBlockRestriction(
243   CeedElemRestriction rstr,
244   CeedElemRestriction *pointblock_rstr) {
245   int ierr;
246   Ceed ceed;
247   ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr);
248   const CeedInt *offsets;
249   ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets);
250   CeedChk(ierr);
251 
252   // Expand offsets
253   CeedInt num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
254   CeedSize l_size;
255   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
256   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
257   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
258   ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr);
259   ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr);
260   CeedInt shift = num_comp;
261   if (comp_stride != 1)
262     shift *= num_comp;
263   ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets);
264   CeedChk(ierr);
265   for (CeedInt i = 0; i < num_elem*elem_size; i++) {
266     pointblock_offsets[i] = offsets[i]*shift;
267   }
268 
269   // Create new restriction
270   ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp,
271                                    1, l_size * num_comp, CEED_MEM_HOST,
272                                    CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr);
273   CeedChk(ierr);
274 
275   // Cleanup
276   ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr);
277 
278   return CEED_ERROR_SUCCESS;
279 }
280 
281 /**
282   @brief Core logic for assembling operator diagonal or point block diagonal
283 
284   @param[in] op             CeedOperator to assemble point block diagonal
285   @param[in] request        Address of CeedRequest for non-blocking completion, else
286                               CEED_REQUEST_IMMEDIATE
287   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
288   @param[out] assembled     CeedVector to store assembled diagonal
289 
290   @return An error code: 0 - success, otherwise - failure
291 
292   @ref Developer
293 **/
294 static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op,
295     CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
296   int ierr;
297   Ceed ceed;
298   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
299 
300   // Assemble QFunction
301   CeedQFunction qf;
302   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
303   CeedInt num_input_fields, num_output_fields;
304   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
305   CeedChk(ierr);
306   CeedVector assembled_qf;
307   CeedElemRestriction rstr;
308   ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op,  &assembled_qf,
309          &rstr, request); CeedChk(ierr);
310   CeedInt layout[3];
311   ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr);
312   ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
313   CeedScalar max_norm = 0;
314   ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr);
315 
316   // Determine active input basis
317   CeedOperatorField *op_fields;
318   CeedQFunctionField *qf_fields;
319   ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr);
320   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
321   CeedInt num_eval_mode_in = 0, num_comp, dim = 1;
322   CeedEvalMode *eval_mode_in = NULL;
323   CeedBasis basis_in = NULL;
324   CeedElemRestriction rstr_in = NULL;
325   for (CeedInt i=0; i<num_input_fields; i++) {
326     CeedVector vec;
327     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
328     if (vec == CEED_VECTOR_ACTIVE) {
329       CeedElemRestriction rstr;
330       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr);
331       ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr);
332       ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
333       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
334       if (rstr_in && rstr_in != rstr)
335         // LCOV_EXCL_START
336         return CeedError(ceed, CEED_ERROR_BACKEND,
337                          "Multi-field non-composite operator diagonal assembly not supported");
338       // LCOV_EXCL_STOP
339       rstr_in = rstr;
340       CeedEvalMode eval_mode;
341       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
342       switch (eval_mode) {
343       case CEED_EVAL_NONE:
344       case CEED_EVAL_INTERP:
345         ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
346         eval_mode_in[num_eval_mode_in] = eval_mode;
347         num_eval_mode_in += 1;
348         break;
349       case CEED_EVAL_GRAD:
350         ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
351         for (CeedInt d=0; d<dim; d++)
352           eval_mode_in[num_eval_mode_in+d] = eval_mode;
353         num_eval_mode_in += dim;
354         break;
355       case CEED_EVAL_WEIGHT:
356       case CEED_EVAL_DIV:
357       case CEED_EVAL_CURL:
358         break; // Caught by QF Assembly
359       }
360     }
361   }
362 
363   // Determine active output basis
364   ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr);
365   ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
366   CeedInt num_eval_mode_out = 0;
367   CeedEvalMode *eval_mode_out = NULL;
368   CeedBasis basis_out = NULL;
369   CeedElemRestriction rstr_out = NULL;
370   for (CeedInt i=0; i<num_output_fields; i++) {
371     CeedVector vec;
372     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
373     if (vec == CEED_VECTOR_ACTIVE) {
374       CeedElemRestriction rstr;
375       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr);
376       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
377       if (rstr_out && rstr_out != rstr)
378         // LCOV_EXCL_START
379         return CeedError(ceed, CEED_ERROR_BACKEND,
380                          "Multi-field non-composite operator diagonal assembly not supported");
381       // LCOV_EXCL_STOP
382       rstr_out = rstr;
383       CeedEvalMode eval_mode;
384       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
385       switch (eval_mode) {
386       case CEED_EVAL_NONE:
387       case CEED_EVAL_INTERP:
388         ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
389         eval_mode_out[num_eval_mode_out] = eval_mode;
390         num_eval_mode_out += 1;
391         break;
392       case CEED_EVAL_GRAD:
393         ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
394         for (CeedInt d=0; d<dim; d++)
395           eval_mode_out[num_eval_mode_out+d] = eval_mode;
396         num_eval_mode_out += dim;
397         break;
398       case CEED_EVAL_WEIGHT:
399       case CEED_EVAL_DIV:
400       case CEED_EVAL_CURL:
401         break; // Caught by QF Assembly
402       }
403     }
404   }
405 
406   // Assemble point block diagonal restriction, if needed
407   CeedElemRestriction diag_rstr = rstr_out;
408   if (is_pointblock) {
409     ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr);
410     CeedChk(ierr);
411   }
412 
413   // Create diagonal vector
414   CeedVector elem_diag;
415   ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
416   CeedChk(ierr);
417 
418   // Assemble element operator diagonals
419   CeedScalar *elem_diag_array;
420   const CeedScalar *assembled_qf_array;
421   ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr);
422   ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
423   CeedChk(ierr);
424   ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
425   CeedChk(ierr);
426   CeedInt num_elem, num_nodes, num_qpts;
427   ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr);
428   ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr);
429   ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
430   // Basis matrices
431   const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
432   CeedScalar *identity = NULL;
433   bool evalNone = false;
434   for (CeedInt i=0; i<num_eval_mode_in; i++)
435     evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE);
436   for (CeedInt i=0; i<num_eval_mode_out; i++)
437     evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE);
438   if (evalNone) {
439     ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr);
440     for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++)
441       identity[i*num_nodes+i] = 1.0;
442   }
443   ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
444   ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr);
445   ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
446   ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr);
447   // Compute the diagonal of B^T D B
448   // Each element
449   const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
450   for (CeedInt e=0; e<num_elem; e++) {
451     CeedInt d_out = -1;
452     // Each basis eval mode pair
453     for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
454       const CeedScalar *bt = NULL;
455       if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
456         d_out += 1;
457       CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out,
458                                   &grad_out[d_out*num_qpts*num_nodes], &bt);
459       CeedInt d_in = -1;
460       for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
461         const CeedScalar *b = NULL;
462         if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
463           d_in += 1;
464         CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in,
465                                     &grad_in[d_in*num_qpts*num_nodes], &b);
466         // Each component
467         for (CeedInt c_out=0; c_out<num_comp; c_out++)
468           // Each qpoint/node pair
469           for (CeedInt q=0; q<num_qpts; q++)
470             if (is_pointblock) {
471               // Point Block Diagonal
472               for (CeedInt c_in=0; c_in<num_comp; c_in++) {
473                 const CeedScalar qf_value =
474                   assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)*
475                                                      num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
476                 if (fabs(qf_value) > qf_value_bound)
477                   for (CeedInt n=0; n<num_nodes; n++)
478                     elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
479                       bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
480               }
481             } else {
482               // Diagonal Only
483               const CeedScalar qf_value =
484                 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)*
485                                                    num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
486               if (fabs(qf_value) > qf_value_bound)
487                 for (CeedInt n=0; n<num_nodes; n++)
488                   elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
489                     bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
490             }
491       }
492     }
493   }
494   ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr);
495   ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
496   CeedChk(ierr);
497 
498   // Assemble local operator diagonal
499   ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag,
500                                   assembled, request); CeedChk(ierr);
501 
502   // Cleanup
503   if (is_pointblock) {
504     ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr);
505   }
506   ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
507   ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr);
508   ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
509   ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
510   ierr = CeedFree(&identity); CeedChk(ierr);
511 
512   return CEED_ERROR_SUCCESS;
513 }
514 
515 /**
516   @brief Core logic for assembling composite operator diagonal
517 
518   @param[in] op             CeedOperator to assemble point block diagonal
519   @param[in] request        Address of CeedRequest for non-blocking completion, else
520                             CEED_REQUEST_IMMEDIATE
521   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
522   @param[out] assembled     CeedVector to store assembled diagonal
523 
524   @return An error code: 0 - success, otherwise - failure
525 
526   @ref Developer
527 **/
528 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(
529   CeedOperator op, CeedRequest *request, const bool is_pointblock,
530   CeedVector assembled) {
531   int ierr;
532   CeedInt num_sub;
533   CeedOperator *suboperators;
534   ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr);
535   ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr);
536   for (CeedInt i = 0; i < num_sub; i++) {
537     if (is_pointblock) {
538       ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i],
539              assembled, request); CeedChk(ierr);
540     } else {
541       ierr = CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled,
542              request); CeedChk(ierr);
543     }
544   }
545   return CEED_ERROR_SUCCESS;
546 }
547 
548 /**
549   @brief Build nonzero pattern for non-composite operator
550 
551   Users should generally use CeedOperatorLinearAssembleSymbolic()
552 
553   @param[in] op      CeedOperator to assemble nonzero pattern
554   @param[in] offset  Offset for number of entries
555   @param[out] rows   Row number for each entry
556   @param[out] cols   Column number for each entry
557 
558   @return An error code: 0 - success, otherwise - failure
559 
560   @ref Developer
561 **/
562 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset,
563     CeedInt *rows, CeedInt *cols) {
564   int ierr;
565   Ceed ceed = op->ceed;
566   if (op->is_composite)
567     // LCOV_EXCL_START
568     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
569                      "Composite operator not supported");
570   // LCOV_EXCL_STOP
571 
572   CeedSize num_nodes;
573   ierr = CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL); CeedChk(ierr);
574   CeedElemRestriction rstr_in;
575   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr);
576   CeedInt num_elem, elem_size, num_comp;
577   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
578   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
579   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
580   CeedInt layout_er[3];
581   ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr);
582 
583   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
584 
585   // Determine elem_dof relation
586   CeedVector index_vec;
587   ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr);
588   CeedScalar *array;
589   ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr);
590   for (CeedInt i = 0; i < num_nodes; ++i) {
591     array[i] = i;
592   }
593   ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr);
594   CeedVector elem_dof;
595   ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof);
596   CeedChk(ierr);
597   ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr);
598   CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec,
599                            elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
600   const CeedScalar *elem_dof_a;
601   ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a);
602   CeedChk(ierr);
603   ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr);
604 
605   // Determine i, j locations for element matrices
606   CeedInt count = 0;
607   for (CeedInt e = 0; e < num_elem; ++e) {
608     for (CeedInt comp_in = 0; comp_in < num_comp; ++comp_in) {
609       for (CeedInt comp_out = 0; comp_out < num_comp; ++comp_out) {
610         for (CeedInt i = 0; i < elem_size; ++i) {
611           for (CeedInt j = 0; j < elem_size; ++j) {
612             const CeedInt elem_dof_index_row = (i)*layout_er[0] +
613                                                (comp_out)*layout_er[1] + e*layout_er[2];
614             const CeedInt elem_dof_index_col = (j)*layout_er[0] +
615                                                (comp_in)*layout_er[1] + e*layout_er[2];
616 
617             const CeedInt row = elem_dof_a[elem_dof_index_row];
618             const CeedInt col = elem_dof_a[elem_dof_index_col];
619 
620             rows[offset + count] = row;
621             cols[offset + count] = col;
622             count++;
623           }
624         }
625       }
626     }
627   }
628   if (count != local_num_entries)
629     // LCOV_EXCL_START
630     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
631   // LCOV_EXCL_STOP
632   ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr);
633   ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr);
634 
635   return CEED_ERROR_SUCCESS;
636 }
637 
638 /**
639   @brief Assemble nonzero entries for non-composite operator
640 
641   Users should generally use CeedOperatorLinearAssemble()
642 
643   @param[in] op       CeedOperator to assemble
644   @param[in] offset   Offest for number of entries
645   @param[out] values  Values to assemble into matrix
646 
647   @return An error code: 0 - success, otherwise - failure
648 
649   @ref Developer
650 **/
651 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
652                                       CeedVector values) {
653   int ierr;
654   Ceed ceed = op->ceed;
655   if (op->is_composite)
656     // LCOV_EXCL_START
657     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
658                      "Composite operator not supported");
659   // LCOV_EXCL_STOP
660   if (op->num_elem == 0) return CEED_ERROR_SUCCESS;
661 
662   if (op->LinearAssembleSingle) {
663     // Backend version
664     ierr = op->LinearAssembleSingle(op, offset, values); CeedChk(ierr);
665     return CEED_ERROR_SUCCESS;
666   } else {
667     // Operator fallback
668     CeedOperator op_fallback;
669 
670     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
671     if (op_fallback) {
672       ierr = CeedSingleOperatorAssemble(op_fallback, offset, values);
673       CeedChk(ierr);
674       return CEED_ERROR_SUCCESS;
675     }
676   }
677 
678   // Assemble QFunction
679   CeedQFunction qf;
680   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
681   CeedVector assembled_qf;
682   CeedElemRestriction rstr_q;
683   ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(
684            op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
685 
686   CeedSize qf_length;
687   ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr);
688 
689   CeedInt num_input_fields, num_output_fields;
690   CeedOperatorField *input_fields;
691   CeedOperatorField *output_fields;
692   ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields,
693                                &num_output_fields, &output_fields); CeedChk(ierr);
694 
695   // Determine active input basis
696   CeedQFunctionField *qf_fields;
697   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
698   CeedInt num_eval_mode_in = 0, dim = 1;
699   CeedEvalMode *eval_mode_in = NULL;
700   CeedBasis basis_in = NULL;
701   CeedElemRestriction rstr_in = NULL;
702   for (CeedInt i=0; i<num_input_fields; i++) {
703     CeedVector vec;
704     ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr);
705     if (vec == CEED_VECTOR_ACTIVE) {
706       ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in);
707       CeedChk(ierr);
708       ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
709       ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in);
710       CeedChk(ierr);
711       CeedEvalMode eval_mode;
712       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
713       CeedChk(ierr);
714       switch (eval_mode) {
715       case CEED_EVAL_NONE:
716       case CEED_EVAL_INTERP:
717         ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
718         eval_mode_in[num_eval_mode_in] = eval_mode;
719         num_eval_mode_in += 1;
720         break;
721       case CEED_EVAL_GRAD:
722         ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
723         for (CeedInt d=0; d<dim; d++) {
724           eval_mode_in[num_eval_mode_in+d] = eval_mode;
725         }
726         num_eval_mode_in += dim;
727         break;
728       case CEED_EVAL_WEIGHT:
729       case CEED_EVAL_DIV:
730       case CEED_EVAL_CURL:
731         break; // Caught by QF Assembly
732       }
733     }
734   }
735 
736   // Determine active output basis
737   ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
738   CeedInt num_eval_mode_out = 0;
739   CeedEvalMode *eval_mode_out = NULL;
740   CeedBasis basis_out = NULL;
741   CeedElemRestriction rstr_out = NULL;
742   for (CeedInt i=0; i<num_output_fields; i++) {
743     CeedVector vec;
744     ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr);
745     if (vec == CEED_VECTOR_ACTIVE) {
746       ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out);
747       CeedChk(ierr);
748       ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out);
749       CeedChk(ierr);
750       CeedEvalMode eval_mode;
751       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
752       CeedChk(ierr);
753       switch (eval_mode) {
754       case CEED_EVAL_NONE:
755       case CEED_EVAL_INTERP:
756         ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
757         eval_mode_out[num_eval_mode_out] = eval_mode;
758         num_eval_mode_out += 1;
759         break;
760       case CEED_EVAL_GRAD:
761         ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
762         for (CeedInt d=0; d<dim; d++) {
763           eval_mode_out[num_eval_mode_out+d] = eval_mode;
764         }
765         num_eval_mode_out += dim;
766         break;
767       case CEED_EVAL_WEIGHT:
768       case CEED_EVAL_DIV:
769       case CEED_EVAL_CURL:
770         break; // Caught by QF Assembly
771       }
772     }
773   }
774 
775   if (num_eval_mode_in == 0 || num_eval_mode_out == 0)
776     // LCOV_EXCL_START
777     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
778                      "Cannot assemble operator with out inputs/outputs");
779   // LCOV_EXCL_STOP
780 
781   CeedInt num_elem, elem_size, num_qpts, num_comp;
782   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
783   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
784   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
785   ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
786 
787   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
788 
789   // loop over elements and put in data structure
790   const CeedScalar *interp_in, *grad_in;
791   ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
792   ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
793 
794   const CeedScalar *assembled_qf_array;
795   ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
796   CeedChk(ierr);
797 
798   CeedInt layout_qf[3];
799   ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr);
800   ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr);
801 
802   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
803   CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size];
804   CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size];
805   CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in *
806                                      num_qpts]; // logically 3-tensor
807   CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in];
808   CeedScalar elem_mat[elem_size * elem_size];
809   CeedInt count = 0;
810   CeedScalar *vals;
811   ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr);
812   for (CeedInt e = 0; e < num_elem; ++e) {
813     for (CeedInt comp_in = 0; comp_in < num_comp; ++comp_in) {
814       for (CeedInt comp_out = 0; comp_out < num_comp; ++comp_out) {
815         for (CeedInt ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) {
816           B_mat_in[ell] = 0.0;
817         }
818         for (CeedInt ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) {
819           B_mat_out[ell] = 0.0;
820         }
821         // Store block-diagonal D matrix as collection of small dense blocks
822         for (CeedInt ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts;
823              ++ell) {
824           D_mat[ell] = 0.0;
825         }
826         // form element matrix itself (for each block component)
827         for (CeedInt ell = 0; ell < elem_size*elem_size; ++ell) {
828           elem_mat[ell] = 0.0;
829         }
830         for (CeedInt q = 0; q < num_qpts; ++q) {
831           for (CeedInt n = 0; n < elem_size; ++n) {
832             CeedInt d_in = -1;
833             for (CeedInt e_in = 0; e_in < num_eval_mode_in; ++e_in) {
834               const CeedInt qq = num_eval_mode_in*q;
835               if (eval_mode_in[e_in] == CEED_EVAL_INTERP) {
836                 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n];
837               } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) {
838                 d_in += 1;
839                 B_mat_in[(qq+e_in)*elem_size + n] +=
840                   grad_in[(d_in*num_qpts+q) * elem_size + n];
841               } else {
842                 // LCOV_EXCL_START
843                 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
844                 // LCOV_EXCL_STOP
845               }
846             }
847             CeedInt d_out = -1;
848             for (CeedInt e_out = 0; e_out < num_eval_mode_out; ++e_out) {
849               const CeedInt qq = num_eval_mode_out*q;
850               if (eval_mode_out[e_out] == CEED_EVAL_INTERP) {
851                 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n];
852               } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) {
853                 d_out += 1;
854                 B_mat_out[(qq+e_out)*elem_size + n] +=
855                   grad_in[(d_out*num_qpts+q) * elem_size + n];
856               } else {
857                 // LCOV_EXCL_START
858                 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
859                 // LCOV_EXCL_STOP
860               }
861             }
862           }
863           for (CeedInt ei = 0; ei < num_eval_mode_out; ++ei) {
864             for (CeedInt ej = 0; ej < num_eval_mode_in; ++ej) {
865               const CeedInt eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)
866                                               *num_comp
867                                               +comp_out;
868               const CeedInt index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
869                                     e*layout_qf[2];
870               D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index];
871             }
872           }
873         }
874         // Compute B^T*D
875         for (CeedInt ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) {
876           BTD[ell] = 0.0;
877         }
878         for (CeedInt j = 0; j<elem_size; ++j) {
879           for (CeedInt q = 0; q<num_qpts; ++q) {
880             const CeedInt qq = num_eval_mode_out*q;
881             for (CeedInt ei = 0; ei < num_eval_mode_in; ++ei) {
882               for (CeedInt ej = 0; ej < num_eval_mode_out; ++ej) {
883                 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] +=
884                   B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q];
885               }
886             }
887           }
888         }
889 
890         ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size,
891                                   elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);
892 
893         // put element matrix in coordinate data structure
894         for (CeedInt i = 0; i < elem_size; ++i) {
895           for (CeedInt j = 0; j < elem_size; ++j) {
896             vals[offset + count] = elem_mat[i*elem_size + j];
897             count++;
898           }
899         }
900       }
901     }
902   }
903   if (count != local_num_entries)
904     // LCOV_EXCL_START
905     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
906   // LCOV_EXCL_STOP
907   ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr);
908 
909   ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
910   CeedChk(ierr);
911   ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
912   ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
913   ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
914 
915   return CEED_ERROR_SUCCESS;
916 }
917 
918 /**
919   @brief Count number of entries for assembled CeedOperator
920 
921   @param[in] op            CeedOperator to assemble
922   @param[out] num_entries  Number of entries in assembled representation
923 
924   @return An error code: 0 - success, otherwise - failure
925 
926   @ref Utility
927 **/
928 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op,
929     CeedInt *num_entries) {
930   int ierr;
931   CeedElemRestriction rstr;
932   CeedInt num_elem, elem_size, num_comp;
933 
934   if (op->is_composite)
935     // LCOV_EXCL_START
936     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
937                      "Composite operator not supported");
938   // LCOV_EXCL_STOP
939   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr);
940   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
941   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
942   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
943   *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
944 
945   return CEED_ERROR_SUCCESS;
946 }
947 
948 /**
949   @brief Common code for creating a multigrid coarse operator and level
950            transfer operators for a CeedOperator
951 
952   @param[in] op_fine       Fine grid operator
953   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
954   @param[in] rstr_coarse   Coarse grid restriction
955   @param[in] basis_coarse  Coarse grid active vector basis
956   @param[in] basis_c_to_f  Basis for coarse to fine interpolation
957   @param[out] op_coarse    Coarse grid operator
958   @param[out] op_prolong   Coarse to fine operator
959   @param[out] op_restrict  Fine to coarse operator
960 
961   @return An error code: 0 - success, otherwise - failure
962 
963   @ref Developer
964 **/
965 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
966     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
967     CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
968     CeedOperator *op_restrict) {
969   int ierr;
970   Ceed ceed;
971   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
972 
973   // Check for composite operator
974   bool is_composite;
975   ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
976   if (is_composite)
977     // LCOV_EXCL_START
978     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
979                      "Automatic multigrid setup for composite operators not supported");
980   // LCOV_EXCL_STOP
981 
982   // Coarse Grid
983   ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
984                             op_coarse); CeedChk(ierr);
985   CeedElemRestriction rstr_fine = NULL;
986   // -- Clone input fields
987   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
988     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
989       rstr_fine = op_fine->input_fields[i]->elem_restr;
990       ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
991                                   rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
992       CeedChk(ierr);
993     } else {
994       ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
995                                   op_fine->input_fields[i]->elem_restr,
996                                   op_fine->input_fields[i]->basis,
997                                   op_fine->input_fields[i]->vec); CeedChk(ierr);
998     }
999   }
1000   // -- Clone output fields
1001   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
1002     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
1003       ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
1004                                   rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
1005       CeedChk(ierr);
1006     } else {
1007       ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
1008                                   op_fine->output_fields[i]->elem_restr,
1009                                   op_fine->output_fields[i]->basis,
1010                                   op_fine->output_fields[i]->vec); CeedChk(ierr);
1011     }
1012   }
1013   // -- Clone QFunctionAssemblyData
1014   ierr = CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled,
1015          &(*op_coarse)->qf_assembled); CeedChk(ierr);
1016 
1017   // Multiplicity vector
1018   CeedVector mult_vec, mult_e_vec;
1019   ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
1020   CeedChk(ierr);
1021   ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
1022   ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
1023                                   mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
1024   ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
1025   ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
1026                                   CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
1027   ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
1028   ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);
1029 
1030   // Restriction
1031   CeedInt num_comp;
1032   ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
1033   CeedQFunction qf_restrict;
1034   ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
1035   CeedChk(ierr);
1036   CeedInt *num_comp_r_data;
1037   ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
1038   num_comp_r_data[0] = num_comp;
1039   CeedQFunctionContext ctx_r;
1040   ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
1041   ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
1042                                      sizeof(*num_comp_r_data), num_comp_r_data);
1043   CeedChk(ierr);
1044   ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
1045   ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
1046   ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
1047   CeedChk(ierr);
1048   ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
1049   CeedChk(ierr);
1050   ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
1051                                 CEED_EVAL_INTERP); CeedChk(ierr);
1052   ierr = CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp); CeedChk(ierr);
1053 
1054   ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
1055                             CEED_QFUNCTION_NONE, op_restrict);
1056   CeedChk(ierr);
1057   ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
1058                               CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
1059   CeedChk(ierr);
1060   ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
1061                               CEED_BASIS_COLLOCATED, mult_vec);
1062   CeedChk(ierr);
1063   ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
1064                               CEED_VECTOR_ACTIVE); CeedChk(ierr);
1065 
1066   // Prolongation
1067   CeedQFunction qf_prolong;
1068   ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
1069   CeedChk(ierr);
1070   CeedInt *num_comp_p_data;
1071   ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
1072   num_comp_p_data[0] = num_comp;
1073   CeedQFunctionContext ctx_p;
1074   ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
1075   ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
1076                                      sizeof(*num_comp_p_data), num_comp_p_data);
1077   CeedChk(ierr);
1078   ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
1079   ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
1080   ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
1081   CeedChk(ierr);
1082   ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
1083   CeedChk(ierr);
1084   ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
1085   CeedChk(ierr);
1086   ierr = CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp); CeedChk(ierr);
1087 
1088   ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
1089                             CEED_QFUNCTION_NONE, op_prolong);
1090   CeedChk(ierr);
1091   ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
1092                               CEED_VECTOR_ACTIVE); CeedChk(ierr);
1093   ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
1094                               CEED_BASIS_COLLOCATED, mult_vec);
1095   CeedChk(ierr);
1096   ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
1097                               CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
1098   CeedChk(ierr);
1099 
1100   // Clone name
1101   bool has_name = op_fine->name;
1102   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
1103   ierr = CeedOperatorSetName(*op_coarse, op_fine->name); CeedChk(ierr);
1104   {
1105     char *prolongation_name;
1106     ierr = CeedCalloc(18 + name_len, &prolongation_name); CeedChk(ierr);
1107     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "",
1108             has_name ? op_fine->name : "");
1109     ierr = CeedOperatorSetName(*op_prolong, prolongation_name); CeedChk(ierr);
1110     ierr = CeedFree(&prolongation_name); CeedChk(ierr);
1111   }
1112   {
1113     char *restriction_name;
1114     ierr = CeedCalloc(17 + name_len, &restriction_name); CeedChk(ierr);
1115     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "",
1116             has_name ? op_fine->name : "");
1117     ierr = CeedOperatorSetName(*op_restrict, restriction_name); CeedChk(ierr);
1118     ierr = CeedFree(&restriction_name); CeedChk(ierr);
1119   }
1120 
1121   // Cleanup
1122   ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
1123   ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
1124   ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
1125   ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
1126 
1127   return CEED_ERROR_SUCCESS;
1128 }
1129 
1130 /**
1131   @brief Build 1D mass matrix and Laplacian with perturbation
1132 
1133   @param[in] interp_1d    Interpolation matrix in one dimension
1134   @param[in] grad_1d      Gradient matrix in one dimension
1135   @param[in] q_weight_1d  Quadrature weights in one dimension
1136   @param[in] P_1d         Number of basis nodes in one dimension
1137   @param[in] Q_1d         Number of quadrature points in one dimension
1138   @param[in] dim          Dimension of basis
1139   @param[out] mass        Assembled mass matrix in one dimension
1140   @param[out] laplace     Assembled perturbed Laplacian in one dimension
1141 
1142   @return An error code: 0 - success, otherwise - failure
1143 
1144   @ref Developer
1145 **/
1146 CeedPragmaOptimizeOff
1147 static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
1148                                 const CeedScalar *grad_1d,
1149                                 const CeedScalar *q_weight_1d, CeedInt P_1d,
1150                                 CeedInt Q_1d, CeedInt dim,
1151                                 CeedScalar *mass, CeedScalar *laplace) {
1152 
1153   for (CeedInt i=0; i<P_1d; i++)
1154     for (CeedInt j=0; j<P_1d; j++) {
1155       CeedScalar sum = 0.0;
1156       for (CeedInt k=0; k<Q_1d; k++)
1157         sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
1158       mass[i+j*P_1d] = sum;
1159     }
1160   // -- Laplacian
1161   for (CeedInt i=0; i<P_1d; i++)
1162     for (CeedInt j=0; j<P_1d; j++) {
1163       CeedScalar sum = 0.0;
1164       for (CeedInt k=0; k<Q_1d; k++)
1165         sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
1166       laplace[i+j*P_1d] = sum;
1167     }
1168   CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
1169   for (CeedInt i=0; i<P_1d; i++)
1170     laplace[i+P_1d*i] += perturbation;
1171   return CEED_ERROR_SUCCESS;
1172 }
1173 CeedPragmaOptimizeOn
1174 
1175 /// @}
1176 
1177 /// ----------------------------------------------------------------------------
1178 /// CeedOperator Backend API
1179 /// ----------------------------------------------------------------------------
1180 /// @addtogroup CeedOperatorBackend
1181 /// @{
1182 
1183 /**
1184   @brief Create object holding CeedQFunction assembly data for CeedOperator
1185 
1186   @param[in] ceed  A Ceed object where the CeedQFunctionAssemblyData will be created
1187   @param[out] data Address of the variable where the newly created
1188                      CeedQFunctionAssemblyData will be stored
1189 
1190   @return An error code: 0 - success, otherwise - failure
1191 
1192   @ref Backend
1193 **/
1194 int CeedQFunctionAssemblyDataCreate(Ceed ceed,
1195                                     CeedQFunctionAssemblyData *data) {
1196   int ierr;
1197 
1198   ierr = CeedCalloc(1, data); CeedChk(ierr);
1199   (*data)->ref_count = 1;
1200   (*data)->ceed = ceed;
1201   ierr = CeedReference(ceed); CeedChk(ierr);
1202 
1203   return CEED_ERROR_SUCCESS;
1204 }
1205 
1206 /**
1207   @brief Increment the reference counter for a CeedQFunctionAssemblyData
1208 
1209   @param data  CeedQFunctionAssemblyData to increment the reference counter
1210 
1211   @return An error code: 0 - success, otherwise - failure
1212 
1213   @ref Backend
1214 **/
1215 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
1216   data->ref_count++;
1217   return CEED_ERROR_SUCCESS;
1218 }
1219 
1220 /**
1221   @brief Set re-use of CeedQFunctionAssemblyData
1222 
1223   @param data       CeedQFunctionAssemblyData to mark for reuse
1224   @param reuse_data Boolean flag indicating data re-use
1225 
1226   @return An error code: 0 - success, otherwise - failure
1227 
1228   @ref Backend
1229 **/
1230 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data,
1231                                       bool reuse_data) {
1232   data->reuse_data = reuse_data;
1233   data->needs_data_update = true;
1234   return CEED_ERROR_SUCCESS;
1235 }
1236 
1237 /**
1238   @brief Mark QFunctionAssemblyData as stale
1239 
1240   @param data              CeedQFunctionAssemblyData to mark as stale
1241   @param needs_data_update Boolean flag indicating if update is needed or completed
1242 
1243   @return An error code: 0 - success, otherwise - failure
1244 
1245   @ref Backend
1246 **/
1247 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data,
1248     bool needs_data_update) {
1249   data->needs_data_update = needs_data_update;
1250   return CEED_ERROR_SUCCESS;
1251 }
1252 
1253 /**
1254   @brief Determine if QFunctionAssemblyData needs update
1255 
1256   @param[in] data              CeedQFunctionAssemblyData to mark as stale
1257   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1258 
1259   @return An error code: 0 - success, otherwise - failure
1260 
1261   @ref Backend
1262 **/
1263 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data,
1264     bool *is_update_needed) {
1265   *is_update_needed = !data->reuse_data || data->needs_data_update;
1266   return CEED_ERROR_SUCCESS;
1267 }
1268 
1269 /**
1270   @brief Copy the pointer to a CeedQFunctionAssemblyData. Both pointers should
1271            be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`;
1272            Note: If `*data_copy` is non-NULL, then it is assumed that
1273            `*data_copy` is a pointer to a CeedQFunctionAssemblyData. This
1274            CeedQFunctionAssemblyData will be destroyed if `*data_copy` is
1275            the only reference to this CeedQFunctionAssemblyData.
1276 
1277   @param data            CeedQFunctionAssemblyData to copy reference to
1278   @param[out] data_copy  Variable to store copied reference
1279 
1280   @return An error code: 0 - success, otherwise - failure
1281 
1282   @ref Backend
1283 **/
1284 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data,
1285     CeedQFunctionAssemblyData *data_copy) {
1286   int ierr;
1287 
1288   ierr = CeedQFunctionAssemblyDataReference(data); CeedChk(ierr);
1289   ierr = CeedQFunctionAssemblyDataDestroy(data_copy); CeedChk(ierr);
1290   *data_copy = data;
1291   return CEED_ERROR_SUCCESS;
1292 }
1293 
1294 /**
1295   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1296 
1297   @param[in] data      CeedQFunctionAssemblyData to retreive status
1298   @param[out] is_setup Boolean flag for setup status
1299 
1300   @return An error code: 0 - success, otherwise - failure
1301 
1302   @ref Backend
1303 **/
1304 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data,
1305                                      bool *is_setup) {
1306   *is_setup = data->is_setup;
1307   return CEED_ERROR_SUCCESS;
1308 }
1309 
1310 /**
1311   @brief Set internal objects for CeedQFunctionAssemblyData
1312 
1313   @param[in] data  CeedQFunctionAssemblyData to set objects
1314   @param[in] vec   CeedVector to store assembled CeedQFunction at quadrature points
1315   @param[in] rstr  CeedElemRestriction for CeedVector containing assembled CeedQFunction
1316 
1317   @return An error code: 0 - success, otherwise - failure
1318 
1319   @ref Backend
1320 **/
1321 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data,
1322                                         CeedVector vec, CeedElemRestriction rstr) {
1323   int ierr;
1324 
1325   ierr = CeedVectorReferenceCopy(vec, &data->vec); CeedChk(ierr);
1326   ierr = CeedElemRestrictionReferenceCopy(rstr, &data->rstr); CeedChk(ierr);
1327 
1328   data->is_setup = true;
1329   return CEED_ERROR_SUCCESS;
1330 }
1331 
1332 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data,
1333                                         CeedVector *vec, CeedElemRestriction *rstr) {
1334   int ierr;
1335 
1336   if (!data->is_setup)
1337     // LCOV_EXCL_START
1338     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE,
1339                      "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1340   // LCOV_EXCL_STOP
1341 
1342   ierr = CeedVectorReferenceCopy(data->vec, vec); CeedChk(ierr);
1343   ierr = CeedElemRestrictionReferenceCopy(data->rstr, rstr); CeedChk(ierr);
1344 
1345   return CEED_ERROR_SUCCESS;
1346 }
1347 
1348 /**
1349   @brief Destroy CeedQFunctionAssemblyData
1350 
1351   @param[out] data  CeedQFunctionAssemblyData to destroy
1352 
1353   @return An error code: 0 - success, otherwise - failure
1354 
1355   @ref Backend
1356 **/
1357 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1358   int ierr;
1359 
1360   if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;
1361 
1362   ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr);
1363   ierr = CeedVectorDestroy(&(*data)->vec); CeedChk(ierr);
1364   ierr = CeedElemRestrictionDestroy(&(*data)->rstr); CeedChk(ierr);
1365 
1366   ierr = CeedFree(data); CeedChk(ierr);
1367   return CEED_ERROR_SUCCESS;
1368 }
1369 
1370 /// @}
1371 
1372 /// ----------------------------------------------------------------------------
1373 /// CeedOperator Public API
1374 /// ----------------------------------------------------------------------------
1375 /// @addtogroup CeedOperatorUser
1376 /// @{
1377 
1378 /**
1379   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1380 
1381   This returns a CeedVector containing a matrix at each quadrature point
1382     providing the action of the CeedQFunction associated with the CeedOperator.
1383     The vector 'assembled' is of shape
1384       [num_elements, num_input_fields, num_output_fields, num_quad_points]
1385     and contains column-major matrices representing the action of the
1386     CeedQFunction for a corresponding quadrature point on an element. Inputs and
1387     outputs are in the order provided by the user when adding CeedOperator fields.
1388     For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
1389     'v', provided in that order, would result in an assembled QFunction that
1390     consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
1391     on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1392 
1393   Note: Calling this function asserts that setup is complete
1394           and sets the CeedOperator as immutable.
1395 
1396   @param op              CeedOperator to assemble CeedQFunction
1397   @param[out] assembled  CeedVector to store assembled CeedQFunction at
1398                            quadrature points
1399   @param[out] rstr       CeedElemRestriction for CeedVector containing assembled
1400                            CeedQFunction
1401   @param request         Address of CeedRequest for non-blocking completion, else
1402                            @ref CEED_REQUEST_IMMEDIATE
1403 
1404   @return An error code: 0 - success, otherwise - failure
1405 
1406   @ref User
1407 **/
1408 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
1409                                         CeedElemRestriction *rstr,
1410                                         CeedRequest *request) {
1411   int ierr;
1412   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1413 
1414   if (op->LinearAssembleQFunction) {
1415     // Backend version
1416     ierr = op->LinearAssembleQFunction(op, assembled, rstr, request);
1417     CeedChk(ierr);
1418   } else {
1419     // Operator fallback
1420     CeedOperator op_fallback;
1421 
1422     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1423     if (op_fallback) {
1424       ierr = CeedOperatorLinearAssembleQFunction(op_fallback, assembled,
1425              rstr, request); CeedChk(ierr);
1426     } else {
1427       // LCOV_EXCL_START
1428       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
1429                        "Backend does not support CeedOperatorLinearAssembleQFunction");
1430       // LCOV_EXCL_STOP
1431     }
1432   }
1433   return CEED_ERROR_SUCCESS;
1434 }
1435 
1436 /**
1437   @brief Assemble CeedQFunction and store result internall. Return copied
1438            references of stored data to the caller. Caller is responsible for
1439            ownership and destruction of the copied references. See also
1440            @ref CeedOperatorLinearAssembleQFunction
1441 
1442   @param op              CeedOperator to assemble CeedQFunction
1443   @param assembled       CeedVector to store assembled CeedQFunction at
1444                            quadrature points
1445   @param rstr            CeedElemRestriction for CeedVector containing assembled
1446                            CeedQFunction
1447   @param request         Address of CeedRequest for non-blocking completion, else
1448                            @ref CEED_REQUEST_IMMEDIATE
1449 
1450   @return An error code: 0 - success, otherwise - failure
1451 
1452   @ref User
1453 **/
1454 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op,
1455     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1456   int ierr;
1457   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1458 
1459   if (op->LinearAssembleQFunctionUpdate) {
1460     // Backend version
1461     bool qf_assembled_is_setup;
1462     CeedVector assembled_vec = NULL;
1463     CeedElemRestriction assembled_rstr = NULL;
1464 
1465     ierr = CeedQFunctionAssemblyDataIsSetup(op->qf_assembled,
1466                                             &qf_assembled_is_setup); CeedChk(ierr);
1467     if (qf_assembled_is_setup) {
1468       bool update_needed;
1469 
1470       ierr = CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec,
1471              &assembled_rstr); CeedChk(ierr);
1472       ierr = CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled,
1473              &update_needed); CeedChk(ierr);
1474       if (update_needed) {
1475         ierr = op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr,
1476                request); CeedChk(ierr);
1477       }
1478     } else {
1479       ierr = op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr,
1480                                          request); CeedChk(ierr);
1481       ierr = CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec,
1482              assembled_rstr); CeedChk(ierr);
1483     }
1484     ierr = CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false);
1485     CeedChk(ierr);
1486 
1487     // Copy reference from internally held copy
1488     *assembled = NULL;
1489     *rstr = NULL;
1490     ierr = CeedVectorReferenceCopy(assembled_vec, assembled); CeedChk(ierr);
1491     ierr = CeedVectorDestroy(&assembled_vec); CeedChk(ierr);
1492     ierr = CeedElemRestrictionReferenceCopy(assembled_rstr, rstr); CeedChk(ierr);
1493     ierr = CeedElemRestrictionDestroy(&assembled_rstr); CeedChk(ierr);
1494   } else {
1495     // Operator fallback
1496     CeedOperator op_fallback;
1497 
1498     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1499     if (op_fallback) {
1500       ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled,
1501              rstr, request); CeedChk(ierr);
1502     } else {
1503       // LCOV_EXCL_START
1504       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
1505                        "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1506       // LCOV_EXCL_STOP
1507     }
1508   }
1509 
1510   return CEED_ERROR_SUCCESS;
1511 }
1512 
1513 /**
1514   @brief Assemble the diagonal of a square linear CeedOperator
1515 
1516   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1517 
1518   Note: Currently only non-composite CeedOperators with a single field and
1519           composite CeedOperators with single field sub-operators are supported.
1520 
1521   Note: Calling this function asserts that setup is complete
1522           and sets the CeedOperator as immutable.
1523 
1524   @param op              CeedOperator to assemble CeedQFunction
1525   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1526   @param request         Address of CeedRequest for non-blocking completion, else
1527                            @ref CEED_REQUEST_IMMEDIATE
1528 
1529   @return An error code: 0 - success, otherwise - failure
1530 
1531   @ref User
1532 **/
1533 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
1534                                        CeedRequest *request) {
1535   int ierr;
1536   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1537 
1538   CeedSize input_size = 0, output_size = 0;
1539   ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
1540   CeedChk(ierr);
1541   if (input_size != output_size)
1542     // LCOV_EXCL_START
1543     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1544   // LCOV_EXCL_STOP
1545 
1546   if (op->LinearAssembleDiagonal) {
1547     // Backend version
1548     ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
1549     return CEED_ERROR_SUCCESS;
1550   } else if (op->LinearAssembleAddDiagonal) {
1551     // Backend version with zeroing first
1552     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1553     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1554     return CEED_ERROR_SUCCESS;
1555   } else {
1556     // Operator fallback
1557     CeedOperator op_fallback;
1558 
1559     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1560     if (op_fallback) {
1561       ierr = CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request);
1562       CeedChk(ierr);
1563       return CEED_ERROR_SUCCESS;
1564     }
1565   }
1566   // Default interface implementation
1567   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1568   ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
1569   CeedChk(ierr);
1570 
1571   return CEED_ERROR_SUCCESS;
1572 }
1573 
1574 /**
1575   @brief Assemble the diagonal of a square linear CeedOperator
1576 
1577   This sums into a CeedVector the diagonal of a linear CeedOperator.
1578 
1579   Note: Currently only non-composite CeedOperators with a single field and
1580           composite CeedOperators with single field sub-operators are supported.
1581 
1582   Note: Calling this function asserts that setup is complete
1583           and sets the CeedOperator as immutable.
1584 
1585   @param op              CeedOperator to assemble CeedQFunction
1586   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1587   @param request         Address of CeedRequest for non-blocking completion, else
1588                            @ref CEED_REQUEST_IMMEDIATE
1589 
1590   @return An error code: 0 - success, otherwise - failure
1591 
1592   @ref User
1593 **/
1594 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
1595     CeedRequest *request) {
1596   int ierr;
1597   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1598 
1599   CeedSize input_size = 0, output_size = 0;
1600   ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
1601   CeedChk(ierr);
1602   if (input_size != output_size)
1603     // LCOV_EXCL_START
1604     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1605   // LCOV_EXCL_STOP
1606 
1607   if (op->LinearAssembleAddDiagonal) {
1608     // Backend version
1609     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1610     return CEED_ERROR_SUCCESS;
1611   } else {
1612     // Operator fallback
1613     CeedOperator op_fallback;
1614 
1615     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1616     if (op_fallback) {
1617       ierr = CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request);
1618       CeedChk(ierr);
1619       return CEED_ERROR_SUCCESS;
1620     }
1621   }
1622   // Default interface implementation
1623   bool is_composite;
1624   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1625   if (is_composite) {
1626     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1627            false, assembled); CeedChk(ierr);
1628   } else {
1629     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
1630     CeedChk(ierr);
1631   }
1632 
1633   return CEED_ERROR_SUCCESS;
1634 }
1635 
1636 /**
1637   @brief Assemble the point block diagonal of a square linear CeedOperator
1638 
1639   This overwrites a CeedVector with the point block diagonal of a linear
1640     CeedOperator.
1641 
1642   Note: Currently only non-composite CeedOperators with a single field and
1643           composite CeedOperators with single field sub-operators are supported.
1644 
1645   Note: Calling this function asserts that setup is complete
1646           and sets the CeedOperator as immutable.
1647 
1648   @param op              CeedOperator to assemble CeedQFunction
1649   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1650                            diagonal, provided in row-major form with an
1651                            @a num_comp * @a num_comp block at each node. The dimensions
1652                            of this vector are derived from the active vector
1653                            for the CeedOperator. The array has shape
1654                            [nodes, component out, component in].
1655   @param request         Address of CeedRequest for non-blocking completion, else
1656                            CEED_REQUEST_IMMEDIATE
1657 
1658   @return An error code: 0 - success, otherwise - failure
1659 
1660   @ref User
1661 **/
1662 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
1663     CeedVector assembled, CeedRequest *request) {
1664   int ierr;
1665   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1666 
1667   CeedSize input_size = 0, output_size = 0;
1668   ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
1669   CeedChk(ierr);
1670   if (input_size != output_size)
1671     // LCOV_EXCL_START
1672     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1673   // LCOV_EXCL_STOP
1674 
1675   if (op->LinearAssemblePointBlockDiagonal) {
1676     // Backend version
1677     ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
1678     CeedChk(ierr);
1679     return CEED_ERROR_SUCCESS;
1680   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1681     // Backend version with zeroing first
1682     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1683     ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
1684            request); CeedChk(ierr);
1685     return CEED_ERROR_SUCCESS;
1686   } else {
1687     // Operator fallback
1688     CeedOperator op_fallback;
1689 
1690     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1691     if (op_fallback) {
1692       ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled,
1693              request); CeedChk(ierr);
1694       return CEED_ERROR_SUCCESS;
1695     }
1696   }
1697   // Default interface implementation
1698   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1699   ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
1700   CeedChk(ierr);
1701 
1702   return CEED_ERROR_SUCCESS;
1703 }
1704 
1705 /**
1706   @brief Assemble the point block diagonal of a square linear CeedOperator
1707 
1708   This sums into a CeedVector with the point block diagonal of a linear
1709     CeedOperator.
1710 
1711   Note: Currently only non-composite CeedOperators with a single field and
1712           composite CeedOperators with single field sub-operators are supported.
1713 
1714   Note: Calling this function asserts that setup is complete
1715           and sets the CeedOperator as immutable.
1716 
1717   @param op              CeedOperator to assemble CeedQFunction
1718   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1719                            diagonal, provided in row-major form with an
1720                            @a num_comp * @a num_comp block at each node. The dimensions
1721                            of this vector are derived from the active vector
1722                            for the CeedOperator. The array has shape
1723                            [nodes, component out, component in].
1724   @param request         Address of CeedRequest for non-blocking completion, else
1725                            CEED_REQUEST_IMMEDIATE
1726 
1727   @return An error code: 0 - success, otherwise - failure
1728 
1729   @ref User
1730 **/
1731 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
1732     CeedVector assembled, CeedRequest *request) {
1733   int ierr;
1734   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1735 
1736   CeedSize input_size = 0, output_size = 0;
1737   ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
1738   CeedChk(ierr);
1739   if (input_size != output_size)
1740     // LCOV_EXCL_START
1741     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1742   // LCOV_EXCL_STOP
1743 
1744   if (op->LinearAssembleAddPointBlockDiagonal) {
1745     // Backend version
1746     ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
1747     CeedChk(ierr);
1748     return CEED_ERROR_SUCCESS;
1749   } else {
1750     // Operator fallback
1751     CeedOperator op_fallback;
1752 
1753     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1754     if (op_fallback) {
1755       ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled,
1756              request); CeedChk(ierr);
1757       return CEED_ERROR_SUCCESS;
1758     }
1759   }
1760   // Default interface implemenation
1761   bool is_composite;
1762   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1763   if (is_composite) {
1764     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1765            true, assembled); CeedChk(ierr);
1766   } else {
1767     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
1768     CeedChk(ierr);
1769   }
1770 
1771   return CEED_ERROR_SUCCESS;
1772 }
1773 
1774 /**
1775    @brief Fully assemble the nonzero pattern of a linear operator.
1776 
1777    Expected to be used in conjunction with CeedOperatorLinearAssemble()
1778 
1779    The assembly routines use coordinate format, with num_entries tuples of the
1780    form (i, j, value) which indicate that value should be added to the matrix
1781    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1782    This function returns the number of entries and their (i, j) locations,
1783    while CeedOperatorLinearAssemble() provides the values in the same
1784    ordering.
1785 
1786    This will generally be slow unless your operator is low-order.
1787 
1788   Note: Calling this function asserts that setup is complete
1789           and sets the CeedOperator as immutable.
1790 
1791    @param[in]  op           CeedOperator to assemble
1792    @param[out] num_entries  Number of entries in coordinate nonzero pattern
1793    @param[out] rows         Row number for each entry
1794    @param[out] cols         Column number for each entry
1795 
1796    @ref User
1797 **/
1798 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries,
1799                                        CeedInt **rows, CeedInt **cols) {
1800   int ierr;
1801   CeedInt num_suboperators, single_entries;
1802   CeedOperator *sub_operators;
1803   bool is_composite;
1804   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1805 
1806   if (op->LinearAssembleSymbolic) {
1807     // Backend version
1808     ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
1809     return CEED_ERROR_SUCCESS;
1810   } else {
1811     // Operator fallback
1812     CeedOperator op_fallback;
1813 
1814     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1815     if (op_fallback) {
1816       ierr = CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols);
1817       CeedChk(ierr);
1818       return CEED_ERROR_SUCCESS;
1819     }
1820   }
1821 
1822   // Default interface implementation
1823 
1824   // count entries and allocate rows, cols arrays
1825   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1826   *num_entries = 0;
1827   if (is_composite) {
1828     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1829     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1830     for (CeedInt k = 0; k < num_suboperators; ++k) {
1831       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1832              &single_entries); CeedChk(ierr);
1833       *num_entries += single_entries;
1834     }
1835   } else {
1836     ierr = CeedSingleOperatorAssemblyCountEntries(op,
1837            &single_entries); CeedChk(ierr);
1838     *num_entries += single_entries;
1839   }
1840   ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
1841   ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);
1842 
1843   // assemble nonzero locations
1844   CeedInt offset = 0;
1845   if (is_composite) {
1846     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1847     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1848     for (CeedInt k = 0; k < num_suboperators; ++k) {
1849       ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
1850              *cols); CeedChk(ierr);
1851       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1852              &single_entries);
1853       CeedChk(ierr);
1854       offset += single_entries;
1855     }
1856   } else {
1857     ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
1858     CeedChk(ierr);
1859   }
1860 
1861   return CEED_ERROR_SUCCESS;
1862 }
1863 
1864 /**
1865    @brief Fully assemble the nonzero entries of a linear operator.
1866 
1867    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()
1868 
1869    The assembly routines use coordinate format, with num_entries tuples of the
1870    form (i, j, value) which indicate that value should be added to the matrix
1871    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1872    This function returns the values of the nonzero entries to be added, their
1873    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1874 
1875    This will generally be slow unless your operator is low-order.
1876 
1877   Note: Calling this function asserts that setup is complete
1878           and sets the CeedOperator as immutable.
1879 
1880    @param[in]  op      CeedOperator to assemble
1881    @param[out] values  Values to assemble into matrix
1882 
1883    @ref User
1884 **/
1885 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1886   int ierr;
1887   CeedInt num_suboperators, single_entries = 0;
1888   CeedOperator *sub_operators;
1889   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1890 
1891   if (op->LinearAssemble) {
1892     // Backend version
1893     ierr = op->LinearAssemble(op, values); CeedChk(ierr);
1894     return CEED_ERROR_SUCCESS;
1895   } else {
1896     // Operator fallback
1897     CeedOperator op_fallback;
1898 
1899     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1900     if (op_fallback) {
1901       ierr = CeedOperatorLinearAssemble(op_fallback, values); CeedChk(ierr);
1902       return CEED_ERROR_SUCCESS;
1903     }
1904   }
1905 
1906   // Default interface implementation
1907   bool is_composite;
1908   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1909 
1910   CeedInt offset = 0;
1911   if (is_composite) {
1912     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1913     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1914     for (CeedInt k = 0; k < num_suboperators; k++) {
1915       ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
1916       CeedChk(ierr);
1917       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1918              &single_entries);
1919       CeedChk(ierr);
1920       offset += single_entries;
1921     }
1922   } else {
1923     ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
1924   }
1925 
1926   return CEED_ERROR_SUCCESS;
1927 }
1928 
1929 /**
1930   @brief Create a multigrid coarse operator and level transfer operators
1931            for a CeedOperator, creating the prolongation basis from the
1932            fine and coarse grid interpolation
1933 
1934   Note: Calling this function asserts that setup is complete
1935           and sets the CeedOperator as immutable.
1936 
1937   @param[in] op_fine       Fine grid operator
1938   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
1939   @param[in] rstr_coarse   Coarse grid restriction
1940   @param[in] basis_coarse  Coarse grid active vector basis
1941   @param[out] op_coarse    Coarse grid operator
1942   @param[out] op_prolong   Coarse to fine operator
1943   @param[out] op_restrict  Fine to coarse operator
1944 
1945   @return An error code: 0 - success, otherwise - failure
1946 
1947   @ref User
1948 **/
1949 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
1950                                      CeedVector p_mult_fine,
1951                                      CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1952                                      CeedOperator *op_coarse, CeedOperator *op_prolong,
1953                                      CeedOperator *op_restrict) {
1954   int ierr;
1955   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
1956   Ceed ceed;
1957   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
1958 
1959   // Check for compatible quadrature spaces
1960   CeedBasis basis_fine;
1961   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
1962   CeedInt Q_f, Q_c;
1963   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
1964   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
1965   if (Q_f != Q_c)
1966     // LCOV_EXCL_START
1967     return CeedError(ceed, CEED_ERROR_DIMENSION,
1968                      "Bases must have compatible quadrature spaces");
1969   // LCOV_EXCL_STOP
1970 
1971   // Coarse to fine basis
1972   CeedInt P_f, P_c, Q = Q_f;
1973   bool is_tensor_f, is_tensor_c;
1974   ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
1975   ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
1976   CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
1977   if (is_tensor_f && is_tensor_c) {
1978     ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
1979     ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
1980     ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
1981   } else if (!is_tensor_f && !is_tensor_c) {
1982     ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
1983     ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
1984   } else {
1985     // LCOV_EXCL_START
1986     return CeedError(ceed, CEED_ERROR_MINOR,
1987                      "Bases must both be tensor or non-tensor");
1988     // LCOV_EXCL_STOP
1989   }
1990 
1991   ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
1992   ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
1993   ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
1994   ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
1995   const CeedScalar *interp_f_source = NULL, *interp_c_source = NULL;
1996   if (is_tensor_f) {
1997     ierr = CeedBasisGetInterp1D(basis_fine, &interp_f_source); CeedChk(ierr);
1998     ierr = CeedBasisGetInterp1D(basis_coarse, &interp_c_source); CeedChk(ierr);
1999   } else {
2000     ierr = CeedBasisGetInterp(basis_fine, &interp_f_source); CeedChk(ierr);
2001     ierr = CeedBasisGetInterp(basis_coarse, &interp_c_source); CeedChk(ierr);
2002   }
2003   memcpy(interp_f, interp_f_source, Q*P_f*sizeof interp_f_source[0]);
2004   memcpy(interp_c, interp_c_source, Q*P_c*sizeof interp_c_source[0]);
2005 
2006   // -- QR Factorization, interp_f = Q R
2007   ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);
2008 
2009   // -- Apply Qtranspose, interp_c = Qtranspose interp_c
2010   ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
2011                                Q, P_c, P_f, P_c, 1); CeedChk(ierr);
2012 
2013   // -- Apply Rinv, interp_c_to_f = Rinv interp_c
2014   for (CeedInt j=0; j<P_c; j++) { // Column j
2015     interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
2016     for (CeedInt i=P_f-2; i>=0; i--) { // Row i
2017       interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
2018       for (CeedInt k=i+1; k<P_f; k++)
2019         interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
2020       interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
2021     }
2022   }
2023   ierr = CeedFree(&tau); CeedChk(ierr);
2024   ierr = CeedFree(&interp_c); CeedChk(ierr);
2025   ierr = CeedFree(&interp_f); CeedChk(ierr);
2026 
2027   // Complete with interp_c_to_f versions of code
2028   if (is_tensor_f) {
2029     ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
2030            rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
2031     CeedChk(ierr);
2032   } else {
2033     ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
2034            rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
2035     CeedChk(ierr);
2036   }
2037 
2038   // Cleanup
2039   ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
2040   return CEED_ERROR_SUCCESS;
2041 }
2042 
2043 /**
2044   @brief Create a multigrid coarse operator and level transfer operators
2045            for a CeedOperator with a tensor basis for the active basis
2046 
2047   Note: Calling this function asserts that setup is complete
2048           and sets the CeedOperator as immutable.
2049 
2050   @param[in] op_fine        Fine grid operator
2051   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
2052   @param[in] rstr_coarse    Coarse grid restriction
2053   @param[in] basis_coarse   Coarse grid active vector basis
2054   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
2055   @param[out] op_coarse     Coarse grid operator
2056   @param[out] op_prolong    Coarse to fine operator
2057   @param[out] op_restrict   Fine to coarse operator
2058 
2059   @return An error code: 0 - success, otherwise - failure
2060 
2061   @ref User
2062 **/
2063 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
2064     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2065     const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
2066     CeedOperator *op_prolong, CeedOperator *op_restrict) {
2067   int ierr;
2068   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
2069   Ceed ceed;
2070   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
2071 
2072   // Check for compatible quadrature spaces
2073   CeedBasis basis_fine;
2074   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
2075   CeedInt Q_f, Q_c;
2076   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
2077   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
2078   if (Q_f != Q_c)
2079     // LCOV_EXCL_START
2080     return CeedError(ceed, CEED_ERROR_DIMENSION,
2081                      "Bases must have compatible quadrature spaces");
2082   // LCOV_EXCL_STOP
2083 
2084   // Coarse to fine basis
2085   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2086   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
2087   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
2088   ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr);
2089   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
2090   CeedChk(ierr);
2091   P_1d_c = dim == 1 ? num_nodes_c :
2092            dim == 2 ? sqrt(num_nodes_c) :
2093            cbrt(num_nodes_c);
2094   CeedScalar *q_ref, *q_weight, *grad;
2095   ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr);
2096   ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr);
2097   ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr);
2098   CeedBasis basis_c_to_f;
2099   ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f,
2100                                  interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
2101   CeedChk(ierr);
2102   ierr = CeedFree(&q_ref); CeedChk(ierr);
2103   ierr = CeedFree(&q_weight); CeedChk(ierr);
2104   ierr = CeedFree(&grad); CeedChk(ierr);
2105 
2106   // Core code
2107   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
2108                                           basis_coarse, basis_c_to_f, op_coarse,
2109                                           op_prolong, op_restrict);
2110   CeedChk(ierr);
2111   return CEED_ERROR_SUCCESS;
2112 }
2113 
2114 /**
2115   @brief Create a multigrid coarse operator and level transfer operators
2116            for a CeedOperator with a non-tensor basis for the active vector
2117 
2118   Note: Calling this function asserts that setup is complete
2119           and sets the CeedOperator as immutable.
2120 
2121   @param[in] op_fine        Fine grid operator
2122   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
2123   @param[in] rstr_coarse    Coarse grid restriction
2124   @param[in] basis_coarse   Coarse grid active vector basis
2125   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
2126   @param[out] op_coarse     Coarse grid operator
2127   @param[out] op_prolong    Coarse to fine operator
2128   @param[out] op_restrict   Fine to coarse operator
2129 
2130   @return An error code: 0 - success, otherwise - failure
2131 
2132   @ref User
2133 **/
2134 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine,
2135                                        CeedVector p_mult_fine,
2136                                        CeedElemRestriction rstr_coarse,
2137                                        CeedBasis basis_coarse,
2138                                        const CeedScalar *interp_c_to_f,
2139                                        CeedOperator *op_coarse,
2140                                        CeedOperator *op_prolong,
2141                                        CeedOperator *op_restrict) {
2142   int ierr;
2143   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
2144   Ceed ceed;
2145   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
2146 
2147   // Check for compatible quadrature spaces
2148   CeedBasis basis_fine;
2149   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
2150   CeedInt Q_f, Q_c;
2151   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
2152   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
2153   if (Q_f != Q_c)
2154     // LCOV_EXCL_START
2155     return CeedError(ceed, CEED_ERROR_DIMENSION,
2156                      "Bases must have compatible quadrature spaces");
2157   // LCOV_EXCL_STOP
2158 
2159   // Coarse to fine basis
2160   CeedElemTopology topo;
2161   ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr);
2162   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2163   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
2164   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
2165   ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr);
2166   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
2167   CeedChk(ierr);
2168   CeedScalar *q_ref, *q_weight, *grad;
2169   ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr);
2170   ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr);
2171   ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr);
2172   CeedBasis basis_c_to_f;
2173   ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f,
2174                            interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
2175   CeedChk(ierr);
2176   ierr = CeedFree(&q_ref); CeedChk(ierr);
2177   ierr = CeedFree(&q_weight); CeedChk(ierr);
2178   ierr = CeedFree(&grad); CeedChk(ierr);
2179 
2180   // Core code
2181   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
2182                                           basis_coarse, basis_c_to_f, op_coarse,
2183                                           op_prolong, op_restrict);
2184   CeedChk(ierr);
2185   return CEED_ERROR_SUCCESS;
2186 }
2187 
2188 /**
2189   @brief Build a FDM based approximate inverse for each element for a
2190            CeedOperator
2191 
2192   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization
2193     Method based approximate inverse. This function obtains the simultaneous
2194     diagonalization for the 1D mass and Laplacian operators,
2195       M = V^T V, K = V^T S V.
2196     The assembled QFunction is used to modify the eigenvalues from simultaneous
2197     diagonalization and obtain an approximate inverse of the form
2198       V^T S^hat V. The CeedOperator must be linear and non-composite. The
2199     associated CeedQFunction must therefore also be linear.
2200 
2201   Note: Calling this function asserts that setup is complete
2202           and sets the CeedOperator as immutable.
2203 
2204   @param op            CeedOperator to create element inverses
2205   @param[out] fdm_inv  CeedOperator to apply the action of a FDM based inverse
2206                          for each element
2207   @param request       Address of CeedRequest for non-blocking completion, else
2208                          @ref CEED_REQUEST_IMMEDIATE
2209 
2210   @return An error code: 0 - success, otherwise - failure
2211 
2212   @ref User
2213 **/
2214 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv,
2215                                         CeedRequest *request) {
2216   int ierr;
2217   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
2218 
2219   if (op->CreateFDMElementInverse) {
2220     // Backend version
2221     ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr);
2222     return CEED_ERROR_SUCCESS;
2223   } else {
2224     // Operator fallback
2225     CeedOperator op_fallback;
2226 
2227     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
2228     if (op_fallback) {
2229       ierr = CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request);
2230       CeedChk(ierr);
2231       return CEED_ERROR_SUCCESS;
2232     }
2233   }
2234 
2235   // Default interface implementation
2236   Ceed ceed, ceed_parent;
2237   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
2238   ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr);
2239   ceed_parent = ceed_parent ? ceed_parent : ceed;
2240   CeedQFunction qf;
2241   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
2242 
2243   // Determine active input basis
2244   bool interp = false, grad = false;
2245   CeedBasis basis = NULL;
2246   CeedElemRestriction rstr = NULL;
2247   CeedOperatorField *op_fields;
2248   CeedQFunctionField *qf_fields;
2249   CeedInt num_input_fields;
2250   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL);
2251   CeedChk(ierr);
2252   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
2253   for (CeedInt i=0; i<num_input_fields; i++) {
2254     CeedVector vec;
2255     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
2256     if (vec == CEED_VECTOR_ACTIVE) {
2257       CeedEvalMode eval_mode;
2258       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
2259       interp = interp || eval_mode == CEED_EVAL_INTERP;
2260       grad = grad || eval_mode == CEED_EVAL_GRAD;
2261       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr);
2262       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
2263     }
2264   }
2265   if (!basis)
2266     // LCOV_EXCL_START
2267     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2268   // LCOV_EXCL_STOP
2269   CeedSize l_size = 1;
2270   CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
2271   ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr);
2272   ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr);
2273   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr);
2274   ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr);
2275   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
2276   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
2277   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
2278   ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr);
2279 
2280   // Build and diagonalize 1D Mass and Laplacian
2281   bool tensor_basis;
2282   ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr);
2283   if (!tensor_basis)
2284     // LCOV_EXCL_START
2285     return CeedError(ceed, CEED_ERROR_BACKEND,
2286                      "FDMElementInverse only supported for tensor "
2287                      "bases");
2288   // LCOV_EXCL_STOP
2289   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2290   ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr);
2291   ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr);
2292   ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr);
2293   ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr);
2294   ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr);
2295   // -- Build matrices
2296   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2297   ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr);
2298   ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr);
2299   ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr);
2300   ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim,
2301                               mass, laplace); CeedChk(ierr);
2302 
2303   // -- Diagonalize
2304   ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d);
2305   CeedChk(ierr);
2306   ierr = CeedFree(&mass); CeedChk(ierr);
2307   ierr = CeedFree(&laplace); CeedChk(ierr);
2308   for (CeedInt i=0; i<P_1d; i++)
2309     for (CeedInt j=0; j<P_1d; j++)
2310       fdm_interp[i+j*P_1d] = x[j+i*P_1d];
2311   ierr = CeedFree(&x); CeedChk(ierr);
2312 
2313   // Assemble QFunction
2314   CeedVector assembled;
2315   CeedElemRestriction rstr_qf;
2316   ierr =  CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled,
2317           &rstr_qf, request); CeedChk(ierr);
2318   CeedInt layout[3];
2319   ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr);
2320   ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr);
2321   CeedScalar max_norm = 0;
2322   ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr);
2323 
2324   // Calculate element averages
2325   CeedInt num_modes = (interp?1:0) + (grad?dim:0);
2326   CeedScalar *elem_avg;
2327   const CeedScalar *assembled_array, *q_weight_array;
2328   CeedVector q_weight;
2329   ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr);
2330   ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT,
2331                         CEED_VECTOR_NONE, q_weight); CeedChk(ierr);
2332   ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array);
2333   CeedChk(ierr);
2334   ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array);
2335   CeedChk(ierr);
2336   ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr);
2337   const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
2338   for (CeedInt e=0; e<num_elem; e++) {
2339     CeedInt count = 0;
2340     for (CeedInt q=0; q<num_qpts; q++)
2341       for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++)
2342         if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) >
2343             qf_value_bound) {
2344           elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] /
2345                          q_weight_array[q];
2346           count++;
2347         }
2348     if (count) {
2349       elem_avg[e] /= count;
2350     } else {
2351       elem_avg[e] = 1.0;
2352     }
2353   }
2354   ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr);
2355   ierr = CeedVectorDestroy(&assembled); CeedChk(ierr);
2356   ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr);
2357   ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr);
2358 
2359   // Build FDM diagonal
2360   CeedVector q_data;
2361   CeedScalar *q_data_array, *fdm_diagonal;
2362   ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr);
2363   const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON;
2364   for (CeedInt c=0; c<num_comp; c++)
2365     for (CeedInt n=0; n<elem_size; n++) {
2366       if (interp)
2367         fdm_diagonal[c*elem_size + n] = 1.0;
2368       if (grad)
2369         for (CeedInt d=0; d<dim; d++) {
2370           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2371           fdm_diagonal[c*elem_size + n] += lambda[i];
2372         }
2373       if (fabs(fdm_diagonal[c*elem_size + n]) < fdm_diagonal_bound)
2374         fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound;
2375     }
2376   ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data);
2377   CeedChk(ierr);
2378   ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr);
2379   ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array);
2380   CeedChk(ierr);
2381   for (CeedInt e=0; e<num_elem; e++)
2382     for (CeedInt c=0; c<num_comp; c++)
2383       for (CeedInt n=0; n<elem_size; n++)
2384         q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] *
2385             fdm_diagonal[c*elem_size + n]);
2386   ierr = CeedFree(&elem_avg); CeedChk(ierr);
2387   ierr = CeedFree(&fdm_diagonal); CeedChk(ierr);
2388   ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr);
2389 
2390   // Setup FDM operator
2391   // -- Basis
2392   CeedBasis fdm_basis;
2393   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2394   ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr);
2395   ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr);
2396   ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr);
2397   ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d,
2398                                  fdm_interp, grad_dummy, q_ref_dummy,
2399                                  q_weight_dummy, &fdm_basis); CeedChk(ierr);
2400   ierr = CeedFree(&fdm_interp); CeedChk(ierr);
2401   ierr = CeedFree(&grad_dummy); CeedChk(ierr);
2402   ierr = CeedFree(&q_ref_dummy); CeedChk(ierr);
2403   ierr = CeedFree(&q_weight_dummy); CeedChk(ierr);
2404   ierr = CeedFree(&lambda); CeedChk(ierr);
2405 
2406   // -- Restriction
2407   CeedElemRestriction rstr_qd_i;
2408   CeedInt strides[3] = {1, elem_size, elem_size*num_comp};
2409   ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size,
2410                                           num_comp, num_elem*num_comp*elem_size,
2411                                           strides, &rstr_qd_i); CeedChk(ierr);
2412   // -- QFunction
2413   CeedQFunction qf_fdm;
2414   ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm);
2415   CeedChk(ierr);
2416   ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP);
2417   CeedChk(ierr);
2418   ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE);
2419   CeedChk(ierr);
2420   ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP);
2421   CeedChk(ierr);
2422   ierr = CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp); CeedChk(ierr);
2423   // -- QFunction context
2424   CeedInt *num_comp_data;
2425   ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr);
2426   num_comp_data[0] = num_comp;
2427   CeedQFunctionContext ctx_fdm;
2428   ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr);
2429   ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER,
2430                                      sizeof(*num_comp_data), num_comp_data);
2431   CeedChk(ierr);
2432   ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr);
2433   ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr);
2434   // -- Operator
2435   ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv);
2436   CeedChk(ierr);
2437   CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
2438   CeedChk(ierr);
2439   CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED,
2440                        q_data); CeedChk(ierr);
2441   CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
2442   CeedChk(ierr);
2443 
2444   // Cleanup
2445   ierr = CeedVectorDestroy(&q_data); CeedChk(ierr);
2446   ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr);
2447   ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr);
2448   ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr);
2449 
2450   return CEED_ERROR_SUCCESS;
2451 }
2452 
2453 /// @}
2454