xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision a2cba4be6838e1a18742bb6cef1b14b2cf87e1c8)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed/ceed.h>
9 #include <ceed/backend.h>
10 #include <ceed-impl.h>
11 #include <math.h>
12 #include <assert.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced
28          CeedOperator functionality
29 
30   @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction
31   @param[in] qf            CeedQFunction to create fallback for
32   @param[out] qf_fallback  fallback CeedQFunction
33 
34   @return An error code: 0 - success, otherwise - failure
35 
36   @ref Developer
37 **/
38 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf,
39                                        CeedQFunction *qf_fallback) {
40   int ierr;
41 
42   // Check if NULL qf passed in
43   if (!qf) return CEED_ERROR_SUCCESS;
44 
45   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
46   CeedDebug256(qf->ceed, 255, "Creating fallback CeedQFunction\n");
47 
48   char *source_path_with_name = "";
49   if (qf->source_path) {
50     size_t path_len = strlen(qf->source_path),
51            name_len = strlen(qf->kernel_name);
52     ierr = CeedCalloc(path_len + name_len + 2, &source_path_with_name);
53     CeedChk(ierr);
54     memcpy(source_path_with_name, qf->source_path, path_len);
55     memcpy(&source_path_with_name[path_len], ":", 1);
56     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
57   } else {
58     ierr = CeedCalloc(1, &source_path_with_name); CeedChk(ierr);
59   }
60 
61   ierr = CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length,
62                                      qf->function, source_path_with_name,
63                                      qf_fallback); CeedChk(ierr);
64   {
65     CeedQFunctionContext ctx;
66 
67     ierr = CeedQFunctionGetContext(qf, &ctx); CeedChk(ierr);
68     ierr = CeedQFunctionSetContext(*qf_fallback, ctx); CeedChk(ierr);
69   }
70   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
71     ierr = CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name,
72                                  qf->input_fields[i]->size,
73                                  qf->input_fields[i]->eval_mode); CeedChk(ierr);
74   }
75   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
76     ierr = CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name,
77                                   qf->output_fields[i]->size,
78                                   qf->output_fields[i]->eval_mode); CeedChk(ierr);
79   }
80   ierr = CeedFree(&source_path_with_name); CeedChk(ierr);
81 
82   return CEED_ERROR_SUCCESS;
83 }
84 
85 /**
86   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced
87          CeedOperator functionality
88 
89   @param op  CeedOperator to create fallback for
90 
91   @return An error code: 0 - success, otherwise - failure
92 
93   @ref Developer
94 **/
95 static int CeedOperatorCreateFallback(CeedOperator op) {
96   int ierr;
97   Ceed ceed_fallback;
98 
99   // Check not already created
100   if (op->op_fallback) return CEED_ERROR_SUCCESS;
101 
102   // Fallback Ceed
103   ierr = CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback); CeedChk(ierr);
104   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
105 
106   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
107   CeedDebug256(op->ceed, 255, "Creating fallback CeedOperator\n");
108 
109   // Clone Op
110   CeedOperator op_fallback;
111   if (op->is_composite) {
112     ierr = CeedCompositeOperatorCreate(ceed_fallback, &op_fallback);
113     CeedChk(ierr);
114     for (CeedInt i = 0; i < op->num_suboperators; i++) {
115       CeedOperator op_sub_fallback;
116 
117       ierr = CeedOperatorGetFallback(op->sub_operators[i], &op_sub_fallback);
118       CeedChk(ierr);
119       ierr = CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback); CeedChk(ierr);
120     }
121   } else {
122     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
123     ierr = CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback);
124     CeedChk(ierr);
125     ierr = CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback);
126     CeedChk(ierr);
127     ierr = CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback);
128     CeedChk(ierr);
129     ierr = CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback,
130                               dqfT_fallback, &op_fallback); CeedChk(ierr);
131     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
132       ierr = CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name,
133                                   op->input_fields[i]->elem_restr,
134                                   op->input_fields[i]->basis,
135                                   op->input_fields[i]->vec); CeedChk(ierr);
136     }
137     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
138       ierr = CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name,
139                                   op->output_fields[i]->elem_restr,
140                                   op->output_fields[i]->basis,
141                                   op->output_fields[i]->vec); CeedChk(ierr);
142     }
143     ierr = CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled,
144            &op_fallback->qf_assembled); CeedChk(ierr);
145     if (op_fallback->num_qpts == 0) {
146       ierr = CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts);
147       CeedChk(ierr);
148     }
149     // Cleanup
150     ierr = CeedQFunctionDestroy(&qf_fallback); CeedChk(ierr);
151     ierr = CeedQFunctionDestroy(&dqf_fallback); CeedChk(ierr);
152     ierr = CeedQFunctionDestroy(&dqfT_fallback); CeedChk(ierr);
153   }
154   ierr = CeedOperatorSetName(op_fallback, op->name); CeedChk(ierr);
155   ierr = CeedOperatorCheckReady(op_fallback); CeedChk(ierr);
156   op->op_fallback = op_fallback;
157 
158   return CEED_ERROR_SUCCESS;
159 }
160 
161 /**
162   @brief Retreive fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
163 
164   @param[in] op            CeedOperator to retrieve fallback for
165   @param[out] op_fallback  Fallback CeedOperator
166 
167   @return An error code: 0 - success, otherwise - failure
168 
169   @ref Developer
170 **/
171 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
172   int ierr;
173 
174   // Create if needed
175   if (!op->op_fallback) {
176     ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
177   }
178   if (op->op_fallback) {
179     bool is_debug;
180 
181     ierr = CeedIsDebug(op->ceed, &is_debug); CeedChk(ierr);
182     if (is_debug) {
183       Ceed ceed_fallback;
184       const char *resource, *resource_fallback;
185 
186       ierr = CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback); CeedChk(ierr);
187       ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
188       ierr = CeedGetResource(ceed_fallback, &resource_fallback); CeedChk(ierr);
189 
190       CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
191       CeedDebug256(op->ceed, 255,
192                    "Falling back from %s operator at address %ld to %s operator at address %ld\n",
193                    resource, op, resource_fallback, op->op_fallback);
194     }
195   }
196   *op_fallback = op->op_fallback;
197 
198   return CEED_ERROR_SUCCESS;
199 }
200 
201 /**
202   @brief Select correct basis matrix pointer based on CeedEvalMode
203 
204   @param[in] eval_mode   Current basis evaluation mode
205   @param[in] identity    Pointer to identity matrix
206   @param[in] interp      Pointer to interpolation matrix
207   @param[in] grad        Pointer to gradient matrix
208   @param[out] basis_ptr  Basis pointer to set
209 
210   @ref Developer
211 **/
212 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode,
213     const CeedScalar *identity, const CeedScalar *interp,
214     const CeedScalar *grad, const CeedScalar **basis_ptr) {
215   switch (eval_mode) {
216   case CEED_EVAL_NONE:
217     *basis_ptr = identity;
218     break;
219   case CEED_EVAL_INTERP:
220     *basis_ptr = interp;
221     break;
222   case CEED_EVAL_GRAD:
223     *basis_ptr = grad;
224     break;
225   case CEED_EVAL_WEIGHT:
226   case CEED_EVAL_DIV:
227   case CEED_EVAL_CURL:
228     break; // Caught by QF Assembly
229   }
230   assert(*basis_ptr != NULL);
231 }
232 
233 /**
234   @brief Create point block restriction for active operator field
235 
236   @param[in] rstr              Original CeedElemRestriction for active field
237   @param[out] pointblock_rstr  Address of the variable where the newly created
238                                  CeedElemRestriction will be stored
239 
240   @return An error code: 0 - success, otherwise - failure
241 
242   @ref Developer
243 **/
244 static int CeedOperatorCreateActivePointBlockRestriction(
245   CeedElemRestriction rstr,
246   CeedElemRestriction *pointblock_rstr) {
247   int ierr;
248   Ceed ceed;
249   ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr);
250   const CeedInt *offsets;
251   ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets);
252   CeedChk(ierr);
253 
254   // Expand offsets
255   CeedInt num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
256   CeedSize l_size;
257   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
258   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
259   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
260   ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr);
261   ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr);
262   CeedInt shift = num_comp;
263   if (comp_stride != 1)
264     shift *= num_comp;
265   ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets);
266   CeedChk(ierr);
267   for (CeedInt i = 0; i < num_elem*elem_size; i++) {
268     pointblock_offsets[i] = offsets[i]*shift;
269   }
270 
271   // Create new restriction
272   ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp,
273                                    1, l_size * num_comp, CEED_MEM_HOST,
274                                    CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr);
275   CeedChk(ierr);
276 
277   // Cleanup
278   ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr);
279 
280   return CEED_ERROR_SUCCESS;
281 }
282 
283 /**
284   @brief Core logic for assembling operator diagonal or point block diagonal
285 
286   @param[in] op             CeedOperator to assemble point block diagonal
287   @param[in] request        Address of CeedRequest for non-blocking completion, else
288                               CEED_REQUEST_IMMEDIATE
289   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
290   @param[out] assembled     CeedVector to store assembled diagonal
291 
292   @return An error code: 0 - success, otherwise - failure
293 
294   @ref Developer
295 **/
296 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op,
297     CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
298   int ierr;
299   Ceed ceed;
300   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
301 
302   // Assemble QFunction
303   CeedQFunction qf;
304   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
305   CeedInt num_input_fields, num_output_fields;
306   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
307   CeedChk(ierr);
308   CeedVector assembled_qf;
309   CeedElemRestriction rstr;
310   ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op,  &assembled_qf,
311          &rstr, request); CeedChk(ierr);
312   CeedInt layout[3];
313   ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr);
314   ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
315 
316   // Get assembly data
317   CeedOperatorAssemblyData data;
318   ierr = CeedOperatorGetOperatorAssemblyData(op, &data); CeedChk(ierr);
319   const CeedEvalMode *eval_mode_in, *eval_mode_out;
320   CeedInt num_eval_mode_in, num_eval_mode_out;
321   ierr = CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in,
322          &eval_mode_in, &num_eval_mode_out, &eval_mode_out); CeedChk(ierr);
323   CeedBasis basis_in, basis_out;
324   ierr = CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out,
325                                           NULL); CeedChk(ierr);
326   CeedInt num_comp;
327   ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr);
328 
329   // Assemble point block diagonal restriction, if needed
330   CeedElemRestriction diag_rstr;
331   ierr = CeedOperatorGetActiveElemRestriction(op, &diag_rstr); CeedChk(ierr);
332   if (is_pointblock) {
333     CeedElemRestriction point_block_rstr;
334     ierr = CeedOperatorCreateActivePointBlockRestriction(diag_rstr,
335            &point_block_rstr);
336     CeedChk(ierr);
337     diag_rstr = point_block_rstr;
338   }
339 
340   // Create diagonal vector
341   CeedVector elem_diag;
342   ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
343   CeedChk(ierr);
344 
345   // Assemble element operator diagonals
346   CeedScalar *elem_diag_array;
347   const CeedScalar *assembled_qf_array;
348   ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr);
349   ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
350   CeedChk(ierr);
351   ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
352   CeedChk(ierr);
353   CeedInt num_elem, num_nodes, num_qpts;
354   ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr);
355   ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr);
356   ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
357 
358   // Basis matrices
359   const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
360   CeedScalar *identity = NULL;
361   bool has_eval_none = false;
362   for (CeedInt i = 0; i < num_eval_mode_in; i++) {
363     has_eval_none = has_eval_none || (eval_mode_in[i] == CEED_EVAL_NONE);
364   }
365   for (CeedInt i = 0; i<num_eval_mode_out; i++) {
366     has_eval_none = has_eval_none || (eval_mode_out[i] == CEED_EVAL_NONE);
367   }
368   if (has_eval_none) {
369     ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr);
370     for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++)
371       identity[i*num_nodes+i] = 1.0;
372   }
373   ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
374   ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr);
375   ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
376   ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr);
377   // Compute the diagonal of B^T D B
378   // Each element
379   for (CeedInt e=0; e<num_elem; e++) {
380     CeedInt d_out = -1;
381     // Each basis eval mode pair
382     for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
383       const CeedScalar *bt = NULL;
384       if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
385         d_out += 1;
386       CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out,
387                                   &grad_out[d_out*num_qpts*num_nodes], &bt);
388       CeedInt d_in = -1;
389       for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
390         const CeedScalar *b = NULL;
391         if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
392           d_in += 1;
393         CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in,
394                                     &grad_in[d_in*num_qpts*num_nodes], &b);
395         // Each component
396         for (CeedInt c_out=0; c_out<num_comp; c_out++)
397           // Each qpoint/node pair
398           for (CeedInt q=0; q<num_qpts; q++)
399             if (is_pointblock) {
400               // Point Block Diagonal
401               for (CeedInt c_in=0; c_in<num_comp; c_in++) {
402                 const CeedScalar qf_value =
403                   assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)*
404                                                      num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
405                 for (CeedInt n=0; n<num_nodes; n++)
406                   elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
407                     bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
408               }
409             } else {
410               // Diagonal Only
411               const CeedScalar qf_value =
412                 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)*
413                                                    num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
414               for (CeedInt n=0; n<num_nodes; n++)
415                 elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
416                   bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
417             }
418       }
419     }
420   }
421   ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr);
422   ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
423   CeedChk(ierr);
424 
425   // Assemble local operator diagonal
426   ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag,
427                                   assembled, request); CeedChk(ierr);
428 
429   // Cleanup
430   if (is_pointblock) {
431     ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr);
432   }
433   ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
434   ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr);
435   ierr = CeedFree(&identity); CeedChk(ierr);
436 
437   return CEED_ERROR_SUCCESS;
438 }
439 
440 /**
441   @brief Core logic for assembling composite operator diagonal
442 
443   @param[in] op             CeedOperator to assemble point block diagonal
444   @param[in] request        Address of CeedRequest for non-blocking completion, else
445                             CEED_REQUEST_IMMEDIATE
446   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
447   @param[out] assembled     CeedVector to store assembled diagonal
448 
449   @return An error code: 0 - success, otherwise - failure
450 
451   @ref Developer
452 **/
453 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(
454   CeedOperator op, CeedRequest *request, const bool is_pointblock,
455   CeedVector assembled) {
456   int ierr;
457   CeedInt num_sub;
458   CeedOperator *suboperators;
459   ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr);
460   ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr);
461   for (CeedInt i = 0; i < num_sub; i++) {
462     if (is_pointblock) {
463       ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i],
464              assembled, request); CeedChk(ierr);
465     } else {
466       ierr = CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled,
467              request); CeedChk(ierr);
468     }
469   }
470   return CEED_ERROR_SUCCESS;
471 }
472 
473 /**
474   @brief Build nonzero pattern for non-composite operator
475 
476   Users should generally use CeedOperatorLinearAssembleSymbolic()
477 
478   @param[in] op      CeedOperator to assemble nonzero pattern
479   @param[in] offset  Offset for number of entries
480   @param[out] rows   Row number for each entry
481   @param[out] cols   Column number for each entry
482 
483   @return An error code: 0 - success, otherwise - failure
484 
485   @ref Developer
486 **/
487 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset,
488     CeedInt *rows, CeedInt *cols) {
489   int ierr;
490   Ceed ceed = op->ceed;
491   if (op->is_composite)
492     // LCOV_EXCL_START
493     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
494                      "Composite operator not supported");
495   // LCOV_EXCL_STOP
496 
497   CeedSize num_nodes;
498   ierr = CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL); CeedChk(ierr);
499   CeedElemRestriction rstr_in;
500   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr);
501   CeedInt num_elem, elem_size, num_comp;
502   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
503   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
504   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
505   CeedInt layout_er[3];
506   ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr);
507 
508   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
509 
510   // Determine elem_dof relation
511   CeedVector index_vec;
512   ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr);
513   CeedScalar *array;
514   ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr);
515   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
516   ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr);
517   CeedVector elem_dof;
518   ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof);
519   CeedChk(ierr);
520   ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr);
521   CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec,
522                            elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
523   const CeedScalar *elem_dof_a;
524   ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a);
525   CeedChk(ierr);
526   ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr);
527 
528   // Determine i, j locations for element matrices
529   CeedInt count = 0;
530   for (CeedInt e = 0; e < num_elem; e++) {
531     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
532       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
533         for (CeedInt i = 0; i < elem_size; i++) {
534           for (CeedInt j = 0; j < elem_size; j++) {
535             const CeedInt elem_dof_index_row = i*layout_er[0] +
536                                                (comp_out)*layout_er[1] + e*layout_er[2];
537             const CeedInt elem_dof_index_col = j*layout_er[0] +
538                                                comp_in*layout_er[1] + e*layout_er[2];
539 
540             const CeedInt row = elem_dof_a[elem_dof_index_row];
541             const CeedInt col = elem_dof_a[elem_dof_index_col];
542 
543             rows[offset + count] = row;
544             cols[offset + count] = col;
545             count++;
546           }
547         }
548       }
549     }
550   }
551   if (count != local_num_entries)
552     // LCOV_EXCL_START
553     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
554   // LCOV_EXCL_STOP
555   ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr);
556   ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr);
557 
558   return CEED_ERROR_SUCCESS;
559 }
560 
561 /**
562   @brief Assemble nonzero entries for non-composite operator
563 
564   Users should generally use CeedOperatorLinearAssemble()
565 
566   @param[in] op       CeedOperator to assemble
567   @param[in] offset   Offest for number of entries
568   @param[out] values  Values to assemble into matrix
569 
570   @return An error code: 0 - success, otherwise - failure
571 
572   @ref Developer
573 **/
574 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
575                                       CeedVector values) {
576   int ierr;
577   Ceed ceed = op->ceed;
578   if (op->is_composite)
579     // LCOV_EXCL_START
580     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
581                      "Composite operator not supported");
582   // LCOV_EXCL_STOP
583   if (op->num_elem == 0) return CEED_ERROR_SUCCESS;
584 
585   if (op->LinearAssembleSingle) {
586     // Backend version
587     ierr = op->LinearAssembleSingle(op, offset, values); CeedChk(ierr);
588     return CEED_ERROR_SUCCESS;
589   } else {
590     // Operator fallback
591     CeedOperator op_fallback;
592 
593     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
594     if (op_fallback) {
595       ierr = CeedSingleOperatorAssemble(op_fallback, offset, values);
596       CeedChk(ierr);
597       return CEED_ERROR_SUCCESS;
598     }
599   }
600 
601   // Assemble QFunction
602   CeedQFunction qf;
603   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
604   CeedVector assembled_qf;
605   CeedElemRestriction rstr_q;
606   ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(
607            op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
608   CeedSize qf_length;
609   ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr);
610 
611   CeedInt num_input_fields, num_output_fields;
612   CeedOperatorField *input_fields;
613   CeedOperatorField *output_fields;
614   ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields,
615                                &num_output_fields, &output_fields); CeedChk(ierr);
616 
617   // Get assembly data
618   CeedOperatorAssemblyData data;
619   ierr = CeedOperatorGetOperatorAssemblyData(op, &data); CeedChk(ierr);
620   const CeedEvalMode *eval_mode_in, *eval_mode_out;
621   CeedInt num_eval_mode_in, num_eval_mode_out;
622   ierr = CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in,
623          &eval_mode_in, &num_eval_mode_out, &eval_mode_out); CeedChk(ierr);
624   CeedBasis basis_in, basis_out;
625   ierr = CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out,
626                                           NULL); CeedChk(ierr);
627 
628   if (num_eval_mode_in == 0 || num_eval_mode_out == 0)
629     // LCOV_EXCL_START
630     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
631                      "Cannot assemble operator with out inputs/outputs");
632   // LCOV_EXCL_STOP
633 
634   CeedElemRestriction active_rstr;
635   CeedInt num_elem, elem_size, num_qpts, num_comp;
636   ierr = CeedOperatorGetActiveElemRestriction(op, &active_rstr); CeedChk(ierr);
637   ierr = CeedElemRestrictionGetNumElements(active_rstr, &num_elem); CeedChk(ierr);
638   ierr = CeedElemRestrictionGetElementSize(active_rstr, &elem_size);
639   CeedChk(ierr);
640   ierr = CeedElemRestrictionGetNumComponents(active_rstr, &num_comp);
641   CeedChk(ierr);
642   ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
643 
644   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
645 
646   // loop over elements and put in data structure
647   const CeedScalar *interp_in, *grad_in;
648   ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
649   ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
650 
651   const CeedScalar *assembled_qf_array;
652   ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
653   CeedChk(ierr);
654 
655   CeedInt layout_qf[3];
656   ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr);
657   ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr);
658 
659   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
660   const CeedScalar *B_mat_in, *B_mat_out;
661   ierr = CeedOperatorAssemblyDataGetBases(data, NULL, &B_mat_in, NULL,
662                                           &B_mat_out); CeedChk(ierr);
663   CeedScalar BTD_mat[elem_size * num_qpts*num_eval_mode_in];
664   CeedScalar elem_mat[elem_size * elem_size];
665   CeedInt count = 0;
666   CeedScalar *vals;
667   ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr);
668   for (CeedInt e = 0; e < num_elem; e++) {
669     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
670       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
671         // Compute B^T*D
672         for (CeedInt n = 0; n < elem_size; n++) {
673           for (CeedInt q = 0; q < num_qpts; q++) {
674             for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
675               const CeedInt btd_index = n*(num_qpts*num_eval_mode_in) +
676                                         (num_eval_mode_in*q + e_in);
677               CeedScalar sum = 0.0;
678               for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
679                 const CeedInt b_out_index = (num_eval_mode_out*q + e_out)*elem_size + n;
680                 const CeedInt eval_mode_index = ((e_in*num_comp+comp_in)*num_eval_mode_out
681                                                  +e_out)*num_comp + comp_out;
682                 const CeedInt qf_index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
683                                          e*layout_qf[2];
684                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
685               }
686               BTD_mat[btd_index] = sum;
687             }
688           }
689         }
690         // form element matrix itself (for each block component)
691         ierr = CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size,
692                                         elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);
693 
694         // put element matrix in coordinate data structure
695         for (CeedInt i = 0; i < elem_size; i++) {
696           for (CeedInt j = 0; j < elem_size; j++) {
697             vals[offset + count] = elem_mat[i*elem_size + j];
698             count++;
699           }
700         }
701       }
702     }
703   }
704   if (count != local_num_entries)
705     // LCOV_EXCL_START
706     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
707   // LCOV_EXCL_STOP
708   ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr);
709 
710   ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
711   CeedChk(ierr);
712   ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
713 
714   return CEED_ERROR_SUCCESS;
715 }
716 
717 /**
718   @brief Count number of entries for assembled CeedOperator
719 
720   @param[in] op            CeedOperator to assemble
721   @param[out] num_entries  Number of entries in assembled representation
722 
723   @return An error code: 0 - success, otherwise - failure
724 
725   @ref Utility
726 **/
727 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op,
728     CeedInt *num_entries) {
729   int ierr;
730   CeedElemRestriction rstr;
731   CeedInt num_elem, elem_size, num_comp;
732 
733   if (op->is_composite)
734     // LCOV_EXCL_START
735     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
736                      "Composite operator not supported");
737   // LCOV_EXCL_STOP
738   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr);
739   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
740   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
741   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
742   *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
743 
744   return CEED_ERROR_SUCCESS;
745 }
746 
747 /**
748   @brief Common code for creating a multigrid coarse operator and level
749            transfer operators for a CeedOperator
750 
751   @param[in] op_fine       Fine grid operator
752   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
753   @param[in] rstr_coarse   Coarse grid restriction
754   @param[in] basis_coarse  Coarse grid active vector basis
755   @param[in] basis_c_to_f  Basis for coarse to fine interpolation
756   @param[out] op_coarse    Coarse grid operator
757   @param[out] op_prolong   Coarse to fine operator
758   @param[out] op_restrict  Fine to coarse operator
759 
760   @return An error code: 0 - success, otherwise - failure
761 
762   @ref Developer
763 **/
764 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
765     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
766     CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
767     CeedOperator *op_restrict) {
768   int ierr;
769   Ceed ceed;
770   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
771 
772   // Check for composite operator
773   bool is_composite;
774   ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
775   if (is_composite)
776     // LCOV_EXCL_START
777     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
778                      "Automatic multigrid setup for composite operators not supported");
779   // LCOV_EXCL_STOP
780 
781   // Coarse Grid
782   ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
783                             op_coarse); CeedChk(ierr);
784   CeedElemRestriction rstr_fine = NULL;
785   // -- Clone input fields
786   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
787     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
788       rstr_fine = op_fine->input_fields[i]->elem_restr;
789       ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
790                                   rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
791       CeedChk(ierr);
792     } else {
793       ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
794                                   op_fine->input_fields[i]->elem_restr,
795                                   op_fine->input_fields[i]->basis,
796                                   op_fine->input_fields[i]->vec); CeedChk(ierr);
797     }
798   }
799   // -- Clone output fields
800   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
801     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
802       ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
803                                   rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
804       CeedChk(ierr);
805     } else {
806       ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
807                                   op_fine->output_fields[i]->elem_restr,
808                                   op_fine->output_fields[i]->basis,
809                                   op_fine->output_fields[i]->vec); CeedChk(ierr);
810     }
811   }
812   // -- Clone QFunctionAssemblyData
813   ierr = CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled,
814          &(*op_coarse)->qf_assembled); CeedChk(ierr);
815 
816   // Multiplicity vector
817   CeedVector mult_vec, mult_e_vec;
818   ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
819   CeedChk(ierr);
820   ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
821   ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
822                                   mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
823   ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
824   ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
825                                   CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
826   ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
827   ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);
828 
829   // Restriction
830   CeedInt num_comp;
831   ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
832   CeedQFunction qf_restrict;
833   ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
834   CeedChk(ierr);
835   CeedInt *num_comp_r_data;
836   ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
837   num_comp_r_data[0] = num_comp;
838   CeedQFunctionContext ctx_r;
839   ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
840   ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
841                                      sizeof(*num_comp_r_data), num_comp_r_data);
842   CeedChk(ierr);
843   ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
844   ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
845   ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
846   CeedChk(ierr);
847   ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
848   CeedChk(ierr);
849   ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
850                                 CEED_EVAL_INTERP); CeedChk(ierr);
851   ierr = CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp); CeedChk(ierr);
852 
853   ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
854                             CEED_QFUNCTION_NONE, op_restrict);
855   CeedChk(ierr);
856   ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
857                               CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
858   CeedChk(ierr);
859   ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
860                               CEED_BASIS_COLLOCATED, mult_vec);
861   CeedChk(ierr);
862   ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
863                               CEED_VECTOR_ACTIVE); CeedChk(ierr);
864 
865   // Prolongation
866   CeedQFunction qf_prolong;
867   ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
868   CeedChk(ierr);
869   CeedInt *num_comp_p_data;
870   ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
871   num_comp_p_data[0] = num_comp;
872   CeedQFunctionContext ctx_p;
873   ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
874   ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
875                                      sizeof(*num_comp_p_data), num_comp_p_data);
876   CeedChk(ierr);
877   ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
878   ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
879   ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
880   CeedChk(ierr);
881   ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
882   CeedChk(ierr);
883   ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
884   CeedChk(ierr);
885   ierr = CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp); CeedChk(ierr);
886 
887   ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
888                             CEED_QFUNCTION_NONE, op_prolong);
889   CeedChk(ierr);
890   ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
891                               CEED_VECTOR_ACTIVE); CeedChk(ierr);
892   ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
893                               CEED_BASIS_COLLOCATED, mult_vec);
894   CeedChk(ierr);
895   ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
896                               CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
897   CeedChk(ierr);
898 
899   // Clone name
900   bool has_name = op_fine->name;
901   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
902   ierr = CeedOperatorSetName(*op_coarse, op_fine->name); CeedChk(ierr);
903   {
904     char *prolongation_name;
905     ierr = CeedCalloc(18 + name_len, &prolongation_name); CeedChk(ierr);
906     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "",
907             has_name ? op_fine->name : "");
908     ierr = CeedOperatorSetName(*op_prolong, prolongation_name); CeedChk(ierr);
909     ierr = CeedFree(&prolongation_name); CeedChk(ierr);
910   }
911   {
912     char *restriction_name;
913     ierr = CeedCalloc(17 + name_len, &restriction_name); CeedChk(ierr);
914     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "",
915             has_name ? op_fine->name : "");
916     ierr = CeedOperatorSetName(*op_restrict, restriction_name); CeedChk(ierr);
917     ierr = CeedFree(&restriction_name); CeedChk(ierr);
918   }
919 
920   // Cleanup
921   ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
922   ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
923   ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
924   ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
925 
926   return CEED_ERROR_SUCCESS;
927 }
928 
929 /**
930   @brief Build 1D mass matrix and Laplacian with perturbation
931 
932   @param[in] interp_1d    Interpolation matrix in one dimension
933   @param[in] grad_1d      Gradient matrix in one dimension
934   @param[in] q_weight_1d  Quadrature weights in one dimension
935   @param[in] P_1d         Number of basis nodes in one dimension
936   @param[in] Q_1d         Number of quadrature points in one dimension
937   @param[in] dim          Dimension of basis
938   @param[out] mass        Assembled mass matrix in one dimension
939   @param[out] laplace     Assembled perturbed Laplacian in one dimension
940 
941   @return An error code: 0 - success, otherwise - failure
942 
943   @ref Developer
944 **/
945 CeedPragmaOptimizeOff
946 static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
947                                 const CeedScalar *grad_1d,
948                                 const CeedScalar *q_weight_1d, CeedInt P_1d,
949                                 CeedInt Q_1d, CeedInt dim,
950                                 CeedScalar *mass, CeedScalar *laplace) {
951 
952   for (CeedInt i=0; i<P_1d; i++)
953     for (CeedInt j=0; j<P_1d; j++) {
954       CeedScalar sum = 0.0;
955       for (CeedInt k=0; k<Q_1d; k++)
956         sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
957       mass[i+j*P_1d] = sum;
958     }
959   // -- Laplacian
960   for (CeedInt i=0; i<P_1d; i++)
961     for (CeedInt j=0; j<P_1d; j++) {
962       CeedScalar sum = 0.0;
963       for (CeedInt k=0; k<Q_1d; k++)
964         sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
965       laplace[i+j*P_1d] = sum;
966     }
967   CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
968   for (CeedInt i=0; i<P_1d; i++)
969     laplace[i+P_1d*i] += perturbation;
970   return CEED_ERROR_SUCCESS;
971 }
972 CeedPragmaOptimizeOn
973 
974 /// @}
975 
976 /// ----------------------------------------------------------------------------
977 /// CeedOperator Backend API
978 /// ----------------------------------------------------------------------------
979 /// @addtogroup CeedOperatorBackend
980 /// @{
981 
982 /**
983   @brief Create object holding CeedQFunction assembly data for CeedOperator
984 
985   @param[in] ceed  A Ceed object where the CeedQFunctionAssemblyData will be created
986   @param[out] data Address of the variable where the newly created
987                      CeedQFunctionAssemblyData will be stored
988 
989   @return An error code: 0 - success, otherwise - failure
990 
991   @ref Backend
992 **/
993 int CeedQFunctionAssemblyDataCreate(Ceed ceed,
994                                     CeedQFunctionAssemblyData *data) {
995   int ierr;
996 
997   ierr = CeedCalloc(1, data); CeedChk(ierr);
998   (*data)->ref_count = 1;
999   (*data)->ceed = ceed;
1000   ierr = CeedReference(ceed); CeedChk(ierr);
1001 
1002   return CEED_ERROR_SUCCESS;
1003 }
1004 
1005 /**
1006   @brief Increment the reference counter for a CeedQFunctionAssemblyData
1007 
1008   @param data  CeedQFunctionAssemblyData to increment the reference counter
1009 
1010   @return An error code: 0 - success, otherwise - failure
1011 
1012   @ref Backend
1013 **/
1014 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
1015   data->ref_count++;
1016   return CEED_ERROR_SUCCESS;
1017 }
1018 
1019 /**
1020   @brief Set re-use of CeedQFunctionAssemblyData
1021 
1022   @param data       CeedQFunctionAssemblyData to mark for reuse
1023   @param reuse_data Boolean flag indicating data re-use
1024 
1025   @return An error code: 0 - success, otherwise - failure
1026 
1027   @ref Backend
1028 **/
1029 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data,
1030                                       bool reuse_data) {
1031   data->reuse_data = reuse_data;
1032   data->needs_data_update = true;
1033   return CEED_ERROR_SUCCESS;
1034 }
1035 
1036 /**
1037   @brief Mark QFunctionAssemblyData as stale
1038 
1039   @param data              CeedQFunctionAssemblyData to mark as stale
1040   @param needs_data_update Boolean flag indicating if update is needed or completed
1041 
1042   @return An error code: 0 - success, otherwise - failure
1043 
1044   @ref Backend
1045 **/
1046 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data,
1047     bool needs_data_update) {
1048   data->needs_data_update = needs_data_update;
1049   return CEED_ERROR_SUCCESS;
1050 }
1051 
1052 /**
1053   @brief Determine if QFunctionAssemblyData needs update
1054 
1055   @param[in] data              CeedQFunctionAssemblyData to mark as stale
1056   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1057 
1058   @return An error code: 0 - success, otherwise - failure
1059 
1060   @ref Backend
1061 **/
1062 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data,
1063     bool *is_update_needed) {
1064   *is_update_needed = !data->reuse_data || data->needs_data_update;
1065   return CEED_ERROR_SUCCESS;
1066 }
1067 
1068 /**
1069   @brief Copy the pointer to a CeedQFunctionAssemblyData. Both pointers should
1070            be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`;
1071            Note: If `*data_copy` is non-NULL, then it is assumed that
1072            `*data_copy` is a pointer to a CeedQFunctionAssemblyData. This
1073            CeedQFunctionAssemblyData will be destroyed if `*data_copy` is
1074            the only reference to this CeedQFunctionAssemblyData.
1075 
1076   @param data            CeedQFunctionAssemblyData to copy reference to
1077   @param[out] data_copy  Variable to store copied reference
1078 
1079   @return An error code: 0 - success, otherwise - failure
1080 
1081   @ref Backend
1082 **/
1083 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data,
1084     CeedQFunctionAssemblyData *data_copy) {
1085   int ierr;
1086 
1087   ierr = CeedQFunctionAssemblyDataReference(data); CeedChk(ierr);
1088   ierr = CeedQFunctionAssemblyDataDestroy(data_copy); CeedChk(ierr);
1089   *data_copy = data;
1090   return CEED_ERROR_SUCCESS;
1091 }
1092 
1093 /**
1094   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1095 
1096   @param[in] data      CeedQFunctionAssemblyData to retreive status
1097   @param[out] is_setup Boolean flag for setup status
1098 
1099   @return An error code: 0 - success, otherwise - failure
1100 
1101   @ref Backend
1102 **/
1103 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data,
1104                                      bool *is_setup) {
1105   *is_setup = data->is_setup;
1106   return CEED_ERROR_SUCCESS;
1107 }
1108 
1109 /**
1110   @brief Set internal objects for CeedQFunctionAssemblyData
1111 
1112   @param[in] data  CeedQFunctionAssemblyData to set objects
1113   @param[in] vec   CeedVector to store assembled CeedQFunction at quadrature points
1114   @param[in] rstr  CeedElemRestriction for CeedVector containing assembled CeedQFunction
1115 
1116   @return An error code: 0 - success, otherwise - failure
1117 
1118   @ref Backend
1119 **/
1120 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data,
1121                                         CeedVector vec, CeedElemRestriction rstr) {
1122   int ierr;
1123 
1124   ierr = CeedVectorReferenceCopy(vec, &data->vec); CeedChk(ierr);
1125   ierr = CeedElemRestrictionReferenceCopy(rstr, &data->rstr); CeedChk(ierr);
1126 
1127   data->is_setup = true;
1128   return CEED_ERROR_SUCCESS;
1129 }
1130 
1131 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data,
1132                                         CeedVector *vec, CeedElemRestriction *rstr) {
1133   int ierr;
1134 
1135   if (!data->is_setup)
1136     // LCOV_EXCL_START
1137     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE,
1138                      "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1139   // LCOV_EXCL_STOP
1140 
1141   ierr = CeedVectorReferenceCopy(data->vec, vec); CeedChk(ierr);
1142   ierr = CeedElemRestrictionReferenceCopy(data->rstr, rstr); CeedChk(ierr);
1143 
1144   return CEED_ERROR_SUCCESS;
1145 }
1146 
1147 /**
1148   @brief Destroy CeedQFunctionAssemblyData
1149 
1150   @param[out] data  CeedQFunctionAssemblyData to destroy
1151 
1152   @return An error code: 0 - success, otherwise - failure
1153 
1154   @ref Backend
1155 **/
1156 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1157   int ierr;
1158 
1159   if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;
1160 
1161   ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr);
1162   ierr = CeedVectorDestroy(&(*data)->vec); CeedChk(ierr);
1163   ierr = CeedElemRestrictionDestroy(&(*data)->rstr); CeedChk(ierr);
1164 
1165   ierr = CeedFree(data); CeedChk(ierr);
1166   return CEED_ERROR_SUCCESS;
1167 }
1168 
1169 /**
1170   @brief Get CeedOperatorAssemblyData
1171 
1172   @param[in] op     CeedOperator to assemble
1173   @param[out] data  CeedQFunctionAssemblyData
1174 
1175   @return An error code: 0 - success, otherwise - failure
1176 
1177   @ref Backend
1178 **/
1179 int CeedOperatorGetOperatorAssemblyData(CeedOperator op,
1180                                         CeedOperatorAssemblyData *data) {
1181   int ierr;
1182 
1183   if (!op->op_assembled) {
1184     CeedOperatorAssemblyData data;
1185 
1186     ierr = CeedOperatorAssemblyDataCreate(op->ceed, op, &data); CeedChk(ierr);
1187     op->op_assembled = data;
1188   }
1189   *data = op->op_assembled;
1190 
1191   return CEED_ERROR_SUCCESS;
1192 }
1193 
1194 /**
1195   @brief Create object holding CeedOperator assembly data
1196 
1197   @param[in] ceed   A Ceed object where the CeedOperatorAssemblyData will be created
1198   @param[in] op     CeedOperator to be assembled
1199   @param[out] data  Address of the variable where the newly created
1200                       CeedOperatorAssemblyData will be stored
1201 
1202   @return An error code: 0 - success, otherwise - failure
1203 
1204   @ref Backend
1205 **/
1206 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op,
1207                                    CeedOperatorAssemblyData *data) {
1208   int ierr;
1209 
1210   ierr = CeedCalloc(1, data); CeedChk(ierr);
1211   (*data)->ceed = ceed;
1212   ierr = CeedReference(ceed); CeedChk(ierr);
1213 
1214   // Build OperatorAssembly data
1215   CeedQFunction qf;
1216   CeedQFunctionField *qf_fields;
1217   CeedOperatorField *op_fields;
1218   CeedInt num_input_fields;
1219   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
1220   ierr = CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL);
1221   CeedChk(ierr);
1222   ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr);
1223 
1224   // Determine active input basis
1225   CeedInt num_eval_mode_in = 0, dim = 1;
1226   CeedEvalMode *eval_mode_in = NULL;
1227   CeedBasis basis_in = NULL;
1228   for (CeedInt i=0; i<num_input_fields; i++) {
1229     CeedVector vec;
1230     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
1231     if (vec == CEED_VECTOR_ACTIVE) {
1232       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in);
1233       CeedChk(ierr);
1234       ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
1235       CeedEvalMode eval_mode;
1236       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
1237       CeedChk(ierr);
1238       switch (eval_mode) {
1239       case CEED_EVAL_NONE:
1240       case CEED_EVAL_INTERP:
1241         ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
1242         eval_mode_in[num_eval_mode_in] = eval_mode;
1243         num_eval_mode_in += 1;
1244         break;
1245       case CEED_EVAL_GRAD:
1246         ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
1247         for (CeedInt d=0; d<dim; d++) {
1248           eval_mode_in[num_eval_mode_in+d] = eval_mode;
1249         }
1250         num_eval_mode_in += dim;
1251         break;
1252       case CEED_EVAL_WEIGHT:
1253       case CEED_EVAL_DIV:
1254       case CEED_EVAL_CURL:
1255         break; // Caught by QF Assembly
1256       }
1257     }
1258   }
1259   (*data)->num_eval_mode_in = num_eval_mode_in;
1260   (*data)->eval_mode_in = eval_mode_in;
1261   ierr = CeedBasisReferenceCopy(basis_in, &(*data)->basis_in); CeedChk(ierr);
1262 
1263   // Determine active output basis
1264   CeedInt num_output_fields;
1265   ierr = CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields);
1266   CeedChk(ierr);
1267   ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr);
1268   CeedInt num_eval_mode_out = 0;
1269   CeedEvalMode *eval_mode_out = NULL;
1270   CeedBasis basis_out = NULL;
1271   for (CeedInt i=0; i<num_output_fields; i++) {
1272     CeedVector vec;
1273     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
1274     if (vec == CEED_VECTOR_ACTIVE) {
1275       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out);
1276       CeedChk(ierr);
1277       CeedEvalMode eval_mode;
1278       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
1279       CeedChk(ierr);
1280       switch (eval_mode) {
1281       case CEED_EVAL_NONE:
1282       case CEED_EVAL_INTERP:
1283         ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
1284         eval_mode_out[num_eval_mode_out] = eval_mode;
1285         num_eval_mode_out += 1;
1286         break;
1287       case CEED_EVAL_GRAD:
1288         ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
1289         for (CeedInt d=0; d<dim; d++) {
1290           eval_mode_out[num_eval_mode_out+d] = eval_mode;
1291         }
1292         num_eval_mode_out += dim;
1293         break;
1294       case CEED_EVAL_WEIGHT:
1295       case CEED_EVAL_DIV:
1296       case CEED_EVAL_CURL:
1297         break; // Caught by QF Assembly
1298       }
1299     }
1300   }
1301   (*data)->num_eval_mode_out = num_eval_mode_out;
1302   (*data)->eval_mode_out = eval_mode_out;
1303   ierr = CeedBasisReferenceCopy(basis_out, &(*data)->basis_out); CeedChk(ierr);
1304 
1305   return CEED_ERROR_SUCCESS;
1306 }
1307 
1308 /**
1309   @brief Get CeedOperator CeedEvalModes for assembly
1310 
1311   @param[in] data                CeedOperatorAssemblyData
1312   @param[out] num_eval_mode_in   Pointer to hold number of input CeedEvalModes, or NULL
1313   @param[out] eval_mode_in       Pointer to hold input CeedEvalModes, or NULL
1314   @param[out] num_eval_mode_out  Pointer to hold number of output CeedEvalModes, or NULL
1315   @param[out] eval_mode_out      Pointer to hold output CeedEvalModes, or NULL
1316 
1317   @return An error code: 0 - success, otherwise - failure
1318 
1319   @ref Backend
1320 **/
1321 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data,
1322     CeedInt *num_eval_mode_in, const CeedEvalMode **eval_mode_in,
1323     CeedInt *num_eval_mode_out, const CeedEvalMode **eval_mode_out) {
1324   if (num_eval_mode_in) *num_eval_mode_in   = data->num_eval_mode_in;
1325   if (eval_mode_in) *eval_mode_in           = data->eval_mode_in;
1326   if (num_eval_mode_out) *num_eval_mode_out = data->num_eval_mode_out;
1327   if (eval_mode_out) *eval_mode_out         = data->eval_mode_out;
1328 
1329   return CEED_ERROR_SUCCESS;
1330 }
1331 
1332 /**
1333   @brief Get CeedOperator CeedBasis data for assembly
1334 
1335   @param[in] data        CeedOperatorAssemblyData
1336   @param[out] basis_in   Pointer to hold active input CeedBasis, or NULL
1337   @param[out] B_in       Pointer to hold assembled active input B, or NULL
1338   @param[out] basis_out  Pointer to hold active output CeedBasis, or NULL
1339   @param[out] B_out      Pointer to hold assembled active output B, or NULL
1340 
1341   @return An error code: 0 - success, otherwise - failure
1342 
1343   @ref Backend
1344 **/
1345 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data,
1346                                      CeedBasis *basis_in, const CeedScalar **B_in, CeedBasis *basis_out,
1347                                      const CeedScalar **B_out) {
1348   int ierr;
1349 
1350   // Assemble B_in, B_out if needed
1351   if (B_in && !data->B_in) {
1352     CeedInt num_qpts, elem_size;
1353     CeedScalar *B_in, *identity = NULL;
1354     const CeedScalar *interp_in, *grad_in;
1355     bool has_eval_none = false;
1356 
1357     ierr = CeedBasisGetNumQuadraturePoints(data->basis_in, &num_qpts);
1358     CeedChk(ierr);
1359     ierr = CeedBasisGetNumNodes(data->basis_in, &elem_size); CeedChk(ierr);
1360     ierr = CeedCalloc(num_qpts * elem_size * data->num_eval_mode_in, &B_in);
1361     CeedChk(ierr);
1362 
1363     for (CeedInt i = 0; i < data->num_eval_mode_in; i++) {
1364       has_eval_none = has_eval_none || (data->eval_mode_in[i] == CEED_EVAL_NONE);
1365     }
1366     if (has_eval_none) {
1367       ierr = CeedCalloc(num_qpts * elem_size, &identity); CeedChk(ierr);
1368       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1369         identity[i * elem_size + i] = 1.0;
1370       }
1371     }
1372     ierr = CeedBasisGetInterp(data->basis_in, &interp_in); CeedChk(ierr);
1373     ierr = CeedBasisGetGrad(data->basis_in, &grad_in); CeedChk(ierr);
1374 
1375     for (CeedInt q = 0; q < num_qpts; q++) {
1376       for (CeedInt n = 0; n < elem_size; n++) {
1377         CeedInt d_in = -1;
1378         for (CeedInt e_in = 0; e_in < data->num_eval_mode_in; e_in++) {
1379           const CeedInt qq = data->num_eval_mode_in * q;
1380           const CeedScalar *b = NULL;
1381 
1382           if (data->eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in++;
1383           CeedOperatorGetBasisPointer(data->eval_mode_in[e_in], identity,
1384                                       interp_in, &grad_in[d_in * num_qpts * elem_size], &b); CeedChk(ierr);
1385           B_in[(qq + e_in)*elem_size + n] = b[q * elem_size + n];
1386         }
1387       }
1388     }
1389     data->B_in = B_in;
1390   }
1391 
1392   if (B_out && !data->B_out) {
1393     CeedInt num_qpts, elem_size;
1394     CeedScalar *B_out, *identity = NULL;
1395     const CeedScalar *interp_out, *grad_out;
1396     bool has_eval_none = false;
1397 
1398     ierr = CeedBasisGetNumQuadraturePoints(data->basis_out, &num_qpts);
1399     CeedChk(ierr);
1400     ierr = CeedBasisGetNumNodes(data->basis_out, &elem_size); CeedChk(ierr);
1401     ierr = CeedCalloc(num_qpts * elem_size * data->num_eval_mode_out, &B_out);
1402     CeedChk(ierr);
1403 
1404     for (CeedInt i = 0; i < data->num_eval_mode_out; i++) {
1405       has_eval_none = has_eval_none || (data->eval_mode_out[i] == CEED_EVAL_NONE);
1406     }
1407     if (has_eval_none) {
1408       ierr = CeedCalloc(num_qpts * elem_size, &identity); CeedChk(ierr);
1409       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1410         identity[i * elem_size + i] = 1.0;
1411       }
1412     }
1413     ierr = CeedBasisGetInterp(data->basis_out, &interp_out); CeedChk(ierr);
1414     ierr = CeedBasisGetGrad(data->basis_out, &grad_out); CeedChk(ierr);
1415 
1416     for (CeedInt q = 0; q < num_qpts; q++) {
1417       for (CeedInt n = 0; n < elem_size; n++) {
1418         CeedInt d_out = -1;
1419         for (CeedInt e_out = 0; e_out < data->num_eval_mode_out; e_out++) {
1420           const CeedInt qq = data->num_eval_mode_out * q;
1421           const CeedScalar *b = NULL;
1422 
1423           if (data->eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out++;
1424           CeedOperatorGetBasisPointer(data->eval_mode_out[e_out], identity,
1425                                       interp_out, &grad_out[d_out * num_qpts * elem_size], &b); CeedChk(ierr);
1426           B_out[(qq + e_out)*elem_size + n] = b[q * elem_size + n];
1427         }
1428       }
1429     }
1430     data->B_out = B_out;
1431   }
1432 
1433   if (basis_in) *basis_in   = data->basis_in;
1434   if (B_in) *B_in           = data->B_in;
1435   if (basis_out) *basis_out = data->basis_out;
1436   if (B_out) *B_out         = data->B_out;
1437 
1438   return CEED_ERROR_SUCCESS;
1439 }
1440 
1441 /**
1442   @brief Destroy CeedOperatorAssemblyData
1443 
1444   @param[out] data  CeedOperatorAssemblyData to destroy
1445 
1446   @return An error code: 0 - success, otherwise - failure
1447 
1448   @ref Backend
1449 **/
1450 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1451   int ierr;
1452 
1453   if (!*data) return CEED_ERROR_SUCCESS;
1454 
1455   ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr);
1456   ierr = CeedBasisDestroy(&(*data)->basis_in); CeedChk(ierr);
1457   ierr = CeedBasisDestroy(&(*data)->basis_out); CeedChk(ierr);
1458   ierr = CeedFree(&(*data)->B_in); CeedChk(ierr);
1459   ierr = CeedFree(&(*data)->B_out); CeedChk(ierr);
1460 
1461   ierr = CeedFree(data); CeedChk(ierr);
1462   return CEED_ERROR_SUCCESS;
1463 }
1464 
1465 /// @}
1466 
1467 /// ----------------------------------------------------------------------------
1468 /// CeedOperator Public API
1469 /// ----------------------------------------------------------------------------
1470 /// @addtogroup CeedOperatorUser
1471 /// @{
1472 
1473 /**
1474   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1475 
1476   This returns a CeedVector containing a matrix at each quadrature point
1477     providing the action of the CeedQFunction associated with the CeedOperator.
1478     The vector 'assembled' is of shape
1479       [num_elements, num_input_fields, num_output_fields, num_quad_points]
1480     and contains column-major matrices representing the action of the
1481     CeedQFunction for a corresponding quadrature point on an element. Inputs and
1482     outputs are in the order provided by the user when adding CeedOperator fields.
1483     For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
1484     'v', provided in that order, would result in an assembled QFunction that
1485     consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
1486     on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1487 
1488   Note: Calling this function asserts that setup is complete
1489           and sets the CeedOperator as immutable.
1490 
1491   @param op              CeedOperator to assemble CeedQFunction
1492   @param[out] assembled  CeedVector to store assembled CeedQFunction at
1493                            quadrature points
1494   @param[out] rstr       CeedElemRestriction for CeedVector containing assembled
1495                            CeedQFunction
1496   @param request         Address of CeedRequest for non-blocking completion, else
1497                            @ref CEED_REQUEST_IMMEDIATE
1498 
1499   @return An error code: 0 - success, otherwise - failure
1500 
1501   @ref User
1502 **/
1503 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
1504                                         CeedElemRestriction *rstr,
1505                                         CeedRequest *request) {
1506   int ierr;
1507   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1508 
1509   if (op->LinearAssembleQFunction) {
1510     // Backend version
1511     ierr = op->LinearAssembleQFunction(op, assembled, rstr, request);
1512     CeedChk(ierr);
1513   } else {
1514     // Operator fallback
1515     CeedOperator op_fallback;
1516 
1517     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1518     if (op_fallback) {
1519       ierr = CeedOperatorLinearAssembleQFunction(op_fallback, assembled,
1520              rstr, request); CeedChk(ierr);
1521     } else {
1522       // LCOV_EXCL_START
1523       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
1524                        "Backend does not support CeedOperatorLinearAssembleQFunction");
1525       // LCOV_EXCL_STOP
1526     }
1527   }
1528   return CEED_ERROR_SUCCESS;
1529 }
1530 
1531 /**
1532   @brief Assemble CeedQFunction and store result internall. Return copied
1533            references of stored data to the caller. Caller is responsible for
1534            ownership and destruction of the copied references. See also
1535            @ref CeedOperatorLinearAssembleQFunction
1536 
1537   @param op              CeedOperator to assemble CeedQFunction
1538   @param assembled       CeedVector to store assembled CeedQFunction at
1539                            quadrature points
1540   @param rstr            CeedElemRestriction for CeedVector containing assembled
1541                            CeedQFunction
1542   @param request         Address of CeedRequest for non-blocking completion, else
1543                            @ref CEED_REQUEST_IMMEDIATE
1544 
1545   @return An error code: 0 - success, otherwise - failure
1546 
1547   @ref User
1548 **/
1549 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op,
1550     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1551   int ierr;
1552   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1553 
1554   if (op->LinearAssembleQFunctionUpdate) {
1555     // Backend version
1556     bool qf_assembled_is_setup;
1557     CeedVector assembled_vec = NULL;
1558     CeedElemRestriction assembled_rstr = NULL;
1559 
1560     ierr = CeedQFunctionAssemblyDataIsSetup(op->qf_assembled,
1561                                             &qf_assembled_is_setup); CeedChk(ierr);
1562     if (qf_assembled_is_setup) {
1563       bool update_needed;
1564 
1565       ierr = CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec,
1566              &assembled_rstr); CeedChk(ierr);
1567       ierr = CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled,
1568              &update_needed); CeedChk(ierr);
1569       if (update_needed) {
1570         ierr = op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr,
1571                request); CeedChk(ierr);
1572       }
1573     } else {
1574       ierr = op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr,
1575                                          request); CeedChk(ierr);
1576       ierr = CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec,
1577              assembled_rstr); CeedChk(ierr);
1578     }
1579     ierr = CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false);
1580     CeedChk(ierr);
1581 
1582     // Copy reference from internally held copy
1583     *assembled = NULL;
1584     *rstr = NULL;
1585     ierr = CeedVectorReferenceCopy(assembled_vec, assembled); CeedChk(ierr);
1586     ierr = CeedVectorDestroy(&assembled_vec); CeedChk(ierr);
1587     ierr = CeedElemRestrictionReferenceCopy(assembled_rstr, rstr); CeedChk(ierr);
1588     ierr = CeedElemRestrictionDestroy(&assembled_rstr); CeedChk(ierr);
1589   } else {
1590     // Operator fallback
1591     CeedOperator op_fallback;
1592 
1593     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1594     if (op_fallback) {
1595       ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled,
1596              rstr, request); CeedChk(ierr);
1597     } else {
1598       // LCOV_EXCL_START
1599       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
1600                        "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1601       // LCOV_EXCL_STOP
1602     }
1603   }
1604 
1605   return CEED_ERROR_SUCCESS;
1606 }
1607 
1608 /**
1609   @brief Assemble the diagonal of a square linear CeedOperator
1610 
1611   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1612 
1613   Note: Currently only non-composite CeedOperators with a single field and
1614           composite CeedOperators with single field sub-operators are supported.
1615 
1616   Note: Calling this function asserts that setup is complete
1617           and sets the CeedOperator as immutable.
1618 
1619   @param op              CeedOperator to assemble CeedQFunction
1620   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1621   @param request         Address of CeedRequest for non-blocking completion, else
1622                            @ref CEED_REQUEST_IMMEDIATE
1623 
1624   @return An error code: 0 - success, otherwise - failure
1625 
1626   @ref User
1627 **/
1628 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
1629                                        CeedRequest *request) {
1630   int ierr;
1631   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1632 
1633   CeedSize input_size = 0, output_size = 0;
1634   ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
1635   CeedChk(ierr);
1636   if (input_size != output_size)
1637     // LCOV_EXCL_START
1638     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1639   // LCOV_EXCL_STOP
1640 
1641   if (op->LinearAssembleDiagonal) {
1642     // Backend version
1643     ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
1644     return CEED_ERROR_SUCCESS;
1645   } else if (op->LinearAssembleAddDiagonal) {
1646     // Backend version with zeroing first
1647     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1648     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1649     return CEED_ERROR_SUCCESS;
1650   } else {
1651     // Operator fallback
1652     CeedOperator op_fallback;
1653 
1654     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1655     if (op_fallback) {
1656       ierr = CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request);
1657       CeedChk(ierr);
1658       return CEED_ERROR_SUCCESS;
1659     }
1660   }
1661   // Default interface implementation
1662   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1663   ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
1664   CeedChk(ierr);
1665 
1666   return CEED_ERROR_SUCCESS;
1667 }
1668 
1669 /**
1670   @brief Assemble the diagonal of a square linear CeedOperator
1671 
1672   This sums into a CeedVector the diagonal of a linear CeedOperator.
1673 
1674   Note: Currently only non-composite CeedOperators with a single field and
1675           composite CeedOperators with single field sub-operators are supported.
1676 
1677   Note: Calling this function asserts that setup is complete
1678           and sets the CeedOperator as immutable.
1679 
1680   @param op              CeedOperator to assemble CeedQFunction
1681   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1682   @param request         Address of CeedRequest for non-blocking completion, else
1683                            @ref CEED_REQUEST_IMMEDIATE
1684 
1685   @return An error code: 0 - success, otherwise - failure
1686 
1687   @ref User
1688 **/
1689 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
1690     CeedRequest *request) {
1691   int ierr;
1692   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1693 
1694   CeedSize input_size = 0, output_size = 0;
1695   ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
1696   CeedChk(ierr);
1697   if (input_size != output_size)
1698     // LCOV_EXCL_START
1699     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1700   // LCOV_EXCL_STOP
1701 
1702   if (op->LinearAssembleAddDiagonal) {
1703     // Backend version
1704     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1705     return CEED_ERROR_SUCCESS;
1706   } else {
1707     // Operator fallback
1708     CeedOperator op_fallback;
1709 
1710     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1711     if (op_fallback) {
1712       ierr = CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request);
1713       CeedChk(ierr);
1714       return CEED_ERROR_SUCCESS;
1715     }
1716   }
1717   // Default interface implementation
1718   bool is_composite;
1719   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1720   if (is_composite) {
1721     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1722            false, assembled); CeedChk(ierr);
1723   } else {
1724     ierr = CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false,
1725            assembled); CeedChk(ierr);
1726   }
1727 
1728   return CEED_ERROR_SUCCESS;
1729 }
1730 
1731 /**
1732   @brief Assemble the point block diagonal of a square linear CeedOperator
1733 
1734   This overwrites a CeedVector with the point block diagonal of a linear
1735     CeedOperator.
1736 
1737   Note: Currently only non-composite CeedOperators with a single field and
1738           composite CeedOperators with single field sub-operators are supported.
1739 
1740   Note: Calling this function asserts that setup is complete
1741           and sets the CeedOperator as immutable.
1742 
1743   @param op              CeedOperator to assemble CeedQFunction
1744   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1745                            diagonal, provided in row-major form with an
1746                            @a num_comp * @a num_comp block at each node. The dimensions
1747                            of this vector are derived from the active vector
1748                            for the CeedOperator. The array has shape
1749                            [nodes, component out, component in].
1750   @param request         Address of CeedRequest for non-blocking completion, else
1751                            CEED_REQUEST_IMMEDIATE
1752 
1753   @return An error code: 0 - success, otherwise - failure
1754 
1755   @ref User
1756 **/
1757 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
1758     CeedVector assembled, CeedRequest *request) {
1759   int ierr;
1760   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1761 
1762   CeedSize input_size = 0, output_size = 0;
1763   ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
1764   CeedChk(ierr);
1765   if (input_size != output_size)
1766     // LCOV_EXCL_START
1767     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1768   // LCOV_EXCL_STOP
1769 
1770   if (op->LinearAssemblePointBlockDiagonal) {
1771     // Backend version
1772     ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
1773     CeedChk(ierr);
1774     return CEED_ERROR_SUCCESS;
1775   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1776     // Backend version with zeroing first
1777     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1778     ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
1779            request); CeedChk(ierr);
1780     return CEED_ERROR_SUCCESS;
1781   } else {
1782     // Operator fallback
1783     CeedOperator op_fallback;
1784 
1785     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1786     if (op_fallback) {
1787       ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled,
1788              request); CeedChk(ierr);
1789       return CEED_ERROR_SUCCESS;
1790     }
1791   }
1792   // Default interface implementation
1793   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1794   ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
1795   CeedChk(ierr);
1796 
1797   return CEED_ERROR_SUCCESS;
1798 }
1799 
1800 /**
1801   @brief Assemble the point block diagonal of a square linear CeedOperator
1802 
1803   This sums into a CeedVector with the point block diagonal of a linear
1804     CeedOperator.
1805 
1806   Note: Currently only non-composite CeedOperators with a single field and
1807           composite CeedOperators with single field sub-operators are supported.
1808 
1809   Note: Calling this function asserts that setup is complete
1810           and sets the CeedOperator as immutable.
1811 
1812   @param op              CeedOperator to assemble CeedQFunction
1813   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1814                            diagonal, provided in row-major form with an
1815                            @a num_comp * @a num_comp block at each node. The dimensions
1816                            of this vector are derived from the active vector
1817                            for the CeedOperator. The array has shape
1818                            [nodes, component out, component in].
1819   @param request         Address of CeedRequest for non-blocking completion, else
1820                            CEED_REQUEST_IMMEDIATE
1821 
1822   @return An error code: 0 - success, otherwise - failure
1823 
1824   @ref User
1825 **/
1826 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
1827     CeedVector assembled, CeedRequest *request) {
1828   int ierr;
1829   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1830 
1831   CeedSize input_size = 0, output_size = 0;
1832   ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
1833   CeedChk(ierr);
1834   if (input_size != output_size)
1835     // LCOV_EXCL_START
1836     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1837   // LCOV_EXCL_STOP
1838 
1839   if (op->LinearAssembleAddPointBlockDiagonal) {
1840     // Backend version
1841     ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
1842     CeedChk(ierr);
1843     return CEED_ERROR_SUCCESS;
1844   } else {
1845     // Operator fallback
1846     CeedOperator op_fallback;
1847 
1848     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1849     if (op_fallback) {
1850       ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled,
1851              request); CeedChk(ierr);
1852       return CEED_ERROR_SUCCESS;
1853     }
1854   }
1855   // Default interface implemenation
1856   bool is_composite;
1857   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1858   if (is_composite) {
1859     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1860            true, assembled); CeedChk(ierr);
1861   } else {
1862     ierr = CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled);
1863     CeedChk(ierr);
1864   }
1865 
1866   return CEED_ERROR_SUCCESS;
1867 }
1868 
1869 /**
1870    @brief Fully assemble the nonzero pattern of a linear operator.
1871 
1872    Expected to be used in conjunction with CeedOperatorLinearAssemble()
1873 
1874    The assembly routines use coordinate format, with num_entries tuples of the
1875    form (i, j, value) which indicate that value should be added to the matrix
1876    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1877    This function returns the number of entries and their (i, j) locations,
1878    while CeedOperatorLinearAssemble() provides the values in the same
1879    ordering.
1880 
1881    This will generally be slow unless your operator is low-order.
1882 
1883   Note: Calling this function asserts that setup is complete
1884           and sets the CeedOperator as immutable.
1885 
1886    @param[in]  op           CeedOperator to assemble
1887    @param[out] num_entries  Number of entries in coordinate nonzero pattern
1888    @param[out] rows         Row number for each entry
1889    @param[out] cols         Column number for each entry
1890 
1891    @ref User
1892 **/
1893 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries,
1894                                        CeedInt **rows, CeedInt **cols) {
1895   int ierr;
1896   CeedInt num_suboperators, single_entries;
1897   CeedOperator *sub_operators;
1898   bool is_composite;
1899   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1900 
1901   if (op->LinearAssembleSymbolic) {
1902     // Backend version
1903     ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
1904     return CEED_ERROR_SUCCESS;
1905   } else {
1906     // Operator fallback
1907     CeedOperator op_fallback;
1908 
1909     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1910     if (op_fallback) {
1911       ierr = CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols);
1912       CeedChk(ierr);
1913       return CEED_ERROR_SUCCESS;
1914     }
1915   }
1916 
1917   // Default interface implementation
1918 
1919   // count entries and allocate rows, cols arrays
1920   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1921   *num_entries = 0;
1922   if (is_composite) {
1923     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1924     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1925     for (CeedInt k = 0; k < num_suboperators; ++k) {
1926       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1927              &single_entries); CeedChk(ierr);
1928       *num_entries += single_entries;
1929     }
1930   } else {
1931     ierr = CeedSingleOperatorAssemblyCountEntries(op,
1932            &single_entries); CeedChk(ierr);
1933     *num_entries += single_entries;
1934   }
1935   ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
1936   ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);
1937 
1938   // assemble nonzero locations
1939   CeedInt offset = 0;
1940   if (is_composite) {
1941     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1942     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1943     for (CeedInt k = 0; k < num_suboperators; ++k) {
1944       ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
1945              *cols); CeedChk(ierr);
1946       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1947              &single_entries);
1948       CeedChk(ierr);
1949       offset += single_entries;
1950     }
1951   } else {
1952     ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
1953     CeedChk(ierr);
1954   }
1955 
1956   return CEED_ERROR_SUCCESS;
1957 }
1958 
1959 /**
1960    @brief Fully assemble the nonzero entries of a linear operator.
1961 
1962    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()
1963 
1964    The assembly routines use coordinate format, with num_entries tuples of the
1965    form (i, j, value) which indicate that value should be added to the matrix
1966    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1967    This function returns the values of the nonzero entries to be added, their
1968    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1969 
1970    This will generally be slow unless your operator is low-order.
1971 
1972   Note: Calling this function asserts that setup is complete
1973           and sets the CeedOperator as immutable.
1974 
1975    @param[in]  op      CeedOperator to assemble
1976    @param[out] values  Values to assemble into matrix
1977 
1978    @ref User
1979 **/
1980 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1981   int ierr;
1982   CeedInt num_suboperators, single_entries = 0;
1983   CeedOperator *sub_operators;
1984   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1985 
1986   if (op->LinearAssemble) {
1987     // Backend version
1988     ierr = op->LinearAssemble(op, values); CeedChk(ierr);
1989     return CEED_ERROR_SUCCESS;
1990   } else {
1991     // Operator fallback
1992     CeedOperator op_fallback;
1993 
1994     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1995     if (op_fallback) {
1996       ierr = CeedOperatorLinearAssemble(op_fallback, values); CeedChk(ierr);
1997       return CEED_ERROR_SUCCESS;
1998     }
1999   }
2000 
2001   // Default interface implementation
2002   bool is_composite;
2003   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
2004 
2005   CeedInt offset = 0;
2006   if (is_composite) {
2007     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
2008     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
2009     for (CeedInt k = 0; k < num_suboperators; k++) {
2010       ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
2011       CeedChk(ierr);
2012       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
2013              &single_entries);
2014       CeedChk(ierr);
2015       offset += single_entries;
2016     }
2017   } else {
2018     ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
2019   }
2020 
2021   return CEED_ERROR_SUCCESS;
2022 }
2023 
2024 /**
2025   @brief Create a multigrid coarse operator and level transfer operators
2026            for a CeedOperator, creating the prolongation basis from the
2027            fine and coarse grid interpolation
2028 
2029   Note: Calling this function asserts that setup is complete
2030           and sets the CeedOperator as immutable.
2031 
2032   @param[in] op_fine       Fine grid operator
2033   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
2034   @param[in] rstr_coarse   Coarse grid restriction
2035   @param[in] basis_coarse  Coarse grid active vector basis
2036   @param[out] op_coarse    Coarse grid operator
2037   @param[out] op_prolong   Coarse to fine operator
2038   @param[out] op_restrict  Fine to coarse operator
2039 
2040   @return An error code: 0 - success, otherwise - failure
2041 
2042   @ref User
2043 **/
2044 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
2045                                      CeedVector p_mult_fine,
2046                                      CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2047                                      CeedOperator *op_coarse, CeedOperator *op_prolong,
2048                                      CeedOperator *op_restrict) {
2049   int ierr;
2050   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
2051   Ceed ceed;
2052   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
2053 
2054   // Check for compatible quadrature spaces
2055   CeedBasis basis_fine;
2056   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
2057   CeedInt Q_f, Q_c;
2058   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
2059   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
2060   if (Q_f != Q_c)
2061     // LCOV_EXCL_START
2062     return CeedError(ceed, CEED_ERROR_DIMENSION,
2063                      "Bases must have compatible quadrature spaces");
2064   // LCOV_EXCL_STOP
2065 
2066   // Coarse to fine basis
2067   CeedInt P_f, P_c, Q = Q_f;
2068   bool is_tensor_f, is_tensor_c;
2069   ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
2070   ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
2071   CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
2072   if (is_tensor_f && is_tensor_c) {
2073     ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
2074     ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
2075     ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
2076   } else if (!is_tensor_f && !is_tensor_c) {
2077     ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
2078     ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
2079   } else {
2080     // LCOV_EXCL_START
2081     return CeedError(ceed, CEED_ERROR_MINOR,
2082                      "Bases must both be tensor or non-tensor");
2083     // LCOV_EXCL_STOP
2084   }
2085 
2086   ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
2087   ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
2088   ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
2089   ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
2090   const CeedScalar *interp_f_source = NULL, *interp_c_source = NULL;
2091   if (is_tensor_f) {
2092     ierr = CeedBasisGetInterp1D(basis_fine, &interp_f_source); CeedChk(ierr);
2093     ierr = CeedBasisGetInterp1D(basis_coarse, &interp_c_source); CeedChk(ierr);
2094   } else {
2095     ierr = CeedBasisGetInterp(basis_fine, &interp_f_source); CeedChk(ierr);
2096     ierr = CeedBasisGetInterp(basis_coarse, &interp_c_source); CeedChk(ierr);
2097   }
2098   memcpy(interp_f, interp_f_source, Q*P_f*sizeof interp_f_source[0]);
2099   memcpy(interp_c, interp_c_source, Q*P_c*sizeof interp_c_source[0]);
2100 
2101   // -- QR Factorization, interp_f = Q R
2102   ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);
2103 
2104   // -- Apply Qtranspose, interp_c = Qtranspose interp_c
2105   ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
2106                                Q, P_c, P_f, P_c, 1); CeedChk(ierr);
2107 
2108   // -- Apply Rinv, interp_c_to_f = Rinv interp_c
2109   for (CeedInt j=0; j<P_c; j++) { // Column j
2110     interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
2111     for (CeedInt i=P_f-2; i>=0; i--) { // Row i
2112       interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
2113       for (CeedInt k=i+1; k<P_f; k++)
2114         interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
2115       interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
2116     }
2117   }
2118   ierr = CeedFree(&tau); CeedChk(ierr);
2119   ierr = CeedFree(&interp_c); CeedChk(ierr);
2120   ierr = CeedFree(&interp_f); CeedChk(ierr);
2121 
2122   // Complete with interp_c_to_f versions of code
2123   if (is_tensor_f) {
2124     ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
2125            rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
2126     CeedChk(ierr);
2127   } else {
2128     ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
2129            rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
2130     CeedChk(ierr);
2131   }
2132 
2133   // Cleanup
2134   ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
2135   return CEED_ERROR_SUCCESS;
2136 }
2137 
2138 /**
2139   @brief Create a multigrid coarse operator and level transfer operators
2140            for a CeedOperator with a tensor basis for the active basis
2141 
2142   Note: Calling this function asserts that setup is complete
2143           and sets the CeedOperator as immutable.
2144 
2145   @param[in] op_fine        Fine grid operator
2146   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
2147   @param[in] rstr_coarse    Coarse grid restriction
2148   @param[in] basis_coarse   Coarse grid active vector basis
2149   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
2150   @param[out] op_coarse     Coarse grid operator
2151   @param[out] op_prolong    Coarse to fine operator
2152   @param[out] op_restrict   Fine to coarse operator
2153 
2154   @return An error code: 0 - success, otherwise - failure
2155 
2156   @ref User
2157 **/
2158 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
2159     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2160     const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
2161     CeedOperator *op_prolong, CeedOperator *op_restrict) {
2162   int ierr;
2163   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
2164   Ceed ceed;
2165   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
2166 
2167   // Check for compatible quadrature spaces
2168   CeedBasis basis_fine;
2169   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
2170   CeedInt Q_f, Q_c;
2171   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
2172   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
2173   if (Q_f != Q_c)
2174     // LCOV_EXCL_START
2175     return CeedError(ceed, CEED_ERROR_DIMENSION,
2176                      "Bases must have compatible quadrature spaces");
2177   // LCOV_EXCL_STOP
2178 
2179   // Coarse to fine basis
2180   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2181   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
2182   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
2183   ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr);
2184   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
2185   CeedChk(ierr);
2186   P_1d_c = dim == 1 ? num_nodes_c :
2187            dim == 2 ? sqrt(num_nodes_c) :
2188            cbrt(num_nodes_c);
2189   CeedScalar *q_ref, *q_weight, *grad;
2190   ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr);
2191   ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr);
2192   ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr);
2193   CeedBasis basis_c_to_f;
2194   ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f,
2195                                  interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
2196   CeedChk(ierr);
2197   ierr = CeedFree(&q_ref); CeedChk(ierr);
2198   ierr = CeedFree(&q_weight); CeedChk(ierr);
2199   ierr = CeedFree(&grad); CeedChk(ierr);
2200 
2201   // Core code
2202   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
2203                                           basis_coarse, basis_c_to_f, op_coarse,
2204                                           op_prolong, op_restrict);
2205   CeedChk(ierr);
2206   return CEED_ERROR_SUCCESS;
2207 }
2208 
2209 /**
2210   @brief Create a multigrid coarse operator and level transfer operators
2211            for a CeedOperator with a non-tensor basis for the active vector
2212 
2213   Note: Calling this function asserts that setup is complete
2214           and sets the CeedOperator as immutable.
2215 
2216   @param[in] op_fine        Fine grid operator
2217   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
2218   @param[in] rstr_coarse    Coarse grid restriction
2219   @param[in] basis_coarse   Coarse grid active vector basis
2220   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
2221   @param[out] op_coarse     Coarse grid operator
2222   @param[out] op_prolong    Coarse to fine operator
2223   @param[out] op_restrict   Fine to coarse operator
2224 
2225   @return An error code: 0 - success, otherwise - failure
2226 
2227   @ref User
2228 **/
2229 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine,
2230                                        CeedVector p_mult_fine,
2231                                        CeedElemRestriction rstr_coarse,
2232                                        CeedBasis basis_coarse,
2233                                        const CeedScalar *interp_c_to_f,
2234                                        CeedOperator *op_coarse,
2235                                        CeedOperator *op_prolong,
2236                                        CeedOperator *op_restrict) {
2237   int ierr;
2238   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
2239   Ceed ceed;
2240   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
2241 
2242   // Check for compatible quadrature spaces
2243   CeedBasis basis_fine;
2244   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
2245   CeedInt Q_f, Q_c;
2246   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
2247   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
2248   if (Q_f != Q_c)
2249     // LCOV_EXCL_START
2250     return CeedError(ceed, CEED_ERROR_DIMENSION,
2251                      "Bases must have compatible quadrature spaces");
2252   // LCOV_EXCL_STOP
2253 
2254   // Coarse to fine basis
2255   CeedElemTopology topo;
2256   ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr);
2257   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2258   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
2259   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
2260   ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr);
2261   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
2262   CeedChk(ierr);
2263   CeedScalar *q_ref, *q_weight, *grad;
2264   ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr);
2265   ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr);
2266   ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr);
2267   CeedBasis basis_c_to_f;
2268   ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f,
2269                            interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
2270   CeedChk(ierr);
2271   ierr = CeedFree(&q_ref); CeedChk(ierr);
2272   ierr = CeedFree(&q_weight); CeedChk(ierr);
2273   ierr = CeedFree(&grad); CeedChk(ierr);
2274 
2275   // Core code
2276   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
2277                                           basis_coarse, basis_c_to_f, op_coarse,
2278                                           op_prolong, op_restrict);
2279   CeedChk(ierr);
2280   return CEED_ERROR_SUCCESS;
2281 }
2282 
2283 /**
2284   @brief Build a FDM based approximate inverse for each element for a
2285            CeedOperator
2286 
2287   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization
2288     Method based approximate inverse. This function obtains the simultaneous
2289     diagonalization for the 1D mass and Laplacian operators,
2290       M = V^T V, K = V^T S V.
2291     The assembled QFunction is used to modify the eigenvalues from simultaneous
2292     diagonalization and obtain an approximate inverse of the form
2293       V^T S^hat V. The CeedOperator must be linear and non-composite. The
2294     associated CeedQFunction must therefore also be linear.
2295 
2296   Note: Calling this function asserts that setup is complete
2297           and sets the CeedOperator as immutable.
2298 
2299   @param op            CeedOperator to create element inverses
2300   @param[out] fdm_inv  CeedOperator to apply the action of a FDM based inverse
2301                          for each element
2302   @param request       Address of CeedRequest for non-blocking completion, else
2303                          @ref CEED_REQUEST_IMMEDIATE
2304 
2305   @return An error code: 0 - success, otherwise - failure
2306 
2307   @ref User
2308 **/
2309 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv,
2310                                         CeedRequest *request) {
2311   int ierr;
2312   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
2313 
2314   if (op->CreateFDMElementInverse) {
2315     // Backend version
2316     ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr);
2317     return CEED_ERROR_SUCCESS;
2318   } else {
2319     // Operator fallback
2320     CeedOperator op_fallback;
2321 
2322     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
2323     if (op_fallback) {
2324       ierr = CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request);
2325       CeedChk(ierr);
2326       return CEED_ERROR_SUCCESS;
2327     }
2328   }
2329 
2330   // Default interface implementation
2331   Ceed ceed, ceed_parent;
2332   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
2333   ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr);
2334   ceed_parent = ceed_parent ? ceed_parent : ceed;
2335   CeedQFunction qf;
2336   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
2337 
2338   // Determine active input basis
2339   bool interp = false, grad = false;
2340   CeedBasis basis = NULL;
2341   CeedElemRestriction rstr = NULL;
2342   CeedOperatorField *op_fields;
2343   CeedQFunctionField *qf_fields;
2344   CeedInt num_input_fields;
2345   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL);
2346   CeedChk(ierr);
2347   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
2348   for (CeedInt i=0; i<num_input_fields; i++) {
2349     CeedVector vec;
2350     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
2351     if (vec == CEED_VECTOR_ACTIVE) {
2352       CeedEvalMode eval_mode;
2353       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
2354       interp = interp || eval_mode == CEED_EVAL_INTERP;
2355       grad = grad || eval_mode == CEED_EVAL_GRAD;
2356       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr);
2357       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
2358     }
2359   }
2360   if (!basis)
2361     // LCOV_EXCL_START
2362     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2363   // LCOV_EXCL_STOP
2364   CeedSize l_size = 1;
2365   CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
2366   ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr);
2367   ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr);
2368   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr);
2369   ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr);
2370   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
2371   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
2372   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
2373   ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr);
2374 
2375   // Build and diagonalize 1D Mass and Laplacian
2376   bool tensor_basis;
2377   ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr);
2378   if (!tensor_basis)
2379     // LCOV_EXCL_START
2380     return CeedError(ceed, CEED_ERROR_BACKEND,
2381                      "FDMElementInverse only supported for tensor "
2382                      "bases");
2383   // LCOV_EXCL_STOP
2384   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2385   ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr);
2386   ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr);
2387   ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr);
2388   ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr);
2389   ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr);
2390   // -- Build matrices
2391   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2392   ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr);
2393   ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr);
2394   ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr);
2395   ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim,
2396                               mass, laplace); CeedChk(ierr);
2397 
2398   // -- Diagonalize
2399   ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d);
2400   CeedChk(ierr);
2401   ierr = CeedFree(&mass); CeedChk(ierr);
2402   ierr = CeedFree(&laplace); CeedChk(ierr);
2403   for (CeedInt i=0; i<P_1d; i++)
2404     for (CeedInt j=0; j<P_1d; j++)
2405       fdm_interp[i+j*P_1d] = x[j+i*P_1d];
2406   ierr = CeedFree(&x); CeedChk(ierr);
2407 
2408   // Assemble QFunction
2409   CeedVector assembled;
2410   CeedElemRestriction rstr_qf;
2411   ierr =  CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled,
2412           &rstr_qf, request); CeedChk(ierr);
2413   CeedInt layout[3];
2414   ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr);
2415   ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr);
2416   CeedScalar max_norm = 0;
2417   ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr);
2418 
2419   // Calculate element averages
2420   CeedInt num_modes = (interp?1:0) + (grad?dim:0);
2421   CeedScalar *elem_avg;
2422   const CeedScalar *assembled_array, *q_weight_array;
2423   CeedVector q_weight;
2424   ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr);
2425   ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT,
2426                         CEED_VECTOR_NONE, q_weight); CeedChk(ierr);
2427   ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array);
2428   CeedChk(ierr);
2429   ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array);
2430   CeedChk(ierr);
2431   ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr);
2432   const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
2433   for (CeedInt e=0; e<num_elem; e++) {
2434     CeedInt count = 0;
2435     for (CeedInt q=0; q<num_qpts; q++)
2436       for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++)
2437         if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) >
2438             qf_value_bound) {
2439           elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] /
2440                          q_weight_array[q];
2441           count++;
2442         }
2443     if (count) {
2444       elem_avg[e] /= count;
2445     } else {
2446       elem_avg[e] = 1.0;
2447     }
2448   }
2449   ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr);
2450   ierr = CeedVectorDestroy(&assembled); CeedChk(ierr);
2451   ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr);
2452   ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr);
2453 
2454   // Build FDM diagonal
2455   CeedVector q_data;
2456   CeedScalar *q_data_array, *fdm_diagonal;
2457   ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr);
2458   const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON;
2459   for (CeedInt c=0; c<num_comp; c++)
2460     for (CeedInt n=0; n<elem_size; n++) {
2461       if (interp)
2462         fdm_diagonal[c*elem_size + n] = 1.0;
2463       if (grad)
2464         for (CeedInt d=0; d<dim; d++) {
2465           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2466           fdm_diagonal[c*elem_size + n] += lambda[i];
2467         }
2468       if (fabs(fdm_diagonal[c*elem_size + n]) < fdm_diagonal_bound)
2469         fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound;
2470     }
2471   ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data);
2472   CeedChk(ierr);
2473   ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr);
2474   ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array);
2475   CeedChk(ierr);
2476   for (CeedInt e=0; e<num_elem; e++)
2477     for (CeedInt c=0; c<num_comp; c++)
2478       for (CeedInt n=0; n<elem_size; n++)
2479         q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] *
2480             fdm_diagonal[c*elem_size + n]);
2481   ierr = CeedFree(&elem_avg); CeedChk(ierr);
2482   ierr = CeedFree(&fdm_diagonal); CeedChk(ierr);
2483   ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr);
2484 
2485   // Setup FDM operator
2486   // -- Basis
2487   CeedBasis fdm_basis;
2488   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2489   ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr);
2490   ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr);
2491   ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr);
2492   ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d,
2493                                  fdm_interp, grad_dummy, q_ref_dummy,
2494                                  q_weight_dummy, &fdm_basis); CeedChk(ierr);
2495   ierr = CeedFree(&fdm_interp); CeedChk(ierr);
2496   ierr = CeedFree(&grad_dummy); CeedChk(ierr);
2497   ierr = CeedFree(&q_ref_dummy); CeedChk(ierr);
2498   ierr = CeedFree(&q_weight_dummy); CeedChk(ierr);
2499   ierr = CeedFree(&lambda); CeedChk(ierr);
2500 
2501   // -- Restriction
2502   CeedElemRestriction rstr_qd_i;
2503   CeedInt strides[3] = {1, elem_size, elem_size*num_comp};
2504   ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size,
2505                                           num_comp, num_elem*num_comp*elem_size,
2506                                           strides, &rstr_qd_i); CeedChk(ierr);
2507   // -- QFunction
2508   CeedQFunction qf_fdm;
2509   ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm);
2510   CeedChk(ierr);
2511   ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP);
2512   CeedChk(ierr);
2513   ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE);
2514   CeedChk(ierr);
2515   ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP);
2516   CeedChk(ierr);
2517   ierr = CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp); CeedChk(ierr);
2518   // -- QFunction context
2519   CeedInt *num_comp_data;
2520   ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr);
2521   num_comp_data[0] = num_comp;
2522   CeedQFunctionContext ctx_fdm;
2523   ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr);
2524   ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER,
2525                                      sizeof(*num_comp_data), num_comp_data);
2526   CeedChk(ierr);
2527   ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr);
2528   ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr);
2529   // -- Operator
2530   ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv);
2531   CeedChk(ierr);
2532   CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
2533   CeedChk(ierr);
2534   CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED,
2535                        q_data); CeedChk(ierr);
2536   CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
2537   CeedChk(ierr);
2538 
2539   // Cleanup
2540   ierr = CeedVectorDestroy(&q_data); CeedChk(ierr);
2541   ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr);
2542   ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr);
2543   ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr);
2544 
2545   return CEED_ERROR_SUCCESS;
2546 }
2547 
2548 /// @}
2549