xref: /libCEED/interface/ceed-preconditioning.c (revision ba746a46940e5367d5f473ead73700e6f2d99476)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <assert.h>
9 #include <ceed-impl.h>
10 #include <ceed/backend.h>
11 #include <ceed/ceed.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) {
128       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     }
130     // Cleanup
131     CeedCall(CeedQFunctionDestroy(&qf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
133     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
134   }
135   CeedCall(CeedOperatorSetName(op_fallback, op->name));
136   CeedCall(CeedOperatorCheckReady(op_fallback));
137   op->op_fallback = op_fallback;
138 
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
144 
145   @param[in]  op          CeedOperator to retrieve fallback for
146   @param[out] op_fallback Fallback CeedOperator
147 
148   @return An error code: 0 - success, otherwise - failure
149 
150   @ref Developer
151 **/
152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
153   // Create if needed
154   if (!op->op_fallback) {
155     CeedCall(CeedOperatorCreateFallback(op));
156   }
157   if (op->op_fallback) {
158     bool is_debug;
159 
160     CeedCall(CeedIsDebug(op->ceed, &is_debug));
161     if (is_debug) {
162       Ceed        ceed, ceed_fallback;
163       const char *resource, *resource_fallback;
164 
165       CeedCall(CeedOperatorGetCeed(op, &ceed));
166       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
167       CeedCall(CeedGetResource(ceed, &resource));
168       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
169 
170       CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n");
171       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
172                 op->op_fallback);
173     }
174   }
175   *op_fallback = op->op_fallback;
176 
177   return CEED_ERROR_SUCCESS;
178 }
179 
180 /**
181   @brief Select correct basis matrix pointer based on CeedEvalMode
182 
183   @param[in]  eval_mode Current basis evaluation mode
184   @param[in]  identity  Pointer to identity matrix
185   @param[in]  interp    Pointer to interpolation matrix
186   @param[in]  grad      Pointer to gradient matrix
187   @param[out] basis_ptr Basis pointer to set
188 
189   @ref Developer
190 **/
191 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad,
192                                                const CeedScalar **basis_ptr) {
193   switch (eval_mode) {
194     case CEED_EVAL_NONE:
195       *basis_ptr = identity;
196       break;
197     case CEED_EVAL_INTERP:
198       *basis_ptr = interp;
199       break;
200     case CEED_EVAL_GRAD:
201       *basis_ptr = grad;
202       break;
203     case CEED_EVAL_WEIGHT:
204     case CEED_EVAL_DIV:
205     case CEED_EVAL_CURL:
206       break;  // Caught by QF Assembly
207   }
208   assert(*basis_ptr != NULL);
209 }
210 
211 /**
212   @brief Create point block restriction for active operator field
213 
214   @param[in]  rstr            Original CeedElemRestriction for active field
215   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
216 
217   @return An error code: 0 - success, otherwise - failure
218 
219   @ref Developer
220 **/
221 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
222   Ceed ceed;
223   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
224   const CeedInt *offsets;
225   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
226 
227   // Expand offsets
228   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
229   CeedSize l_size;
230   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
231   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
232   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
233   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
234   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
235   CeedInt shift = num_comp;
236   if (comp_stride != 1) shift *= num_comp;
237   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
238   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
239     pointblock_offsets[i] = offsets[i] * shift;
240   }
241 
242   // Create new restriction
243   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
244                                      pointblock_offsets, pointblock_rstr));
245 
246   // Cleanup
247   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
248 
249   return CEED_ERROR_SUCCESS;
250 }
251 
252 /**
253   @brief Core logic for assembling operator diagonal or point block diagonal
254 
255   @param[in]  op            CeedOperator to assemble point block diagonal
256   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
257   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
258   @param[out] assembled     CeedVector to store assembled diagonal
259 
260   @return An error code: 0 - success, otherwise - failure
261 
262   @ref Developer
263 **/
264 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
265   Ceed ceed;
266   CeedCall(CeedOperatorGetCeed(op, &ceed));
267 
268   // Assemble QFunction
269   CeedQFunction       qf;
270   const CeedScalar   *assembled_qf_array;
271   CeedVector          assembled_qf;
272   CeedElemRestriction assembled_elem_rstr;
273   CeedInt             num_input_fields, num_output_fields;
274   CeedInt             layout[3];
275 
276   CeedCall(CeedOperatorGetQFunction(op, &qf));
277   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
278   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
279   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout));
280   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
281   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
282 
283   // Get assembly data
284   CeedOperatorAssemblyData data;
285   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
286   CeedInt                 *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
287   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
288   CeedBasis               *active_bases;
289   CeedElemRestriction     *active_elem_rstrs;
290 
291   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
292   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
293                                                 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components));
294   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL));
295   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs));
296 
297   // Loop over all active bases
298   for (CeedInt b = 0; b < num_active_bases; b++) {
299     // Assemble point block diagonal restriction, if needed
300     CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b];
301 
302     if (is_pointblock) {
303       CeedElemRestriction point_block_elem_rstr;
304 
305       CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr));
306       diag_elem_rstr = point_block_elem_rstr;
307     }
308 
309     // Create diagonal vector
310     CeedVector elem_diag;
311     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
312 
313     // Assemble element operator diagonals
314     CeedScalar *elem_diag_array;
315     CeedInt     num_elem, num_nodes, num_qpts, num_components;
316 
317     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
318     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
319     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
320     CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes));
321     CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components));
322     CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts));
323 
324     // Basis matrices
325     const CeedScalar *interp, *grad;
326     CeedScalar       *identity      = NULL;
327     bool              has_eval_none = false;
328     for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) {
329       has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE);
330     }
331     for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) {
332       has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE);
333     }
334     if (has_eval_none) {
335       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
336       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
337     }
338     CeedCall(CeedBasisGetInterp(active_bases[b], &interp));
339     CeedCall(CeedBasisGetGrad(active_bases[b], &grad));
340     // Compute the diagonal of B^T D B
341     // Each element
342     for (CeedInt e = 0; e < num_elem; e++) {
343       CeedInt d_out = -1;
344       // Each basis eval mode pair
345       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) {
346         const CeedScalar *B_t = NULL;
347         if (eval_modes_out[b][e_out] == CEED_EVAL_GRAD) d_out += 1;
348         CeedOperatorGetBasisPointer(eval_modes_out[b][e_out], identity, interp, &grad[d_out * num_qpts * num_nodes], &B_t);
349         CeedInt d_in = -1;
350         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) {
351           const CeedScalar *B = NULL;
352           if (eval_modes_in[b][e_in] == CEED_EVAL_GRAD) d_in += 1;
353           CeedOperatorGetBasisPointer(eval_modes_in[b][e_in], identity, interp, &grad[d_in * num_qpts * num_nodes], &B);
354           // Each component
355           for (CeedInt c_out = 0; c_out < num_components; c_out++) {
356             // Each qpt/node pair
357             for (CeedInt q = 0; q < num_qpts; q++) {
358               if (is_pointblock) {
359                 // Point Block Diagonal
360                 for (CeedInt c_in = 0; c_in < num_components; c_in++) {
361                   const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
362                   const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
363                   for (CeedInt n = 0; n < num_nodes; n++) {
364                     elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] +=
365                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
366                   }
367                 }
368               } else {
369                 // Diagonal Only
370                 const CeedInt    c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
371                 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
372                 for (CeedInt n = 0; n < num_nodes; n++) {
373                   elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
374                 }
375               }
376             }
377           }
378         }
379       }
380     }
381     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
382 
383     // Assemble local operator diagonal
384     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
385 
386     // Cleanup
387     if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
388     CeedCall(CeedVectorDestroy(&elem_diag));
389     CeedCall(CeedFree(&identity));
390   }
391   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
392   CeedCall(CeedVectorDestroy(&assembled_qf));
393 
394   return CEED_ERROR_SUCCESS;
395 }
396 
397 /**
398   @brief Core logic for assembling composite operator diagonal
399 
400   @param[in]  op            CeedOperator to assemble point block diagonal
401   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
402   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
403   @param[out] assembled     CeedVector to store assembled diagonal
404 
405   @return An error code: 0 - success, otherwise - failure
406 
407   @ref Developer
408 **/
409 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
410                                                                  CeedVector assembled) {
411   CeedInt       num_sub;
412   CeedOperator *suboperators;
413   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
414   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
415   for (CeedInt i = 0; i < num_sub; i++) {
416     if (is_pointblock) {
417       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
418     } else {
419       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
420     }
421   }
422   return CEED_ERROR_SUCCESS;
423 }
424 
425 /**
426   @brief Build nonzero pattern for non-composite operator
427 
428   Users should generally use CeedOperatorLinearAssembleSymbolic()
429 
430   @param[in]  op     CeedOperator to assemble nonzero pattern
431   @param[in]  offset Offset for number of entries
432   @param[out] rows   Row number for each entry
433   @param[out] cols   Column number for each entry
434 
435   @return An error code: 0 - success, otherwise - failure
436 
437   @ref Developer
438 **/
439 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
440   Ceed ceed;
441   bool is_composite;
442   CeedCall(CeedOperatorGetCeed(op, &ceed));
443   CeedCall(CeedOperatorIsComposite(op, &is_composite));
444 
445   if (is_composite) {
446     // LCOV_EXCL_START
447     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
448     // LCOV_EXCL_STOP
449   }
450 
451   CeedSize num_nodes;
452   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
453   CeedElemRestriction rstr_in;
454   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
455   CeedInt num_elem, elem_size, num_comp;
456   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
457   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
458   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
459   CeedInt layout_er[3];
460   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
461 
462   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
463 
464   // Determine elem_dof relation
465   CeedVector index_vec;
466   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
467   CeedScalar *array;
468   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
469   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
470   CeedCall(CeedVectorRestoreArray(index_vec, &array));
471   CeedVector elem_dof;
472   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
473   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
474   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
475   const CeedScalar *elem_dof_a;
476   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
477   CeedCall(CeedVectorDestroy(&index_vec));
478 
479   // Determine i, j locations for element matrices
480   CeedInt count = 0;
481   for (CeedInt e = 0; e < num_elem; e++) {
482     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
483       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
484         for (CeedInt i = 0; i < elem_size; i++) {
485           for (CeedInt j = 0; j < elem_size; j++) {
486             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
487             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
488 
489             const CeedInt row = elem_dof_a[elem_dof_index_row];
490             const CeedInt col = elem_dof_a[elem_dof_index_col];
491 
492             rows[offset + count] = row;
493             cols[offset + count] = col;
494             count++;
495           }
496         }
497       }
498     }
499   }
500   if (count != local_num_entries) {
501     // LCOV_EXCL_START
502     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
503     // LCOV_EXCL_STOP
504   }
505   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
506   CeedCall(CeedVectorDestroy(&elem_dof));
507 
508   return CEED_ERROR_SUCCESS;
509 }
510 
511 /**
512   @brief Assemble nonzero entries for non-composite operator
513 
514   Users should generally use CeedOperatorLinearAssemble()
515 
516   @param[in]  op     CeedOperator to assemble
517   @param[in]  offset Offset for number of entries
518   @param[out] values Values to assemble into matrix
519 
520   @return An error code: 0 - success, otherwise - failure
521 
522   @ref Developer
523 **/
524 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
525   Ceed ceed;
526   bool is_composite;
527   CeedCall(CeedOperatorGetCeed(op, &ceed));
528   CeedCall(CeedOperatorIsComposite(op, &is_composite));
529 
530   if (is_composite) {
531     // LCOV_EXCL_START
532     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
533     // LCOV_EXCL_STOP
534   }
535 
536   // Early exit for empty operator
537   {
538     CeedInt num_elem = 0;
539 
540     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
541     if (num_elem == 0) return CEED_ERROR_SUCCESS;
542   }
543 
544   if (op->LinearAssembleSingle) {
545     // Backend version
546     CeedCall(op->LinearAssembleSingle(op, offset, values));
547     return CEED_ERROR_SUCCESS;
548   } else {
549     // Operator fallback
550     CeedOperator op_fallback;
551 
552     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
553     if (op_fallback) {
554       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
555       return CEED_ERROR_SUCCESS;
556     }
557   }
558 
559   // Assemble QFunction
560   CeedQFunction qf;
561   CeedCall(CeedOperatorGetQFunction(op, &qf));
562   CeedVector          assembled_qf;
563   CeedElemRestriction rstr_q;
564   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
565   CeedSize qf_length;
566   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
567 
568   CeedInt            num_input_fields, num_output_fields;
569   CeedOperatorField *input_fields;
570   CeedOperatorField *output_fields;
571   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
572 
573   // Get assembly data
574   CeedOperatorAssemblyData data;
575   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
576   const CeedEvalMode **eval_modes_in, **eval_modes_out;
577   CeedInt             *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
578   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out,
579                                                 &eval_modes_out, NULL, NULL));
580   CeedBasis *bases;
581   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL));
582   CeedBasis basis_in = bases[0];
583 
584   if (num_active_bases > 1) {
585     // LCOV_EXCL_START
586     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases");
587     // LCOV_EXCL_STOP
588   }
589   if (num_eval_modes_in[0] == 0 || num_eval_modes_out[0] == 0) {
590     // LCOV_EXCL_START
591     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
592     // LCOV_EXCL_STOP
593   }
594 
595   CeedElemRestriction active_rstr;
596   CeedInt             num_elem, elem_size, num_qpts, num_comp;
597   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
598   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
599   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
600   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
601   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
602 
603   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
604 
605   // loop over elements and put in data structure
606   const CeedScalar *interp_in, *grad_in;
607   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
608   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
609 
610   const CeedScalar *assembled_qf_array;
611   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
612 
613   CeedInt layout_qf[3];
614   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
615   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
616 
617   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
618   const CeedScalar **B_mats_in, **B_mats_out;
619   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out));
620   const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0];
621   CeedScalar        BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]];
622   CeedScalar        elem_mat[elem_size * elem_size];
623   CeedInt           count = 0;
624   CeedScalar       *vals;
625   CeedCall(CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals));
626   for (CeedInt e = 0; e < num_elem; e++) {
627     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
628       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
629         // Compute B^T*D
630         for (CeedInt n = 0; n < elem_size; n++) {
631           for (CeedInt q = 0; q < num_qpts; q++) {
632             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
633               const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in);
634               CeedScalar    sum       = 0.0;
635               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
636                 const CeedInt b_out_index     = (num_eval_modes_out[0] * q + e_out) * elem_size + n;
637                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out;
638                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
639                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
640               }
641               BTD_mat[btd_index] = sum;
642             }
643           }
644         }
645         // form element matrix itself (for each block component)
646         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0]));
647 
648         // put element matrix in coordinate data structure
649         for (CeedInt i = 0; i < elem_size; i++) {
650           for (CeedInt j = 0; j < elem_size; j++) {
651             vals[offset + count] = elem_mat[i * elem_size + j];
652             count++;
653           }
654         }
655       }
656     }
657   }
658   if (count != local_num_entries) {
659     // LCOV_EXCL_START
660     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
661     // LCOV_EXCL_STOP
662   }
663   CeedCall(CeedVectorRestoreArray(values, &vals));
664 
665   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
666   CeedCall(CeedVectorDestroy(&assembled_qf));
667 
668   return CEED_ERROR_SUCCESS;
669 }
670 
671 /**
672   @brief Count number of entries for assembled CeedOperator
673 
674   @param[in]  op          CeedOperator to assemble
675   @param[out] num_entries Number of entries in assembled representation
676 
677   @return An error code: 0 - success, otherwise - failure
678 
679   @ref Utility
680 **/
681 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
682   bool                is_composite;
683   CeedElemRestriction rstr;
684   CeedInt             num_elem, elem_size, num_comp;
685 
686   CeedCall(CeedOperatorIsComposite(op, &is_composite));
687   if (is_composite) {
688     // LCOV_EXCL_START
689     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
690     // LCOV_EXCL_STOP
691   }
692   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
693   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
694   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
695   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
696   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
697 
698   return CEED_ERROR_SUCCESS;
699 }
700 
701 /**
702   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
703 
704   @param[in]  op_fine      Fine grid operator
705   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter
706   @param[in]  rstr_coarse  Coarse grid restriction
707   @param[in]  basis_coarse Coarse grid active vector basis
708   @param[in]  basis_c_to_f Basis for coarse to fine interpolation
709   @param[out] op_coarse    Coarse grid operator
710   @param[out] op_prolong   Coarse to fine operator
711   @param[out] op_restrict  Fine to coarse operator
712 
713   @return An error code: 0 - success, otherwise - failure
714 
715   @ref Developer
716 **/
717 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
718                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
719   Ceed ceed;
720   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
721 
722   // Check for composite operator
723   bool is_composite;
724   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
725   if (is_composite) {
726     // LCOV_EXCL_START
727     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
728     // LCOV_EXCL_STOP
729   }
730 
731   // Coarse Grid
732   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
733   CeedElemRestriction rstr_fine = NULL;
734   // -- Clone input fields
735   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
736     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
737       rstr_fine = op_fine->input_fields[i]->elem_rstr;
738       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
739     } else {
740       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
741                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
742     }
743   }
744   // -- Clone output fields
745   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
746     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
747       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
748     } else {
749       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
750                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
751     }
752   }
753   // -- Clone QFunctionAssemblyData
754   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
755 
756   // Multiplicity vector
757   CeedVector mult_vec, mult_e_vec;
758   CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
759   CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
760   CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
761   CeedCall(CeedVectorSetValue(mult_vec, 0.0));
762   CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
763   CeedCall(CeedVectorDestroy(&mult_e_vec));
764   CeedCall(CeedVectorReciprocal(mult_vec));
765 
766   // Restriction
767   CeedInt num_comp;
768   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
769   CeedQFunction qf_restrict;
770   CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
771   CeedInt *num_comp_r_data;
772   CeedCall(CeedCalloc(1, &num_comp_r_data));
773   num_comp_r_data[0] = num_comp;
774   CeedQFunctionContext ctx_r;
775   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
776   CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
777   CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
778   CeedCall(CeedQFunctionContextDestroy(&ctx_r));
779   CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
780   CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
781   CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
782   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
783 
784   CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
785   CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
786   CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
787   CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
788 
789   // Prolongation
790   CeedQFunction qf_prolong;
791   CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
792   CeedInt *num_comp_p_data;
793   CeedCall(CeedCalloc(1, &num_comp_p_data));
794   num_comp_p_data[0] = num_comp;
795   CeedQFunctionContext ctx_p;
796   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
797   CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
798   CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
799   CeedCall(CeedQFunctionContextDestroy(&ctx_p));
800   CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
801   CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
802   CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
803   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
804 
805   CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
806   CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
807   CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
808   CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
809 
810   // Clone name
811   bool   has_name = op_fine->name;
812   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
813   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
814   {
815     char *prolongation_name;
816     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
817     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
818     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
819     CeedCall(CeedFree(&prolongation_name));
820   }
821   {
822     char *restriction_name;
823     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
824     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
825     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
826     CeedCall(CeedFree(&restriction_name));
827   }
828 
829   // Check
830   CeedCall(CeedOperatorCheckReady(*op_coarse));
831   CeedCall(CeedOperatorCheckReady(*op_prolong));
832   CeedCall(CeedOperatorCheckReady(*op_restrict));
833 
834   // Cleanup
835   CeedCall(CeedVectorDestroy(&mult_vec));
836   CeedCall(CeedBasisDestroy(&basis_c_to_f));
837   CeedCall(CeedQFunctionDestroy(&qf_restrict));
838   CeedCall(CeedQFunctionDestroy(&qf_prolong));
839 
840   return CEED_ERROR_SUCCESS;
841 }
842 
843 /**
844   @brief Build 1D mass matrix and Laplacian with perturbation
845 
846   @param[in]  interp_1d   Interpolation matrix in one dimension
847   @param[in]  grad_1d     Gradient matrix in one dimension
848   @param[in]  q_weight_1d Quadrature weights in one dimension
849   @param[in]  P_1d        Number of basis nodes in one dimension
850   @param[in]  Q_1d        Number of quadrature points in one dimension
851   @param[in]  dim         Dimension of basis
852   @param[out] mass        Assembled mass matrix in one dimension
853   @param[out] laplace     Assembled perturbed Laplacian in one dimension
854 
855   @return An error code: 0 - success, otherwise - failure
856 
857   @ref Developer
858 **/
859 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d,
860                                                       CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
861   for (CeedInt i = 0; i < P_1d; i++) {
862     for (CeedInt j = 0; j < P_1d; j++) {
863       CeedScalar sum = 0.0;
864       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
865       mass[i + j * P_1d] = sum;
866     }
867   }
868   // -- Laplacian
869   for (CeedInt i = 0; i < P_1d; i++) {
870     for (CeedInt j = 0; j < P_1d; j++) {
871       CeedScalar sum = 0.0;
872       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
873       laplace[i + j * P_1d] = sum;
874     }
875   }
876   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
877   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
878   return CEED_ERROR_SUCCESS;
879 }
880 CeedPragmaOptimizeOn;
881 
882 /// @}
883 
884 /// ----------------------------------------------------------------------------
885 /// CeedOperator Backend API
886 /// ----------------------------------------------------------------------------
887 /// @addtogroup CeedOperatorBackend
888 /// @{
889 
890 /**
891   @brief Create object holding CeedQFunction assembly data for CeedOperator
892 
893   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
894   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
895 
896   @return An error code: 0 - success, otherwise - failure
897 
898   @ref Backend
899 **/
900 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
901   CeedCall(CeedCalloc(1, data));
902   (*data)->ref_count = 1;
903   (*data)->ceed      = ceed;
904   CeedCall(CeedReference(ceed));
905 
906   return CEED_ERROR_SUCCESS;
907 }
908 
909 /**
910   @brief Increment the reference counter for a CeedQFunctionAssemblyData
911 
912   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
913 
914   @return An error code: 0 - success, otherwise - failure
915 
916   @ref Backend
917 **/
918 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
919   data->ref_count++;
920   return CEED_ERROR_SUCCESS;
921 }
922 
923 /**
924   @brief Set re-use of CeedQFunctionAssemblyData
925 
926   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
927   @param[in]     reuse_data Boolean flag indicating data re-use
928 
929   @return An error code: 0 - success, otherwise - failure
930 
931   @ref Backend
932 **/
933 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
934   data->reuse_data        = reuse_data;
935   data->needs_data_update = true;
936   return CEED_ERROR_SUCCESS;
937 }
938 
939 /**
940   @brief Mark QFunctionAssemblyData as stale
941 
942   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
943   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
944 
945   @return An error code: 0 - success, otherwise - failure
946 
947   @ref Backend
948 **/
949 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
950   data->needs_data_update = needs_data_update;
951   return CEED_ERROR_SUCCESS;
952 }
953 
954 /**
955   @brief Determine if QFunctionAssemblyData needs update
956 
957   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
958   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
959 
960   @return An error code: 0 - success, otherwise - failure
961 
962   @ref Backend
963 **/
964 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
965   *is_update_needed = !data->reuse_data || data->needs_data_update;
966   return CEED_ERROR_SUCCESS;
967 }
968 
969 /**
970   @brief Copy the pointer to a CeedQFunctionAssemblyData.
971            Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
972            Note: If `*data_copy` is non-NULL, then it is assumed that `*data_copy` is a pointer to a CeedQFunctionAssemblyData.
973              This CeedQFunctionAssemblyData will be destroyed if `*data_copy` is the only reference to this CeedQFunctionAssemblyData.
974 
975   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
976   @param[in,out] data_copy Variable to store copied reference
977 
978   @return An error code: 0 - success, otherwise - failure
979 
980   @ref Backend
981 **/
982 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
983   CeedCall(CeedQFunctionAssemblyDataReference(data));
984   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
985   *data_copy = data;
986   return CEED_ERROR_SUCCESS;
987 }
988 
989 /**
990   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
991 
992   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
993   @param[out] is_setup Boolean flag for setup status
994 
995   @return An error code: 0 - success, otherwise - failure
996 
997   @ref Backend
998 **/
999 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1000   *is_setup = data->is_setup;
1001   return CEED_ERROR_SUCCESS;
1002 }
1003 
1004 /**
1005   @brief Set internal objects for CeedQFunctionAssemblyData
1006 
1007   @param[in,out] data CeedQFunctionAssemblyData to set objects
1008   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1009   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1010 
1011   @return An error code: 0 - success, otherwise - failure
1012 
1013   @ref Backend
1014 **/
1015 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1016   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1017   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1018 
1019   data->is_setup = true;
1020   return CEED_ERROR_SUCCESS;
1021 }
1022 
1023 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1024   if (!data->is_setup) {
1025     // LCOV_EXCL_START
1026     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1027     // LCOV_EXCL_STOP
1028   }
1029 
1030   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1031   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1032 
1033   return CEED_ERROR_SUCCESS;
1034 }
1035 
1036 /**
1037   @brief Destroy CeedQFunctionAssemblyData
1038 
1039   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1040 
1041   @return An error code: 0 - success, otherwise - failure
1042 
1043   @ref Backend
1044 **/
1045 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1046   if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;
1047 
1048   CeedCall(CeedDestroy(&(*data)->ceed));
1049   CeedCall(CeedVectorDestroy(&(*data)->vec));
1050   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1051 
1052   CeedCall(CeedFree(data));
1053   return CEED_ERROR_SUCCESS;
1054 }
1055 
1056 /**
1057   @brief Get CeedOperatorAssemblyData
1058 
1059   @param[in]  op   CeedOperator to assemble
1060   @param[out] data CeedQFunctionAssemblyData
1061 
1062   @return An error code: 0 - success, otherwise - failure
1063 
1064   @ref Backend
1065 **/
1066 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1067   if (!op->op_assembled) {
1068     CeedOperatorAssemblyData data;
1069 
1070     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1071     op->op_assembled = data;
1072   }
1073   *data = op->op_assembled;
1074 
1075   return CEED_ERROR_SUCCESS;
1076 }
1077 
1078 /**
1079   @brief Create object holding CeedOperator assembly data.
1080 
1081     The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1082     An array with references to the corresponding active CeedElemRestrictions is also stored.
1083     For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1084     The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1085       CeedEvalMode.
1086     The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1087     Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1088 
1089   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1090   @param[in]  op   CeedOperator to be assembled
1091   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1092 
1093   @return An error code: 0 - success, otherwise - failure
1094 
1095   @ref Backend
1096 **/
1097 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1098   CeedInt num_active_bases = 0;
1099 
1100   // Allocate
1101   CeedCall(CeedCalloc(1, data));
1102   (*data)->ceed = ceed;
1103   CeedCall(CeedReference(ceed));
1104 
1105   // Build OperatorAssembly data
1106   CeedQFunction       qf;
1107   CeedQFunctionField *qf_fields;
1108   CeedOperatorField  *op_fields;
1109   CeedInt             num_input_fields;
1110   CeedCall(CeedOperatorGetQFunction(op, &qf));
1111   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1112   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1113 
1114   // Determine active input basis
1115   CeedInt       *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0;
1116   CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL;
1117   CeedSize     **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1118 
1119   for (CeedInt i = 0; i < num_input_fields; i++) {
1120     CeedVector vec;
1121 
1122     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1123     if (vec == CEED_VECTOR_ACTIVE) {
1124       CeedInt      index = -1, dim = 1, num_components;
1125       CeedBasis    basis_in = NULL;
1126       CeedEvalMode eval_mode;
1127 
1128       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1129       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1130       CeedCall(CeedBasisGetNumComponents(basis_in, &num_components));
1131       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1132       for (CeedInt i = 0; i < num_active_bases; i++) {
1133         if ((*data)->active_bases[i] == basis_in) index = i;
1134       }
1135       if (index == -1) {
1136         CeedElemRestriction elem_rstr_in;
1137 
1138         index = num_active_bases;
1139         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1140         (*data)->active_bases[num_active_bases] = NULL;
1141         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases]));
1142         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1143         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1144         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1145         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases]));
1146         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1147         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1148         num_eval_modes_in[index]  = 0;
1149         num_eval_modes_out[index] = 0;
1150         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1151         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1152         eval_modes_in[index]  = NULL;
1153         eval_modes_out[index] = NULL;
1154         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1155         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1156         eval_mode_offsets_in[index]  = NULL;
1157         eval_mode_offsets_out[index] = NULL;
1158         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1159         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1160         (*data)->assembled_bases_in[index]  = NULL;
1161         (*data)->assembled_bases_out[index] = NULL;
1162         num_active_bases++;
1163       }
1164       switch (eval_mode) {
1165         case CEED_EVAL_NONE:
1166         case CEED_EVAL_INTERP:
1167           CeedCall(CeedRealloc(num_eval_modes_in[index] + 1, &eval_modes_in[index]));
1168           CeedCall(CeedRealloc(num_eval_modes_in[index] + 1, &eval_mode_offsets_in[index]));
1169           eval_modes_in[index][num_eval_modes_in[index]]        = eval_mode;
1170           eval_mode_offsets_in[index][num_eval_modes_in[index]] = offset;
1171           offset += num_components;
1172           num_eval_modes_in[index] += 1;
1173           break;
1174         case CEED_EVAL_GRAD:
1175           CeedCall(CeedRealloc(num_eval_modes_in[index] + dim, &eval_modes_in[index]));
1176           CeedCall(CeedRealloc(num_eval_modes_in[index] + dim, &eval_mode_offsets_in[index]));
1177           for (CeedInt d = 0; d < dim; d++) {
1178             eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1179             eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1180             offset += num_components;
1181           }
1182           num_eval_modes_in[index] += dim;
1183           break;
1184         case CEED_EVAL_WEIGHT:
1185         case CEED_EVAL_DIV:
1186         case CEED_EVAL_CURL:
1187           break;  // Caught by QF Assembly
1188       }
1189     }
1190   }
1191   (*data)->num_eval_modes_in    = num_eval_modes_in;
1192   (*data)->eval_modes_in        = eval_modes_in;
1193   (*data)->eval_mode_offsets_in = eval_mode_offsets_in;
1194 
1195   // Determine active output basis
1196   CeedInt num_output_fields;
1197 
1198   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1199   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1200   offset = 0;
1201   for (CeedInt i = 0; i < num_output_fields; i++) {
1202     CeedVector vec;
1203 
1204     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1205     if (vec == CEED_VECTOR_ACTIVE) {
1206       CeedInt      index = -1, dim = 1, num_components;
1207       CeedBasis    basis_out = NULL;
1208       CeedEvalMode eval_mode;
1209 
1210       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1211       CeedCall(CeedBasisGetDimension(basis_out, &dim));
1212       CeedCall(CeedBasisGetNumComponents(basis_out, &num_components));
1213       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1214       for (CeedInt i = 0; i < num_active_bases; i++) {
1215         if ((*data)->active_bases[i] == basis_out) index = i;
1216       }
1217       if (index == -1) {
1218         CeedElemRestriction elem_rstr_out;
1219 
1220         index = num_active_bases;
1221         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1222         (*data)->active_bases[num_active_bases] = NULL;
1223         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases]));
1224         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1225         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1226         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1227         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases]));
1228         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1229         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1230         num_eval_modes_in[index]  = 0;
1231         num_eval_modes_out[index] = 0;
1232         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1233         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1234         eval_modes_in[index]  = NULL;
1235         eval_modes_out[index] = NULL;
1236         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1237         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1238         eval_mode_offsets_in[index]  = NULL;
1239         eval_mode_offsets_out[index] = NULL;
1240         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1241         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1242         (*data)->assembled_bases_in[index]  = NULL;
1243         (*data)->assembled_bases_out[index] = NULL;
1244         num_active_bases++;
1245       }
1246       switch (eval_mode) {
1247         case CEED_EVAL_NONE:
1248         case CEED_EVAL_INTERP:
1249           CeedCall(CeedRealloc(num_eval_modes_out[index] + 1, &eval_modes_out[index]));
1250           CeedCall(CeedRealloc(num_eval_modes_out[index] + 1, &eval_mode_offsets_out[index]));
1251           eval_modes_out[index][num_eval_modes_out[index]]        = eval_mode;
1252           eval_mode_offsets_out[index][num_eval_modes_out[index]] = offset;
1253           offset += num_components;
1254           num_eval_modes_out[index] += 1;
1255           break;
1256         case CEED_EVAL_GRAD:
1257           CeedCall(CeedRealloc(num_eval_modes_out[index] + dim, &eval_modes_out[index]));
1258           CeedCall(CeedRealloc(num_eval_modes_out[index] + dim, &eval_mode_offsets_out[index]));
1259           for (CeedInt d = 0; d < dim; d++) {
1260             eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1261             eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1262             offset += num_components;
1263           }
1264           num_eval_modes_out[index] += dim;
1265           break;
1266         case CEED_EVAL_WEIGHT:
1267         case CEED_EVAL_DIV:
1268         case CEED_EVAL_CURL:
1269           break;  // Caught by QF Assembly
1270       }
1271     }
1272   }
1273   (*data)->num_output_components = offset;
1274   (*data)->num_eval_modes_out    = num_eval_modes_out;
1275   (*data)->eval_modes_out        = eval_modes_out;
1276   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1277   (*data)->num_active_bases      = num_active_bases;
1278 
1279   return CEED_ERROR_SUCCESS;
1280 }
1281 
1282 /**
1283   @brief Get CeedOperator CeedEvalModes for assembly.
1284 
1285     Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1286 
1287   @param[in]  data                  CeedOperatorAssemblyData
1288   @param[out] num_active_bases      Total number of active bases
1289   @param[out] num_eval_mode_in      Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1290                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1291   @param[out] eval_mode_in          Pointer to hold arrays of input CeedEvalModes, or NULL.
1292   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1293   @param[out] num_eval_mode_out     Pointer to hold array of numbers of output CeedEvalModes, or NULL
1294   @param[out] eval_mode_out         Pointer to hold arrays of output CeedEvalModes, or NULL.
1295   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1296   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1297                                       including contributions of all active bases
1298 
1299   @return An error code: 0 - success, otherwise - failure
1300 
1301   @ref Backend
1302 **/
1303 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in,
1304                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out,
1305                                          const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) {
1306   if (num_active_bases) *num_active_bases = data->num_active_bases;
1307   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1308   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1309   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1310   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1311   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1312   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1313   if (num_output_components) *num_output_components = data->num_output_components;
1314 
1315   return CEED_ERROR_SUCCESS;
1316 }
1317 
1318 /**
1319   @brief Get CeedOperator CeedBasis data for assembly.
1320 
1321     Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1322 
1323   @param[in]  data                CeedOperatorAssemblyData
1324   @param[out] num_active_bases    Number of active bases, or NULL
1325   @param[out] active_bases        Pointer to hold active CeedBasis, or NULL
1326   @param[out] assembled_bases_in  Pointer to hold assembled active input B, or NULL
1327   @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL
1328 
1329   @return An error code: 0 - success, otherwise - failure
1330 
1331   @ref Backend
1332 **/
1333 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases,
1334                                      const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) {
1335   // Assemble B_in, B_out if needed
1336   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1337     CeedInt num_qpts;
1338 
1339     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1340     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1341       CeedInt           elem_size;
1342       CeedScalar       *B_in = NULL, *identity = NULL;
1343       const CeedScalar *interp_in, *grad_in;
1344       bool              has_eval_none = false;
1345 
1346       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &elem_size));
1347       CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_modes_in[b], &B_in));
1348 
1349       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1350         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1351       }
1352       if (has_eval_none) {
1353         CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1354         for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1355           identity[i * elem_size + i] = 1.0;
1356         }
1357       }
1358       CeedCall(CeedBasisGetInterp(data->active_bases[b], &interp_in));
1359       CeedCall(CeedBasisGetGrad(data->active_bases[b], &grad_in));
1360 
1361       for (CeedInt q = 0; q < num_qpts; q++) {
1362         for (CeedInt n = 0; n < elem_size; n++) {
1363           CeedInt d_in = -1;
1364           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1365             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1366             const CeedScalar *B  = NULL;
1367 
1368             if (data->eval_modes_in[b][e_in] == CEED_EVAL_GRAD) d_in++;
1369             CeedOperatorGetBasisPointer(data->eval_modes_in[b][e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &B);
1370             B_in[(qq + e_in) * elem_size + n] = B[q * elem_size + n];
1371           }
1372         }
1373       }
1374       if (identity) CeedCall(CeedFree(identity));
1375       data->assembled_bases_in[b] = B_in;
1376     }
1377   }
1378 
1379   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1380     CeedInt num_qpts;
1381 
1382     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1383     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1384       CeedInt           elem_size;
1385       const CeedScalar *interp_out, *grad_out;
1386       bool              has_eval_none = false;
1387       CeedScalar       *B_out = NULL, *identity = NULL;
1388 
1389       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &elem_size));
1390       CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_modes_out[b], &B_out));
1391 
1392       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1393         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1394       }
1395       if (has_eval_none) {
1396         CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1397         for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1398           identity[i * elem_size + i] = 1.0;
1399         }
1400       }
1401       CeedCall(CeedBasisGetInterp(data->active_bases[b], &interp_out));
1402       CeedCall(CeedBasisGetGrad(data->active_bases[b], &grad_out));
1403 
1404       for (CeedInt q = 0; q < num_qpts; q++) {
1405         for (CeedInt n = 0; n < elem_size; n++) {
1406           CeedInt d_out = -1;
1407           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1408             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1409             const CeedScalar *B  = NULL;
1410 
1411             if (data->eval_modes_out[b][e_out] == CEED_EVAL_GRAD) d_out++;
1412             CeedOperatorGetBasisPointer(data->eval_modes_out[b][e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &B);
1413             B_out[(qq + e_out) * elem_size + n] = B[q * elem_size + n];
1414           }
1415         }
1416       }
1417       if (identity) CeedCall(CeedFree(identity));
1418       data->assembled_bases_out[b] = B_out;
1419     }
1420   }
1421 
1422   // Pass out assembled data
1423   if (active_bases) *active_bases = data->active_bases;
1424   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1425   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1426 
1427   return CEED_ERROR_SUCCESS;
1428 }
1429 
1430 /**
1431   @brief Get CeedOperator CeedBasis data for assembly.
1432 
1433   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1434 
1435   @param[in]  data                  CeedOperatorAssemblyData
1436   @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL
1437   @param[out] active_elem_rstrs     Pointer to hold active CeedElemRestrictions, or NULL
1438 
1439   @return An error code: 0 - success, otherwise - failure
1440 
1441   @ref Backend
1442 **/
1443 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs,
1444                                                 CeedElemRestriction **active_elem_rstrs) {
1445   if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases;
1446   if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs;
1447 
1448   return CEED_ERROR_SUCCESS;
1449 }
1450 
1451 /**
1452   @brief Destroy CeedOperatorAssemblyData
1453 
1454   @param[in,out] data CeedOperatorAssemblyData to destroy
1455 
1456   @return An error code: 0 - success, otherwise - failure
1457 
1458   @ref Backend
1459 **/
1460 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1461   if (!*data) return CEED_ERROR_SUCCESS;
1462 
1463   CeedCall(CeedDestroy(&(*data)->ceed));
1464   for (CeedInt b = 0; b < (*data)->num_active_bases; b++) {
1465     CeedCall(CeedBasisDestroy(&(*data)->active_bases[b]));
1466     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b]));
1467     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1468     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1469     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1470     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1471     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1472     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1473   }
1474   CeedCall(CeedFree(&(*data)->active_bases));
1475   CeedCall(CeedFree(&(*data)->active_elem_rstrs));
1476   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1477   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1478   CeedCall(CeedFree(&(*data)->eval_modes_in));
1479   CeedCall(CeedFree(&(*data)->eval_modes_out));
1480   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1481   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1482   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1483   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1484 
1485   CeedCall(CeedFree(data));
1486   return CEED_ERROR_SUCCESS;
1487 }
1488 
1489 /// @}
1490 
1491 /// ----------------------------------------------------------------------------
1492 /// CeedOperator Public API
1493 /// ----------------------------------------------------------------------------
1494 /// @addtogroup CeedOperatorUser
1495 /// @{
1496 
1497 /**
1498   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1499 
1500   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1501     The vector 'assembled' is of shape [num_elements, num_input_fields, num_output_fields, num_quad_points] and contains column-major matrices
1502 representing the action of the CeedQFunction for a corresponding quadrature point on an element. Inputs and outputs are in the order provided by the
1503 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order,
1504 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1]
1505 and producing the output [dv_0, dv_1, v].
1506 
1507   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1508 
1509   @param[in]  op        CeedOperator to assemble CeedQFunction
1510   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1511   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1512   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1513 
1514   @return An error code: 0 - success, otherwise - failure
1515 
1516   @ref User
1517 **/
1518 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1519   CeedCall(CeedOperatorCheckReady(op));
1520 
1521   if (op->LinearAssembleQFunction) {
1522     // Backend version
1523     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1524   } else {
1525     // Operator fallback
1526     CeedOperator op_fallback;
1527 
1528     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1529     if (op_fallback) {
1530       CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1531     } else {
1532       // LCOV_EXCL_START
1533       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1534       // LCOV_EXCL_STOP
1535     }
1536   }
1537   return CEED_ERROR_SUCCESS;
1538 }
1539 
1540 /**
1541   @brief Assemble CeedQFunction and store result internally.
1542            Return copied references of stored data to the caller.
1543            Caller is responsible for ownership and destruction of the copied references.
1544            See also @ref CeedOperatorLinearAssembleQFunction
1545 
1546   @param[in]  op        CeedOperator to assemble CeedQFunction
1547   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1548   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1549   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1550 
1551   @return An error code: 0 - success, otherwise - failure
1552 
1553   @ref User
1554 **/
1555 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1556   CeedCall(CeedOperatorCheckReady(op));
1557 
1558   if (op->LinearAssembleQFunctionUpdate) {
1559     // Backend version
1560     bool                qf_assembled_is_setup;
1561     CeedVector          assembled_vec  = NULL;
1562     CeedElemRestriction assembled_rstr = NULL;
1563 
1564     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1565     if (qf_assembled_is_setup) {
1566       bool update_needed;
1567 
1568       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1569       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1570       if (update_needed) {
1571         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1572       }
1573     } else {
1574       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1575       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1576     }
1577     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1578 
1579     // Copy reference from internally held copy
1580     *assembled = NULL;
1581     *rstr      = NULL;
1582     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1583     CeedCall(CeedVectorDestroy(&assembled_vec));
1584     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1585     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1586   } else {
1587     // Operator fallback
1588     CeedOperator op_fallback;
1589 
1590     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1591     if (op_fallback) {
1592       CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1593     } else {
1594       // LCOV_EXCL_START
1595       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1596       // LCOV_EXCL_STOP
1597     }
1598   }
1599 
1600   return CEED_ERROR_SUCCESS;
1601 }
1602 
1603 /**
1604   @brief Assemble the diagonal of a square linear CeedOperator
1605 
1606   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1607 
1608   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1609 
1610   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1611 
1612   @param[in]  op        CeedOperator to assemble CeedQFunction
1613   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1614   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1615 
1616   @return An error code: 0 - success, otherwise - failure
1617 
1618   @ref User
1619 **/
1620 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1621   bool is_composite;
1622   CeedCall(CeedOperatorCheckReady(op));
1623   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1624 
1625   CeedSize input_size = 0, output_size = 0;
1626   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1627   if (input_size != output_size) {
1628     // LCOV_EXCL_START
1629     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1630     // LCOV_EXCL_STOP
1631   }
1632 
1633   // Early exit for empty operator
1634   if (!is_composite) {
1635     CeedInt num_elem = 0;
1636 
1637     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1638     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1639   }
1640 
1641   if (op->LinearAssembleDiagonal) {
1642     // Backend version
1643     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1644     return CEED_ERROR_SUCCESS;
1645   } else if (op->LinearAssembleAddDiagonal) {
1646     // Backend version with zeroing first
1647     CeedCall(CeedVectorSetValue(assembled, 0.0));
1648     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1649     return CEED_ERROR_SUCCESS;
1650   } else {
1651     // Operator fallback
1652     CeedOperator op_fallback;
1653 
1654     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1655     if (op_fallback) {
1656       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1657       return CEED_ERROR_SUCCESS;
1658     }
1659   }
1660   // Default interface implementation
1661   CeedCall(CeedVectorSetValue(assembled, 0.0));
1662   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1663 
1664   return CEED_ERROR_SUCCESS;
1665 }
1666 
1667 /**
1668   @brief Assemble the diagonal of a square linear CeedOperator
1669 
1670   This sums into a CeedVector the diagonal of a linear CeedOperator.
1671 
1672   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1673 
1674   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1675 
1676   @param[in]  op        CeedOperator to assemble CeedQFunction
1677   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1678   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1679 
1680   @return An error code: 0 - success, otherwise - failure
1681 
1682   @ref User
1683 **/
1684 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1685   bool is_composite;
1686   CeedCall(CeedOperatorCheckReady(op));
1687   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1688 
1689   CeedSize input_size = 0, output_size = 0;
1690   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1691   if (input_size != output_size) {
1692     // LCOV_EXCL_START
1693     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1694     // LCOV_EXCL_STOP
1695   }
1696 
1697   // Early exit for empty operator
1698   if (!is_composite) {
1699     CeedInt num_elem = 0;
1700 
1701     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1702     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1703   }
1704 
1705   if (op->LinearAssembleAddDiagonal) {
1706     // Backend version
1707     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1708     return CEED_ERROR_SUCCESS;
1709   } else {
1710     // Operator fallback
1711     CeedOperator op_fallback;
1712 
1713     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1714     if (op_fallback) {
1715       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1716       return CEED_ERROR_SUCCESS;
1717     }
1718   }
1719   // Default interface implementation
1720   if (is_composite) {
1721     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1722   } else {
1723     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1724   }
1725 
1726   return CEED_ERROR_SUCCESS;
1727 }
1728 
1729 /**
1730   @brief Assemble the point block diagonal of a square linear CeedOperator
1731 
1732   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1733 
1734   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1735 
1736   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1737 
1738   @param[in]  op        CeedOperator to assemble CeedQFunction
1739   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1740 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1741 component in].
1742   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1743 
1744   @return An error code: 0 - success, otherwise - failure
1745 
1746   @ref User
1747 **/
1748 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1749   bool is_composite;
1750   CeedCall(CeedOperatorCheckReady(op));
1751   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1752 
1753   CeedSize input_size = 0, output_size = 0;
1754   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1755   if (input_size != output_size) {
1756     // LCOV_EXCL_START
1757     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1758     // LCOV_EXCL_STOP
1759   }
1760 
1761   // Early exit for empty operator
1762   if (!is_composite) {
1763     CeedInt num_elem = 0;
1764 
1765     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1766     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1767   }
1768 
1769   if (op->LinearAssemblePointBlockDiagonal) {
1770     // Backend version
1771     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1772     return CEED_ERROR_SUCCESS;
1773   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1774     // Backend version with zeroing first
1775     CeedCall(CeedVectorSetValue(assembled, 0.0));
1776     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1777     return CEED_ERROR_SUCCESS;
1778   } else {
1779     // Operator fallback
1780     CeedOperator op_fallback;
1781 
1782     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1783     if (op_fallback) {
1784       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1785       return CEED_ERROR_SUCCESS;
1786     }
1787   }
1788   // Default interface implementation
1789   CeedCall(CeedVectorSetValue(assembled, 0.0));
1790   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1791 
1792   return CEED_ERROR_SUCCESS;
1793 }
1794 
1795 /**
1796   @brief Assemble the point block diagonal of a square linear CeedOperator
1797 
1798   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1799 
1800   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1801 
1802   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1803 
1804   @param[in]  op        CeedOperator to assemble CeedQFunction
1805   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1806 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1807 component in].
1808   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1809 
1810   @return An error code: 0 - success, otherwise - failure
1811 
1812   @ref User
1813 **/
1814 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1815   bool is_composite;
1816   CeedCall(CeedOperatorCheckReady(op));
1817   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1818 
1819   CeedSize input_size = 0, output_size = 0;
1820   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1821   if (input_size != output_size) {
1822     // LCOV_EXCL_START
1823     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1824     // LCOV_EXCL_STOP
1825   }
1826 
1827   // Early exit for empty operator
1828   if (!is_composite) {
1829     CeedInt num_elem = 0;
1830 
1831     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1832     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1833   }
1834 
1835   if (op->LinearAssembleAddPointBlockDiagonal) {
1836     // Backend version
1837     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1838     return CEED_ERROR_SUCCESS;
1839   } else {
1840     // Operator fallback
1841     CeedOperator op_fallback;
1842 
1843     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1844     if (op_fallback) {
1845       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1846       return CEED_ERROR_SUCCESS;
1847     }
1848   }
1849   // Default interface implementation
1850   if (is_composite) {
1851     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1852   } else {
1853     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1854   }
1855 
1856   return CEED_ERROR_SUCCESS;
1857 }
1858 
1859 /**
1860    @brief Fully assemble the nonzero pattern of a linear operator.
1861 
1862    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1863 
1864    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1865 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j)
1866 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1867 
1868    This will generally be slow unless your operator is low-order.
1869 
1870    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1871 
1872    @param[in]  op          CeedOperator to assemble
1873    @param[out] num_entries Number of entries in coordinate nonzero pattern
1874    @param[out] rows        Row number for each entry
1875    @param[out] cols        Column number for each entry
1876 
1877    @ref User
1878 **/
1879 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1880   CeedInt       num_suboperators, single_entries;
1881   CeedOperator *sub_operators;
1882   bool          is_composite;
1883   CeedCall(CeedOperatorCheckReady(op));
1884   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1885 
1886   if (op->LinearAssembleSymbolic) {
1887     // Backend version
1888     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1889     return CEED_ERROR_SUCCESS;
1890   } else {
1891     // Operator fallback
1892     CeedOperator op_fallback;
1893 
1894     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1895     if (op_fallback) {
1896       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1897       return CEED_ERROR_SUCCESS;
1898     }
1899   }
1900 
1901   // Default interface implementation
1902 
1903   // count entries and allocate rows, cols arrays
1904   *num_entries = 0;
1905   if (is_composite) {
1906     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1907     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1908     for (CeedInt k = 0; k < num_suboperators; ++k) {
1909       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1910       *num_entries += single_entries;
1911     }
1912   } else {
1913     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1914     *num_entries += single_entries;
1915   }
1916   CeedCall(CeedCalloc(*num_entries, rows));
1917   CeedCall(CeedCalloc(*num_entries, cols));
1918 
1919   // assemble nonzero locations
1920   CeedInt offset = 0;
1921   if (is_composite) {
1922     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1923     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1924     for (CeedInt k = 0; k < num_suboperators; ++k) {
1925       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1926       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1927       offset += single_entries;
1928     }
1929   } else {
1930     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1931   }
1932 
1933   return CEED_ERROR_SUCCESS;
1934 }
1935 
1936 /**
1937    @brief Fully assemble the nonzero entries of a linear operator.
1938 
1939    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1940 
1941    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1942 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added,
1943 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1944 
1945    This will generally be slow unless your operator is low-order.
1946 
1947    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1948 
1949    @param[in]  op     CeedOperator to assemble
1950    @param[out] values Values to assemble into matrix
1951 
1952    @ref User
1953 **/
1954 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1955   CeedInt       num_suboperators, single_entries = 0;
1956   CeedOperator *sub_operators;
1957   bool          is_composite;
1958   CeedCall(CeedOperatorCheckReady(op));
1959   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1960 
1961   // Early exit for empty operator
1962   if (!is_composite) {
1963     CeedInt num_elem = 0;
1964 
1965     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1966     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1967   }
1968 
1969   if (op->LinearAssemble) {
1970     // Backend version
1971     CeedCall(op->LinearAssemble(op, values));
1972     return CEED_ERROR_SUCCESS;
1973   } else {
1974     // Operator fallback
1975     CeedOperator op_fallback;
1976 
1977     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1978     if (op_fallback) {
1979       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
1980       return CEED_ERROR_SUCCESS;
1981     }
1982   }
1983 
1984   // Default interface implementation
1985   CeedInt offset = 0;
1986   if (is_composite) {
1987     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1988     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1989     for (CeedInt k = 0; k < num_suboperators; k++) {
1990       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
1991       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1992       offset += single_entries;
1993     }
1994   } else {
1995     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
1996   }
1997 
1998   return CEED_ERROR_SUCCESS;
1999 }
2000 
2001 /**
2002   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
2003 
2004   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2005 
2006   @param[in]  op               Composite CeedOperator
2007   @param[in]  num_skip_indices Number of suboperators to skip
2008   @param[in]  skip_indices     Array of indices of suboperators to skip
2009   @param[out] mult             Vector to store multiplicity (of size l_size)
2010 
2011   @return An error code: 0 - success, otherwise - failure
2012 
2013   @ref User
2014 **/
2015 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2016   CeedCall(CeedOperatorCheckReady(op));
2017 
2018   Ceed                ceed;
2019   CeedInt             num_suboperators;
2020   CeedSize            l_vec_len;
2021   CeedScalar         *mult_array;
2022   CeedVector          ones_l_vec;
2023   CeedElemRestriction elem_rstr;
2024   CeedOperator       *sub_operators;
2025 
2026   CeedCall(CeedOperatorGetCeed(op, &ceed));
2027 
2028   // Zero mult vector
2029   CeedCall(CeedVectorSetValue(mult, 0.0));
2030 
2031   // Get suboperators
2032   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2033   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2034   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2035 
2036   // Work vector
2037   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2038   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2039   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2040   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2041 
2042   // Compute multiplicity across suboperators
2043   for (CeedInt i = 0; i < num_suboperators; i++) {
2044     const CeedScalar *sub_mult_array;
2045     CeedVector        sub_mult_l_vec, ones_e_vec;
2046 
2047     // -- Check for suboperator to skip
2048     for (CeedInt j = 0; j < num_skip_indices; j++) {
2049       if (skip_indices[j] == i) continue;
2050     }
2051 
2052     // -- Sub operator multiplicity
2053     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2054     CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2055     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2056     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2057     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2058     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2059     // ---- Flag every node present in the current suboperator
2060     for (CeedInt j = 0; j < l_vec_len; j++) {
2061       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2062     }
2063     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2064     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2065     CeedCall(CeedVectorDestroy(&ones_e_vec));
2066   }
2067   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2068   CeedCall(CeedVectorDestroy(&ones_l_vec));
2069 
2070   return CEED_ERROR_SUCCESS;
2071 }
2072 
2073 /**
2074   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2075 grid interpolation
2076 
2077   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2078 
2079   @param[in]  op_fine      Fine grid operator
2080   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter
2081   @param[in]  rstr_coarse  Coarse grid restriction
2082   @param[in]  basis_coarse Coarse grid active vector basis
2083   @param[out] op_coarse    Coarse grid operator
2084   @param[out] op_prolong   Coarse to fine operator
2085   @param[out] op_restrict  Fine to coarse operator
2086 
2087   @return An error code: 0 - success, otherwise - failure
2088 
2089   @ref User
2090 **/
2091 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2092                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2093   CeedCall(CeedOperatorCheckReady(op_fine));
2094 
2095   // Build prolongation matrix
2096   CeedBasis basis_fine, basis_c_to_f;
2097   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2098   CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2099 
2100   // Core code
2101   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2102 
2103   return CEED_ERROR_SUCCESS;
2104 }
2105 
2106 /**
2107   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2108 
2109   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2110 
2111   @param[in]  op_fine       Fine grid operator
2112   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
2113   @param[in]  rstr_coarse   Coarse grid restriction
2114   @param[in]  basis_coarse  Coarse grid active vector basis
2115   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation
2116   @param[out] op_coarse     Coarse grid operator
2117   @param[out] op_prolong    Coarse to fine operator
2118   @param[out] op_restrict   Fine to coarse operator
2119 
2120   @return An error code: 0 - success, otherwise - failure
2121 
2122   @ref User
2123 **/
2124 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2125                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2126                                              CeedOperator *op_restrict) {
2127   CeedCall(CeedOperatorCheckReady(op_fine));
2128   Ceed ceed;
2129   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2130 
2131   // Check for compatible quadrature spaces
2132   CeedBasis basis_fine;
2133   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2134   CeedInt Q_f, Q_c;
2135   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2136   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2137   if (Q_f != Q_c) {
2138     // LCOV_EXCL_START
2139     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2140     // LCOV_EXCL_STOP
2141   }
2142 
2143   // Coarse to fine basis
2144   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2145   CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2146   CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2147   CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2148   CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2149   P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2150   CeedScalar *q_ref, *q_weight, *grad;
2151   CeedCall(CeedCalloc(P_1d_f, &q_ref));
2152   CeedCall(CeedCalloc(P_1d_f, &q_weight));
2153   CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2154   CeedBasis basis_c_to_f;
2155   CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2156   CeedCall(CeedFree(&q_ref));
2157   CeedCall(CeedFree(&q_weight));
2158   CeedCall(CeedFree(&grad));
2159 
2160   // Core code
2161   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2162   return CEED_ERROR_SUCCESS;
2163 }
2164 
2165 /**
2166   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2167 
2168   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2169 
2170   @param[in]  op_fine       Fine grid operator
2171   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
2172   @param[in]  rstr_coarse   Coarse grid restriction
2173   @param[in]  basis_coarse  Coarse grid active vector basis
2174   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation
2175   @param[out] op_coarse     Coarse grid operator
2176   @param[out] op_prolong    Coarse to fine operator
2177   @param[out] op_restrict   Fine to coarse operator
2178 
2179   @return An error code: 0 - success, otherwise - failure
2180 
2181   @ref User
2182 **/
2183 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2184                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2185                                        CeedOperator *op_restrict) {
2186   CeedCall(CeedOperatorCheckReady(op_fine));
2187   Ceed ceed;
2188   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2189 
2190   // Check for compatible quadrature spaces
2191   CeedBasis basis_fine;
2192   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2193   CeedInt Q_f, Q_c;
2194   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2195   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2196   if (Q_f != Q_c) {
2197     // LCOV_EXCL_START
2198     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2199     // LCOV_EXCL_STOP
2200   }
2201 
2202   // Coarse to fine basis
2203   CeedElemTopology topo;
2204   CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2205   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2206   CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2207   CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2208   CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2209   CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2210   CeedScalar *q_ref, *q_weight, *grad;
2211   CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2212   CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2213   CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2214   CeedBasis basis_c_to_f;
2215   CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2216   CeedCall(CeedFree(&q_ref));
2217   CeedCall(CeedFree(&q_weight));
2218   CeedCall(CeedFree(&grad));
2219 
2220   // Core code
2221   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2222   return CEED_ERROR_SUCCESS;
2223 }
2224 
2225 /**
2226   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2227 
2228   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2229     This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, M = V^T V, K = V^T S V.
2230     The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form V^T
2231 S^hat V. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear.
2232 
2233   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2234 
2235   @param[in]  op      CeedOperator to create element inverses
2236   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2237   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2238 
2239   @return An error code: 0 - success, otherwise - failure
2240 
2241   @ref User
2242 **/
2243 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2244   CeedCall(CeedOperatorCheckReady(op));
2245 
2246   if (op->CreateFDMElementInverse) {
2247     // Backend version
2248     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2249     return CEED_ERROR_SUCCESS;
2250   } else {
2251     // Operator fallback
2252     CeedOperator op_fallback;
2253 
2254     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2255     if (op_fallback) {
2256       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2257       return CEED_ERROR_SUCCESS;
2258     }
2259   }
2260 
2261   // Default interface implementation
2262   Ceed ceed, ceed_parent;
2263   CeedCall(CeedOperatorGetCeed(op, &ceed));
2264   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2265   ceed_parent = ceed_parent ? ceed_parent : ceed;
2266   CeedQFunction qf;
2267   CeedCall(CeedOperatorGetQFunction(op, &qf));
2268 
2269   // Determine active input basis
2270   bool                interp = false, grad = false;
2271   CeedBasis           basis = NULL;
2272   CeedElemRestriction rstr  = NULL;
2273   CeedOperatorField  *op_fields;
2274   CeedQFunctionField *qf_fields;
2275   CeedInt             num_input_fields;
2276   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2277   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2278   for (CeedInt i = 0; i < num_input_fields; i++) {
2279     CeedVector vec;
2280     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2281     if (vec == CEED_VECTOR_ACTIVE) {
2282       CeedEvalMode eval_mode;
2283       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2284       interp = interp || eval_mode == CEED_EVAL_INTERP;
2285       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2286       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2287       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2288     }
2289   }
2290   if (!basis) {
2291     // LCOV_EXCL_START
2292     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2293     // LCOV_EXCL_STOP
2294   }
2295   CeedSize l_size = 1;
2296   CeedInt  P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
2297   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2298   CeedCall(CeedBasisGetNumNodes(basis, &elem_size));
2299   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2300   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2301   CeedCall(CeedBasisGetDimension(basis, &dim));
2302   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2303   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2304   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2305 
2306   // Build and diagonalize 1D Mass and Laplacian
2307   bool tensor_basis;
2308   CeedCall(CeedBasisIsTensor(basis, &tensor_basis));
2309   if (!tensor_basis) {
2310     // LCOV_EXCL_START
2311     return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2312     // LCOV_EXCL_STOP
2313   }
2314   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2315   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2316   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2317   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2318   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2319   CeedCall(CeedCalloc(P_1d, &lambda));
2320   // -- Build matrices
2321   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2322   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2323   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2324   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2325   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2326 
2327   // -- Diagonalize
2328   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2329   CeedCall(CeedFree(&mass));
2330   CeedCall(CeedFree(&laplace));
2331   for (CeedInt i = 0; i < P_1d; i++) {
2332     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2333   }
2334   CeedCall(CeedFree(&x));
2335 
2336   // Assemble QFunction
2337   CeedVector          assembled;
2338   CeedElemRestriction rstr_qf;
2339   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2340   CeedInt layout[3];
2341   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2342   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2343   CeedScalar max_norm = 0;
2344   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2345 
2346   // Calculate element averages
2347   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2348   CeedScalar       *elem_avg;
2349   const CeedScalar *assembled_array, *q_weight_array;
2350   CeedVector        q_weight;
2351   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2352   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2353   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2354   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2355   CeedCall(CeedCalloc(num_elem, &elem_avg));
2356   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2357   for (CeedInt e = 0; e < num_elem; e++) {
2358     CeedInt count = 0;
2359     for (CeedInt q = 0; q < num_qpts; q++) {
2360       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2361         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2362           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2363           count++;
2364         }
2365       }
2366     }
2367     if (count) {
2368       elem_avg[e] /= count;
2369     } else {
2370       elem_avg[e] = 1.0;
2371     }
2372   }
2373   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2374   CeedCall(CeedVectorDestroy(&assembled));
2375   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2376   CeedCall(CeedVectorDestroy(&q_weight));
2377 
2378   // Build FDM diagonal
2379   CeedVector  q_data;
2380   CeedScalar *q_data_array, *fdm_diagonal;
2381   CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal));
2382   const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON;
2383   for (CeedInt c = 0; c < num_comp; c++) {
2384     for (CeedInt n = 0; n < elem_size; n++) {
2385       if (interp) fdm_diagonal[c * elem_size + n] = 1.0;
2386       if (grad) {
2387         for (CeedInt d = 0; d < dim; d++) {
2388           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2389           fdm_diagonal[c * elem_size + n] += lambda[i];
2390         }
2391       }
2392       if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound;
2393     }
2394   }
2395   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data));
2396   CeedCall(CeedVectorSetValue(q_data, 0.0));
2397   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2398   for (CeedInt e = 0; e < num_elem; e++) {
2399     for (CeedInt c = 0; c < num_comp; c++) {
2400       for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]);
2401     }
2402   }
2403   CeedCall(CeedFree(&elem_avg));
2404   CeedCall(CeedFree(&fdm_diagonal));
2405   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2406 
2407   // Setup FDM operator
2408   // -- Basis
2409   CeedBasis   fdm_basis;
2410   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2411   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2412   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2413   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2414   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2415   CeedCall(CeedFree(&fdm_interp));
2416   CeedCall(CeedFree(&grad_dummy));
2417   CeedCall(CeedFree(&q_ref_dummy));
2418   CeedCall(CeedFree(&q_weight_dummy));
2419   CeedCall(CeedFree(&lambda));
2420 
2421   // -- Restriction
2422   CeedElemRestriction rstr_qd_i;
2423   CeedInt             strides[3] = {1, elem_size, elem_size * num_comp};
2424   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i));
2425   // -- QFunction
2426   CeedQFunction qf_fdm;
2427   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2428   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2429   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2430   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2431   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2432   // -- QFunction context
2433   CeedInt *num_comp_data;
2434   CeedCall(CeedCalloc(1, &num_comp_data));
2435   num_comp_data[0] = num_comp;
2436   CeedQFunctionContext ctx_fdm;
2437   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2438   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2439   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2440   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2441   // -- Operator
2442   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2443   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2444   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2445   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2446 
2447   // Cleanup
2448   CeedCall(CeedVectorDestroy(&q_data));
2449   CeedCall(CeedBasisDestroy(&fdm_basis));
2450   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2451   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2452 
2453   return CEED_ERROR_SUCCESS;
2454 }
2455 
2456 /// @}
2457