xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision f30b1135883847d0dd65c557f1d0ec6354b82b2b)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) {
128       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     }
130     // Cleanup
131     CeedCall(CeedQFunctionDestroy(&qf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
133     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
134   }
135   CeedCall(CeedOperatorSetName(op_fallback, op->name));
136   CeedCall(CeedOperatorCheckReady(op_fallback));
137   op->op_fallback = op_fallback;
138 
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
144 
145   @param[in]  op          CeedOperator to retrieve fallback for
146   @param[out] op_fallback Fallback CeedOperator
147 
148   @return An error code: 0 - success, otherwise - failure
149 
150   @ref Developer
151 **/
152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
153   // Create if needed
154   if (!op->op_fallback) {
155     CeedCall(CeedOperatorCreateFallback(op));
156   }
157   if (op->op_fallback) {
158     bool is_debug;
159 
160     CeedCall(CeedIsDebug(op->ceed, &is_debug));
161     if (is_debug) {
162       Ceed        ceed, ceed_fallback;
163       const char *resource, *resource_fallback;
164 
165       CeedCall(CeedOperatorGetCeed(op, &ceed));
166       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
167       CeedCall(CeedGetResource(ceed, &resource));
168       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
169 
170       CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n");
171       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
172                 op->op_fallback);
173     }
174   }
175   *op_fallback = op->op_fallback;
176 
177   return CEED_ERROR_SUCCESS;
178 }
179 
180 /**
181   @brief Select correct basis matrix pointer based on CeedEvalMode
182 
183   @param[in]  basis     CeedBasis from which to get the basis matrix
184   @param[in]  eval_mode Current basis evaluation mode
185   @param[in]  identity  Pointer to identity matrix
186   @param[out] basis_ptr Basis pointer to set
187 
188   @ref Developer
189 **/
190 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
191   switch (eval_mode) {
192     case CEED_EVAL_NONE:
193       *basis_ptr = identity;
194       break;
195     case CEED_EVAL_INTERP:
196       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
197       break;
198     case CEED_EVAL_GRAD:
199       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
200       break;
201     case CEED_EVAL_DIV:
202       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
203       break;
204     case CEED_EVAL_CURL:
205       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
206       break;
207     case CEED_EVAL_WEIGHT:
208       break;  // Caught by QF Assembly
209   }
210   assert(*basis_ptr != NULL);
211 
212   return CEED_ERROR_SUCCESS;
213 }
214 
215 /**
216   @brief Create point block restriction for active operator field
217 
218   @param[in]  rstr            Original CeedElemRestriction for active field
219   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
220 
221   @return An error code: 0 - success, otherwise - failure
222 
223   @ref Developer
224 **/
225 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
226   Ceed ceed;
227   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
228   const CeedInt *offsets;
229   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
230 
231   // Expand offsets
232   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
233   CeedSize l_size;
234   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
235   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
236   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
237   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
238   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
239   CeedInt shift = num_comp;
240   if (comp_stride != 1) shift *= num_comp;
241   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
242   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
243     pointblock_offsets[i] = offsets[i] * shift;
244   }
245 
246   // Create new restriction
247   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
248                                      pointblock_offsets, pointblock_rstr));
249 
250   // Cleanup
251   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
252 
253   return CEED_ERROR_SUCCESS;
254 }
255 
256 /**
257   @brief Core logic for assembling operator diagonal or point block diagonal
258 
259   @param[in]  op            CeedOperator to assemble point block diagonal
260   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
261   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
262   @param[out] assembled     CeedVector to store assembled diagonal
263 
264   @return An error code: 0 - success, otherwise - failure
265 
266   @ref Developer
267 **/
268 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
269   Ceed ceed;
270   CeedCall(CeedOperatorGetCeed(op, &ceed));
271 
272   // Assemble QFunction
273   CeedQFunction       qf;
274   const CeedScalar   *assembled_qf_array;
275   CeedVector          assembled_qf;
276   CeedElemRestriction assembled_elem_rstr;
277   CeedInt             num_input_fields, num_output_fields;
278   CeedInt             layout[3];
279 
280   CeedCall(CeedOperatorGetQFunction(op, &qf));
281   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
282   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
283   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout));
284   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
285   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
286 
287   // Get assembly data
288   CeedOperatorAssemblyData data;
289   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
290   CeedInt                 *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
291   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
292   CeedBasis               *active_bases;
293   CeedElemRestriction     *active_elem_rstrs;
294   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
295   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
296                                                 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components));
297   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL));
298   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs));
299 
300   // Loop over all active bases
301   for (CeedInt b = 0; b < num_active_bases; b++) {
302     // Assemble point block diagonal restriction, if needed
303     CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b];
304 
305     if (is_pointblock) {
306       CeedElemRestriction point_block_elem_rstr;
307 
308       CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr));
309       diag_elem_rstr = point_block_elem_rstr;
310     }
311 
312     // Create diagonal vector
313     CeedVector elem_diag;
314     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
315 
316     // Assemble element operator diagonals
317     CeedScalar *elem_diag_array;
318     CeedInt     num_elem, num_nodes, num_qpts, num_components;
319 
320     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
321     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
322     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
323     CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes));
324     CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components));
325     CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts));
326 
327     // Construct identity matrix for basis if required
328     bool        has_eval_none = false;
329     CeedScalar *identity      = NULL;
330     for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) {
331       has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE);
332     }
333     for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) {
334       has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE);
335     }
336     if (has_eval_none) {
337       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
338       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
339     }
340 
341     // Compute the diagonal of B^T D B
342     // Each element
343     for (CeedInt e = 0; e < num_elem; e++) {
344       // Each basis eval mode pair
345       CeedInt      d_out              = 0, q_comp_out;
346       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
347       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) {
348         const CeedScalar *B_t = NULL;
349         CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t);
350         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out));
351         if (q_comp_out > 1) {
352           if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
353           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
354         }
355         eval_mode_out_prev = eval_modes_out[b][e_out];
356 
357         CeedInt      d_in              = 0, q_comp_in;
358         CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
359         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) {
360           const CeedScalar *B = NULL;
361           CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B);
362           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in));
363           if (q_comp_in > 1) {
364             if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
365             else B = &B[(++d_in) * num_qpts * num_nodes];
366           }
367           eval_mode_in_prev = eval_modes_in[b][e_in];
368 
369           // Each component
370           for (CeedInt c_out = 0; c_out < num_components; c_out++) {
371             // Each qpt/node pair
372             for (CeedInt q = 0; q < num_qpts; q++) {
373               if (is_pointblock) {
374                 // Point Block Diagonal
375                 for (CeedInt c_in = 0; c_in < num_components; c_in++) {
376                   const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
377                   const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
378                   for (CeedInt n = 0; n < num_nodes; n++) {
379                     elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] +=
380                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
381                   }
382                 }
383               } else {
384                 // Diagonal Only
385                 const CeedInt    c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
386                 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
387                 for (CeedInt n = 0; n < num_nodes; n++) {
388                   elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
389                 }
390               }
391             }
392           }
393         }
394       }
395     }
396     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
397 
398     // Assemble local operator diagonal
399     CeedCall(CeedElemRestrictionApplyUnsigned(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
400 
401     // Cleanup
402     if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
403     CeedCall(CeedVectorDestroy(&elem_diag));
404     CeedCall(CeedFree(&identity));
405   }
406   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
407   CeedCall(CeedVectorDestroy(&assembled_qf));
408 
409   return CEED_ERROR_SUCCESS;
410 }
411 
412 /**
413   @brief Core logic for assembling composite operator diagonal
414 
415   @param[in]  op            CeedOperator to assemble point block diagonal
416   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
417   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
418   @param[out] assembled     CeedVector to store assembled diagonal
419 
420   @return An error code: 0 - success, otherwise - failure
421 
422   @ref Developer
423 **/
424 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
425                                                                  CeedVector assembled) {
426   CeedInt       num_sub;
427   CeedOperator *suboperators;
428   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
429   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
430   for (CeedInt i = 0; i < num_sub; i++) {
431     if (is_pointblock) {
432       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
433     } else {
434       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
435     }
436   }
437   return CEED_ERROR_SUCCESS;
438 }
439 
440 /**
441   @brief Build nonzero pattern for non-composite operator
442 
443   Users should generally use CeedOperatorLinearAssembleSymbolic()
444 
445   Note: For operators using oriented element restrictions, entries in rows or cols may be negative indicating the assembled value at this nonzero should be negated
446 
447   @param[in]  op     CeedOperator to assemble nonzero pattern
448   @param[in]  offset Offset for number of entries
449   @param[out] rows   Row number for each entry
450   @param[out] cols   Column number for each entry
451 
452   @return An error code: 0 - success, otherwise - failure
453 
454   @ref Developer
455 **/
456 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
457   Ceed ceed;
458   bool is_composite;
459   CeedCall(CeedOperatorGetCeed(op, &ceed));
460   CeedCall(CeedOperatorIsComposite(op, &is_composite));
461 
462   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
463 
464   CeedSize num_nodes;
465   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
466   CeedElemRestriction rstr_in;
467   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
468   CeedInt num_elem, elem_size, num_comp;
469   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
470   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
471   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
472   CeedInt layout_er[3];
473   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
474 
475   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
476 
477   // Determine elem_dof relation
478   CeedVector index_vec;
479   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
480   CeedScalar *array;
481   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
482   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
483   CeedCall(CeedVectorRestoreArray(index_vec, &array));
484   CeedVector elem_dof;
485   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
486   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
487   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
488   const CeedScalar *elem_dof_a;
489   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
490   CeedCall(CeedVectorDestroy(&index_vec));
491 
492   // Determine i, j locations for element matrices
493   CeedInt count = 0;
494   for (CeedInt e = 0; e < num_elem; e++) {
495     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
496       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
497         for (CeedInt i = 0; i < elem_size; i++) {
498           for (CeedInt j = 0; j < elem_size; j++) {
499             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
500             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
501 
502             const CeedInt row = elem_dof_a[elem_dof_index_row];
503             const CeedInt col = elem_dof_a[elem_dof_index_col];
504 
505             rows[offset + count] = row;
506             cols[offset + count] = col;
507             count++;
508           }
509         }
510       }
511     }
512   }
513   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
514   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
515   CeedCall(CeedVectorDestroy(&elem_dof));
516 
517   return CEED_ERROR_SUCCESS;
518 }
519 
520 /**
521   @brief Assemble nonzero entries for non-composite operator
522 
523   Users should generally use CeedOperatorLinearAssemble()
524 
525   @param[in]  op     CeedOperator to assemble
526   @param[in]  offset Offset for number of entries
527   @param[out] values Values to assemble into matrix
528 
529   @return An error code: 0 - success, otherwise - failure
530 
531   @ref Developer
532 **/
533 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
534   Ceed ceed;
535   bool is_composite;
536   CeedCall(CeedOperatorGetCeed(op, &ceed));
537   CeedCall(CeedOperatorIsComposite(op, &is_composite));
538 
539   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
540 
541   // Early exit for empty operator
542   {
543     CeedInt num_elem = 0;
544 
545     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
546     if (num_elem == 0) return CEED_ERROR_SUCCESS;
547   }
548 
549   if (op->LinearAssembleSingle) {
550     // Backend version
551     CeedCall(op->LinearAssembleSingle(op, offset, values));
552     return CEED_ERROR_SUCCESS;
553   } else {
554     // Operator fallback
555     CeedOperator op_fallback;
556 
557     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
558     if (op_fallback) {
559       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
560       return CEED_ERROR_SUCCESS;
561     }
562   }
563 
564   // Assemble QFunction
565   CeedQFunction qf;
566   CeedCall(CeedOperatorGetQFunction(op, &qf));
567   CeedVector          assembled_qf;
568   CeedElemRestriction rstr_q;
569   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
570   CeedSize qf_length;
571   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
572 
573   CeedInt            num_input_fields, num_output_fields;
574   CeedOperatorField *input_fields;
575   CeedOperatorField *output_fields;
576   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
577 
578   // Get assembly data
579   CeedOperatorAssemblyData data;
580   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
581   const CeedEvalMode **eval_modes_in, **eval_modes_out;
582   CeedInt             *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
583   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out,
584                                                 &eval_modes_out, NULL, NULL));
585   CeedBasis *bases;
586   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL));
587   CeedBasis basis_in = bases[0];
588 
589   CeedCheck(num_active_bases == 1, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases");
590   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
591 
592   CeedElemRestriction active_rstr;
593   CeedInt             num_elem, elem_size, num_qpts, num_comp;
594   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
595   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
596   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
597   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
598   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
599 
600   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
601 
602   // loop over elements and put in data structure
603   const CeedScalar *assembled_qf_array;
604   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
605 
606   CeedInt layout_qf[3];
607   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
608   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
609 
610   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
611   const CeedScalar **B_mats_in, **B_mats_out;
612   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out));
613   const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0];
614   CeedScalar        BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]];
615   CeedScalar        elem_mat[elem_size * elem_size];
616   CeedInt           count = 0;
617   CeedScalar       *vals;
618   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
619   for (CeedInt e = 0; e < num_elem; e++) {
620     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
621       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
622         // Compute B^T*D
623         for (CeedInt n = 0; n < elem_size; n++) {
624           for (CeedInt q = 0; q < num_qpts; q++) {
625             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
626               const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in);
627               CeedScalar    sum       = 0.0;
628               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
629                 const CeedInt b_out_index     = (num_eval_modes_out[0] * q + e_out) * elem_size + n;
630                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out;
631                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
632                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
633               }
634               BTD_mat[btd_index] = sum;
635             }
636           }
637         }
638         // form element matrix itself (for each block component)
639         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0]));
640 
641         // put element matrix in coordinate data structure
642         for (CeedInt i = 0; i < elem_size; i++) {
643           for (CeedInt j = 0; j < elem_size; j++) {
644             vals[offset + count] = elem_mat[i * elem_size + j];
645             count++;
646           }
647         }
648       }
649     }
650   }
651   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries");
652   CeedCall(CeedVectorRestoreArray(values, &vals));
653 
654   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
655   CeedCall(CeedVectorDestroy(&assembled_qf));
656 
657   return CEED_ERROR_SUCCESS;
658 }
659 
660 /**
661   @brief Count number of entries for assembled CeedOperator
662 
663   @param[in]  op          CeedOperator to assemble
664   @param[out] num_entries Number of entries in assembled representation
665 
666   @return An error code: 0 - success, otherwise - failure
667 
668   @ref Utility
669 **/
670 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
671   bool                is_composite;
672   CeedElemRestriction rstr;
673   CeedInt             num_elem, elem_size, num_comp;
674 
675   CeedCall(CeedOperatorIsComposite(op, &is_composite));
676   CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
677   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
678   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
679   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
680   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
681   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
682 
683   return CEED_ERROR_SUCCESS;
684 }
685 
686 /**
687   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
688 
689   @param[in]  op_fine      Fine grid operator
690   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
691   @param[in]  rstr_coarse  Coarse grid restriction
692   @param[in]  basis_coarse Coarse grid active vector basis
693   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
694   @param[out] op_coarse    Coarse grid operator
695   @param[out] op_prolong   Coarse to fine operator, or NULL
696   @param[out] op_restrict  Fine to coarse operator, or NULL
697 
698   @return An error code: 0 - success, otherwise - failure
699 
700   @ref Developer
701 **/
702 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
703                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
704   Ceed       ceed;
705   CeedVector mult_vec = NULL;
706   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
707 
708   // Check for composite operator
709   bool is_composite;
710   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
711   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
712 
713   // Coarse Grid
714   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
715   CeedElemRestriction rstr_fine = NULL;
716   // -- Clone input fields
717   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
718     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
719       rstr_fine = op_fine->input_fields[i]->elem_rstr;
720       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
721     } else {
722       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
723                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
724     }
725   }
726   // -- Clone output fields
727   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
728     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
729       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
730     } else {
731       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
732                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
733     }
734   }
735   // -- Clone QFunctionAssemblyData
736   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
737 
738   // Multiplicity vector
739   if (op_restrict || op_prolong) {
740     CeedVector mult_e_vec;
741 
742     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
743     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
744     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
745     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
746     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
747     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
748     CeedCall(CeedVectorDestroy(&mult_e_vec));
749     CeedCall(CeedVectorReciprocal(mult_vec));
750   }
751 
752   // Clone name
753   bool   has_name = op_fine->name;
754   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
755   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
756 
757   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
758   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
759             "Prolongation or restriction operator creation requires coarse-to-fine basis");
760 
761   // Restriction/Prolongation Operators
762   CeedInt num_comp;
763   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
764 
765   // Restriction
766   if (op_restrict) {
767     CeedInt             *num_comp_r_data;
768     CeedQFunction        qf_restrict;
769     CeedQFunctionContext ctx_r;
770 
771     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
772     CeedCall(CeedCalloc(1, &num_comp_r_data));
773     num_comp_r_data[0] = num_comp;
774     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
775     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
776     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
777     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
778     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
779     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
780     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
781     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
782 
783     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
784     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
785     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
786     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
787 
788     // Set name
789     char *restriction_name;
790     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
791     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
792     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
793     CeedCall(CeedFree(&restriction_name));
794 
795     // Check
796     CeedCall(CeedOperatorCheckReady(*op_restrict));
797 
798     // Cleanup
799     CeedCall(CeedQFunctionDestroy(&qf_restrict));
800   }
801 
802   // Prolongation
803   if (op_prolong) {
804     CeedInt             *num_comp_p_data;
805     CeedQFunction        qf_prolong;
806     CeedQFunctionContext ctx_p;
807 
808     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
809     CeedCall(CeedCalloc(1, &num_comp_p_data));
810     num_comp_p_data[0] = num_comp;
811     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
812     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
813     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
814     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
815     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
816     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
817     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
818     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
819 
820     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
821     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
822     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
823     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
824 
825     // Set name
826     char *prolongation_name;
827     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
828     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
829     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
830     CeedCall(CeedFree(&prolongation_name));
831 
832     // Check
833     CeedCall(CeedOperatorCheckReady(*op_prolong));
834 
835     // Cleanup
836     CeedCall(CeedQFunctionDestroy(&qf_prolong));
837   }
838 
839   // Check
840   CeedCall(CeedOperatorCheckReady(*op_coarse));
841 
842   // Cleanup
843   CeedCall(CeedVectorDestroy(&mult_vec));
844   CeedCall(CeedBasisDestroy(&basis_c_to_f));
845 
846   return CEED_ERROR_SUCCESS;
847 }
848 
849 /**
850   @brief Build 1D mass matrix and Laplacian with perturbation
851 
852   @param[in]  interp_1d   Interpolation matrix in one dimension
853   @param[in]  grad_1d     Gradient matrix in one dimension
854   @param[in]  q_weight_1d Quadrature weights in one dimension
855   @param[in]  P_1d        Number of basis nodes in one dimension
856   @param[in]  Q_1d        Number of quadrature points in one dimension
857   @param[in]  dim         Dimension of basis
858   @param[out] mass        Assembled mass matrix in one dimension
859   @param[out] laplace     Assembled perturbed Laplacian in one dimension
860 
861   @return An error code: 0 - success, otherwise - failure
862 
863   @ref Developer
864 **/
865 CeedPragmaOptimizeOff
866 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
867                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
868   for (CeedInt i = 0; i < P_1d; i++) {
869     for (CeedInt j = 0; j < P_1d; j++) {
870       CeedScalar sum = 0.0;
871       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
872       mass[i + j * P_1d] = sum;
873     }
874   }
875   // -- Laplacian
876   for (CeedInt i = 0; i < P_1d; i++) {
877     for (CeedInt j = 0; j < P_1d; j++) {
878       CeedScalar sum = 0.0;
879       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
880       laplace[i + j * P_1d] = sum;
881     }
882   }
883   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
884   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
885   return CEED_ERROR_SUCCESS;
886 }
887 CeedPragmaOptimizeOn
888 
889 /// @}
890 
891 /// ----------------------------------------------------------------------------
892 /// CeedOperator Backend API
893 /// ----------------------------------------------------------------------------
894 /// @addtogroup CeedOperatorBackend
895 /// @{
896 
897 /**
898   @brief Create object holding CeedQFunction assembly data for CeedOperator
899 
900   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
901   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
902 
903   @return An error code: 0 - success, otherwise - failure
904 
905   @ref Backend
906 **/
907 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
908   CeedCall(CeedCalloc(1, data));
909   (*data)->ref_count = 1;
910   (*data)->ceed      = ceed;
911   CeedCall(CeedReference(ceed));
912 
913   return CEED_ERROR_SUCCESS;
914 }
915 
916 /**
917   @brief Increment the reference counter for a CeedQFunctionAssemblyData
918 
919   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
920 
921   @return An error code: 0 - success, otherwise - failure
922 
923   @ref Backend
924 **/
925 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
926   data->ref_count++;
927   return CEED_ERROR_SUCCESS;
928 }
929 
930 /**
931   @brief Set re-use of CeedQFunctionAssemblyData
932 
933   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
934   @param[in]     reuse_data Boolean flag indicating data re-use
935 
936   @return An error code: 0 - success, otherwise - failure
937 
938   @ref Backend
939 **/
940 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
941   data->reuse_data        = reuse_data;
942   data->needs_data_update = true;
943   return CEED_ERROR_SUCCESS;
944 }
945 
946 /**
947   @brief Mark QFunctionAssemblyData as stale
948 
949   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
950   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
951 
952   @return An error code: 0 - success, otherwise - failure
953 
954   @ref Backend
955 **/
956 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
957   data->needs_data_update = needs_data_update;
958   return CEED_ERROR_SUCCESS;
959 }
960 
961 /**
962   @brief Determine if QFunctionAssemblyData needs update
963 
964   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
965   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
966 
967   @return An error code: 0 - success, otherwise - failure
968 
969   @ref Backend
970 **/
971 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
972   *is_update_needed = !data->reuse_data || data->needs_data_update;
973   return CEED_ERROR_SUCCESS;
974 }
975 
976 /**
977   @brief Copy the pointer to a CeedQFunctionAssemblyData.
978 
979   Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
980 
981   Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
982         CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
983         CeedQFunctionAssemblyData.
984 
985   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
986   @param[in,out] data_copy Variable to store copied reference
987 
988   @return An error code: 0 - success, otherwise - failure
989 
990   @ref Backend
991 **/
992 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
993   CeedCall(CeedQFunctionAssemblyDataReference(data));
994   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
995   *data_copy = data;
996   return CEED_ERROR_SUCCESS;
997 }
998 
999 /**
1000   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1001 
1002   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1003   @param[out] is_setup Boolean flag for setup status
1004 
1005   @return An error code: 0 - success, otherwise - failure
1006 
1007   @ref Backend
1008 **/
1009 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1010   *is_setup = data->is_setup;
1011   return CEED_ERROR_SUCCESS;
1012 }
1013 
1014 /**
1015   @brief Set internal objects for CeedQFunctionAssemblyData
1016 
1017   @param[in,out] data CeedQFunctionAssemblyData to set objects
1018   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1019   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1020 
1021   @return An error code: 0 - success, otherwise - failure
1022 
1023   @ref Backend
1024 **/
1025 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1026   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1027   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1028 
1029   data->is_setup = true;
1030   return CEED_ERROR_SUCCESS;
1031 }
1032 
1033 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1034   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1035 
1036   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1037   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1038 
1039   return CEED_ERROR_SUCCESS;
1040 }
1041 
1042 /**
1043   @brief Destroy CeedQFunctionAssemblyData
1044 
1045   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1046 
1047   @return An error code: 0 - success, otherwise - failure
1048 
1049   @ref Backend
1050 **/
1051 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1052   if (!*data || --(*data)->ref_count > 0) {
1053     *data = NULL;
1054     return CEED_ERROR_SUCCESS;
1055   }
1056   CeedCall(CeedDestroy(&(*data)->ceed));
1057   CeedCall(CeedVectorDestroy(&(*data)->vec));
1058   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1059 
1060   CeedCall(CeedFree(data));
1061   return CEED_ERROR_SUCCESS;
1062 }
1063 
1064 /**
1065   @brief Get CeedOperatorAssemblyData
1066 
1067   @param[in]  op   CeedOperator to assemble
1068   @param[out] data CeedQFunctionAssemblyData
1069 
1070   @return An error code: 0 - success, otherwise - failure
1071 
1072   @ref Backend
1073 **/
1074 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1075   if (!op->op_assembled) {
1076     CeedOperatorAssemblyData data;
1077 
1078     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1079     op->op_assembled = data;
1080   }
1081   *data = op->op_assembled;
1082 
1083   return CEED_ERROR_SUCCESS;
1084 }
1085 
1086 /**
1087   @brief Create object holding CeedOperator assembly data.
1088 
1089   The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1090   An array with references to the corresponding active CeedElemRestrictions is also stored.
1091   For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1092   The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1093 CeedEvalMode.
1094   The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1095   Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1096 
1097   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1098   @param[in]  op   CeedOperator to be assembled
1099   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1100 
1101   @return An error code: 0 - success, otherwise - failure
1102 
1103   @ref Backend
1104 **/
1105 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1106   CeedInt num_active_bases = 0;
1107 
1108   // Allocate
1109   CeedCall(CeedCalloc(1, data));
1110   (*data)->ceed = ceed;
1111   CeedCall(CeedReference(ceed));
1112 
1113   // Build OperatorAssembly data
1114   CeedQFunction       qf;
1115   CeedQFunctionField *qf_fields;
1116   CeedOperatorField  *op_fields;
1117   CeedInt             num_input_fields;
1118   CeedCall(CeedOperatorGetQFunction(op, &qf));
1119   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1120   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1121 
1122   // Determine active input basis
1123   CeedInt       *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0;
1124   CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL;
1125   CeedSize     **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1126   for (CeedInt i = 0; i < num_input_fields; i++) {
1127     CeedVector vec;
1128     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1129     if (vec == CEED_VECTOR_ACTIVE) {
1130       CeedBasis    basis_in = NULL;
1131       CeedEvalMode eval_mode;
1132       CeedInt      index = -1, dim, num_comp, q_comp;
1133       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1134       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1135       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1136       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1137       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1138       for (CeedInt i = 0; i < num_active_bases; i++) {
1139         if ((*data)->active_bases[i] == basis_in) index = i;
1140       }
1141       if (index == -1) {
1142         CeedElemRestriction elem_rstr_in;
1143         index = num_active_bases;
1144         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1145         (*data)->active_bases[num_active_bases] = NULL;
1146         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases]));
1147         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1148         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1149         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1150         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases]));
1151         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1152         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1153         num_eval_modes_in[index]  = 0;
1154         num_eval_modes_out[index] = 0;
1155         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1156         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1157         eval_modes_in[index]  = NULL;
1158         eval_modes_out[index] = NULL;
1159         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1160         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1161         eval_mode_offsets_in[index]  = NULL;
1162         eval_mode_offsets_out[index] = NULL;
1163         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1164         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1165         (*data)->assembled_bases_in[index]  = NULL;
1166         (*data)->assembled_bases_out[index] = NULL;
1167         num_active_bases++;
1168       }
1169       if (eval_mode != CEED_EVAL_WEIGHT) {
1170         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1171         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1172         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1173         for (CeedInt d = 0; d < q_comp; d++) {
1174           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1175           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1176           offset += num_comp;
1177         }
1178         num_eval_modes_in[index] += q_comp;
1179       }
1180     }
1181   }
1182   (*data)->num_eval_modes_in    = num_eval_modes_in;
1183   (*data)->eval_modes_in        = eval_modes_in;
1184   (*data)->eval_mode_offsets_in = eval_mode_offsets_in;
1185 
1186   // Determine active output basis
1187   CeedInt num_output_fields;
1188   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1189   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1190   offset = 0;
1191   for (CeedInt i = 0; i < num_output_fields; i++) {
1192     CeedVector vec;
1193     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1194     if (vec == CEED_VECTOR_ACTIVE) {
1195       CeedBasis    basis_out = NULL;
1196       CeedEvalMode eval_mode;
1197       CeedInt      index = -1, dim, num_comp, q_comp;
1198       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1199       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1200       CeedCall(CeedBasisGetDimension(basis_out, &dim));
1201       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1202       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1203       for (CeedInt i = 0; i < num_active_bases; i++) {
1204         if ((*data)->active_bases[i] == basis_out) index = i;
1205       }
1206       if (index == -1) {
1207         CeedElemRestriction elem_rstr_out;
1208 
1209         index = num_active_bases;
1210         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1211         (*data)->active_bases[num_active_bases] = NULL;
1212         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases]));
1213         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1214         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1215         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1216         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases]));
1217         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1218         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1219         num_eval_modes_in[index]  = 0;
1220         num_eval_modes_out[index] = 0;
1221         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1222         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1223         eval_modes_in[index]  = NULL;
1224         eval_modes_out[index] = NULL;
1225         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1226         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1227         eval_mode_offsets_in[index]  = NULL;
1228         eval_mode_offsets_out[index] = NULL;
1229         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1230         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1231         (*data)->assembled_bases_in[index]  = NULL;
1232         (*data)->assembled_bases_out[index] = NULL;
1233         num_active_bases++;
1234       }
1235       if (eval_mode != CEED_EVAL_WEIGHT) {
1236         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1237         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1238         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1239         for (CeedInt d = 0; d < q_comp; d++) {
1240           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1241           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1242           offset += num_comp;
1243         }
1244         num_eval_modes_out[index] += q_comp;
1245       }
1246     }
1247   }
1248   (*data)->num_output_components = offset;
1249   (*data)->num_eval_modes_out    = num_eval_modes_out;
1250   (*data)->eval_modes_out        = eval_modes_out;
1251   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1252   (*data)->num_active_bases      = num_active_bases;
1253 
1254   return CEED_ERROR_SUCCESS;
1255 }
1256 
1257 /**
1258   @brief Get CeedOperator CeedEvalModes for assembly.
1259 
1260   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1261 
1262   @param[in]  data                  CeedOperatorAssemblyData
1263   @param[out] num_active_bases      Total number of active bases
1264   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1265                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1266   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1267   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1268   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1269   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1270   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1271   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1272                                       including contributions of all active bases
1273 
1274   @return An error code: 0 - success, otherwise - failure
1275 
1276 
1277   @ref Backend
1278 **/
1279 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in,
1280                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out,
1281                                          const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) {
1282   if (num_active_bases) *num_active_bases = data->num_active_bases;
1283   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1284   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1285   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1286   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1287   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1288   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1289   if (num_output_components) *num_output_components = data->num_output_components;
1290 
1291   return CEED_ERROR_SUCCESS;
1292 }
1293 
1294 /**
1295   @brief Get CeedOperator CeedBasis data for assembly.
1296 
1297   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1298 
1299   @param[in]  data                CeedOperatorAssemblyData
1300   @param[out] num_active_bases    Number of active bases, or NULL
1301   @param[out] active_bases        Pointer to hold active CeedBasis, or NULL
1302   @param[out] assembled_bases_in  Pointer to hold assembled active input B, or NULL
1303   @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL
1304 
1305   @return An error code: 0 - success, otherwise - failure
1306 
1307   @ref Backend
1308 **/
1309 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases,
1310                                      const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) {
1311   // Assemble B_in, B_out if needed
1312   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1313     CeedInt num_qpts;
1314 
1315     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1316     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1317       CeedInt     num_nodes;
1318       CeedScalar *B_in = NULL, *identity = NULL;
1319       bool        has_eval_none = false;
1320 
1321       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1322       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1323 
1324       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1325         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1326       }
1327       if (has_eval_none) {
1328         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1329         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1330           identity[i * num_nodes + i] = 1.0;
1331         }
1332       }
1333 
1334       for (CeedInt q = 0; q < num_qpts; q++) {
1335         for (CeedInt n = 0; n < num_nodes; n++) {
1336           CeedInt      d_in              = 0, q_comp_in;
1337           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1338           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1339             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1340             const CeedScalar *B  = NULL;
1341             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B);
1342             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in));
1343             if (q_comp_in > 1) {
1344               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1345               else B = &B[(++d_in) * num_qpts * num_nodes];
1346             }
1347             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1348             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1349           }
1350         }
1351       }
1352       if (identity) CeedCall(CeedFree(identity));
1353       data->assembled_bases_in[b] = B_in;
1354     }
1355   }
1356 
1357   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1358     CeedInt num_qpts;
1359 
1360     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1361     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1362       CeedInt     num_nodes;
1363       bool        has_eval_none = false;
1364       CeedScalar *B_out = NULL, *identity = NULL;
1365 
1366       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1367       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1368 
1369       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1370         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1371       }
1372       if (has_eval_none) {
1373         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1374         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1375           identity[i * num_nodes + i] = 1.0;
1376         }
1377       }
1378 
1379       for (CeedInt q = 0; q < num_qpts; q++) {
1380         for (CeedInt n = 0; n < num_nodes; n++) {
1381           CeedInt      d_out              = 0, q_comp_out;
1382           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1383           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1384             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1385             const CeedScalar *B  = NULL;
1386             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B);
1387             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out));
1388             if (q_comp_out > 1) {
1389               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1390               else B = &B[(++d_out) * num_qpts * num_nodes];
1391             }
1392             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1393             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1394           }
1395         }
1396       }
1397       if (identity) CeedCall(CeedFree(identity));
1398       data->assembled_bases_out[b] = B_out;
1399     }
1400   }
1401 
1402   // Pass out assembled data
1403   if (active_bases) *active_bases = data->active_bases;
1404   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1405   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1406 
1407   return CEED_ERROR_SUCCESS;
1408 }
1409 
1410 /**
1411   @brief Get CeedOperator CeedBasis data for assembly.
1412 
1413   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1414 
1415   @param[in]  data                  CeedOperatorAssemblyData
1416   @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL
1417   @param[out] active_elem_rstrs     Pointer to hold active CeedElemRestrictions, or NULL
1418 
1419   @return An error code: 0 - success, otherwise - failure
1420 
1421   @ref Backend
1422 **/
1423 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs,
1424                                                 CeedElemRestriction **active_elem_rstrs) {
1425   if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases;
1426   if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs;
1427 
1428   return CEED_ERROR_SUCCESS;
1429 }
1430 
1431 /**
1432   @brief Destroy CeedOperatorAssemblyData
1433 
1434   @param[in,out] data CeedOperatorAssemblyData to destroy
1435 
1436   @return An error code: 0 - success, otherwise - failure
1437 
1438   @ref Backend
1439 **/
1440 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1441   if (!*data) {
1442     *data = NULL;
1443     return CEED_ERROR_SUCCESS;
1444   }
1445   CeedCall(CeedDestroy(&(*data)->ceed));
1446   for (CeedInt b = 0; b < (*data)->num_active_bases; b++) {
1447     CeedCall(CeedBasisDestroy(&(*data)->active_bases[b]));
1448     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b]));
1449     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1450     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1451     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1452     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1453     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1454     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1455   }
1456   CeedCall(CeedFree(&(*data)->active_bases));
1457   CeedCall(CeedFree(&(*data)->active_elem_rstrs));
1458   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1459   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1460   CeedCall(CeedFree(&(*data)->eval_modes_in));
1461   CeedCall(CeedFree(&(*data)->eval_modes_out));
1462   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1463   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1464   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1465   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1466 
1467   CeedCall(CeedFree(data));
1468   return CEED_ERROR_SUCCESS;
1469 }
1470 
1471 /// @}
1472 
1473 /// ----------------------------------------------------------------------------
1474 /// CeedOperator Public API
1475 /// ----------------------------------------------------------------------------
1476 /// @addtogroup CeedOperatorUser
1477 /// @{
1478 
1479 /**
1480   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1481 
1482   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1483   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices
1484 representing the action of the CeedQFunction for a corresponding quadrature point on an element.
1485 
1486   Inputs and outputs are in the order provided by the user when adding CeedOperator fields.
1487   For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction
1488 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1489 
1490   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1491 
1492   @param[in]  op        CeedOperator to assemble CeedQFunction
1493   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1494   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1495   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1496 
1497   @return An error code: 0 - success, otherwise - failure
1498 
1499   @ref User
1500 **/
1501 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1502   CeedCall(CeedOperatorCheckReady(op));
1503 
1504   if (op->LinearAssembleQFunction) {
1505     // Backend version
1506     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1507   } else {
1508     // Operator fallback
1509     CeedOperator op_fallback;
1510 
1511     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1512     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1513     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1514   }
1515   return CEED_ERROR_SUCCESS;
1516 }
1517 
1518 /**
1519   @brief Assemble CeedQFunction and store result internally.
1520 
1521   Return copied references of stored data to the caller.
1522   Caller is responsible for ownership and destruction of the copied references.
1523   See also @ref CeedOperatorLinearAssembleQFunction
1524 
1525   @param[in]  op        CeedOperator to assemble CeedQFunction
1526   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1527   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1528   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1529 
1530   @return An error code: 0 - success, otherwise - failure
1531 
1532   @ref User
1533 **/
1534 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1535   CeedCall(CeedOperatorCheckReady(op));
1536 
1537   if (op->LinearAssembleQFunctionUpdate) {
1538     // Backend version
1539     bool                qf_assembled_is_setup;
1540     CeedVector          assembled_vec  = NULL;
1541     CeedElemRestriction assembled_rstr = NULL;
1542 
1543     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1544     if (qf_assembled_is_setup) {
1545       bool update_needed;
1546 
1547       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1548       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1549       if (update_needed) {
1550         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1551       }
1552     } else {
1553       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1554       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1555     }
1556     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1557 
1558     // Copy reference from internally held copy
1559     *assembled = NULL;
1560     *rstr      = NULL;
1561     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1562     CeedCall(CeedVectorDestroy(&assembled_vec));
1563     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1564     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1565   } else {
1566     // Operator fallback
1567     CeedOperator op_fallback;
1568 
1569     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1570     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1571     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1572   }
1573 
1574   return CEED_ERROR_SUCCESS;
1575 }
1576 
1577 /**
1578   @brief Assemble the diagonal of a square linear CeedOperator
1579 
1580   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1581 
1582   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1583 
1584   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1585 
1586   @param[in]  op        CeedOperator to assemble CeedQFunction
1587   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1588   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1589 
1590   @return An error code: 0 - success, otherwise - failure
1591 
1592   @ref User
1593 **/
1594 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1595   bool is_composite;
1596   CeedCall(CeedOperatorCheckReady(op));
1597   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1598 
1599   CeedSize input_size = 0, output_size = 0;
1600   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1601   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1602 
1603   // Early exit for empty operator
1604   if (!is_composite) {
1605     CeedInt num_elem = 0;
1606 
1607     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1608     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1609   }
1610 
1611   if (op->LinearAssembleDiagonal) {
1612     // Backend version
1613     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1614     return CEED_ERROR_SUCCESS;
1615   } else if (op->LinearAssembleAddDiagonal) {
1616     // Backend version with zeroing first
1617     CeedCall(CeedVectorSetValue(assembled, 0.0));
1618     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1619     return CEED_ERROR_SUCCESS;
1620   } else {
1621     // Operator fallback
1622     CeedOperator op_fallback;
1623 
1624     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1625     if (op_fallback) {
1626       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1627       return CEED_ERROR_SUCCESS;
1628     }
1629   }
1630   // Default interface implementation
1631   CeedCall(CeedVectorSetValue(assembled, 0.0));
1632   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1633 
1634   return CEED_ERROR_SUCCESS;
1635 }
1636 
1637 /**
1638   @brief Assemble the diagonal of a square linear CeedOperator
1639 
1640   This sums into a CeedVector the diagonal of a linear CeedOperator.
1641 
1642   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1643 
1644   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1645 
1646   @param[in]  op        CeedOperator to assemble CeedQFunction
1647   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1648   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1649 
1650   @return An error code: 0 - success, otherwise - failure
1651 
1652   @ref User
1653 **/
1654 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1655   bool is_composite;
1656   CeedCall(CeedOperatorCheckReady(op));
1657   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1658 
1659   CeedSize input_size = 0, output_size = 0;
1660   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1661   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1662 
1663   // Early exit for empty operator
1664   if (!is_composite) {
1665     CeedInt num_elem = 0;
1666 
1667     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1668     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1669   }
1670 
1671   if (op->LinearAssembleAddDiagonal) {
1672     // Backend version
1673     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1674     return CEED_ERROR_SUCCESS;
1675   } else {
1676     // Operator fallback
1677     CeedOperator op_fallback;
1678 
1679     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1680     if (op_fallback) {
1681       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1682       return CEED_ERROR_SUCCESS;
1683     }
1684   }
1685   // Default interface implementation
1686   if (is_composite) {
1687     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1688   } else {
1689     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1690   }
1691 
1692   return CEED_ERROR_SUCCESS;
1693 }
1694 
1695 /**
1696   @brief Assemble the point block diagonal of a square linear CeedOperator
1697 
1698   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1699 
1700   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1701 
1702   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1703 
1704   @param[in]  op        CeedOperator to assemble CeedQFunction
1705   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1706 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1707 component in].
1708   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1709 
1710   @return An error code: 0 - success, otherwise - failure
1711 
1712   @ref User
1713 **/
1714 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1715   bool is_composite;
1716   CeedCall(CeedOperatorCheckReady(op));
1717   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1718 
1719   CeedSize input_size = 0, output_size = 0;
1720   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1721   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1722 
1723   // Early exit for empty operator
1724   if (!is_composite) {
1725     CeedInt num_elem = 0;
1726 
1727     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1728     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1729   }
1730 
1731   if (op->LinearAssemblePointBlockDiagonal) {
1732     // Backend version
1733     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1734     return CEED_ERROR_SUCCESS;
1735   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1736     // Backend version with zeroing first
1737     CeedCall(CeedVectorSetValue(assembled, 0.0));
1738     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1739     return CEED_ERROR_SUCCESS;
1740   } else {
1741     // Operator fallback
1742     CeedOperator op_fallback;
1743 
1744     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1745     if (op_fallback) {
1746       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1747       return CEED_ERROR_SUCCESS;
1748     }
1749   }
1750   // Default interface implementation
1751   CeedCall(CeedVectorSetValue(assembled, 0.0));
1752   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1753 
1754   return CEED_ERROR_SUCCESS;
1755 }
1756 
1757 /**
1758   @brief Assemble the point block diagonal of a square linear CeedOperator
1759 
1760   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1761 
1762   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1763 
1764   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1765 
1766   @param[in]  op        CeedOperator to assemble CeedQFunction
1767   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1768 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1769 component in].
1770   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1771 
1772   @return An error code: 0 - success, otherwise - failure
1773 
1774   @ref User
1775 **/
1776 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1777   bool is_composite;
1778   CeedCall(CeedOperatorCheckReady(op));
1779   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1780 
1781   CeedSize input_size = 0, output_size = 0;
1782   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1783   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1784 
1785   // Early exit for empty operator
1786   if (!is_composite) {
1787     CeedInt num_elem = 0;
1788 
1789     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1790     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1791   }
1792 
1793   if (op->LinearAssembleAddPointBlockDiagonal) {
1794     // Backend version
1795     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1796     return CEED_ERROR_SUCCESS;
1797   } else {
1798     // Operator fallback
1799     CeedOperator op_fallback;
1800 
1801     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1802     if (op_fallback) {
1803       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1804       return CEED_ERROR_SUCCESS;
1805     }
1806   }
1807   // Default interface implementation
1808   if (is_composite) {
1809     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1810   } else {
1811     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1812   }
1813 
1814   return CEED_ERROR_SUCCESS;
1815 }
1816 
1817 /**
1818    @brief Fully assemble the nonzero pattern of a linear operator.
1819 
1820    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1821 
1822    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1823 matrix in entry (i, j).
1824   Note that the (i, j) pairs are not unique and may repeat.
1825   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1826 
1827    This will generally be slow unless your operator is low-order.
1828 
1829    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1830 
1831    @param[in]  op          CeedOperator to assemble
1832    @param[out] num_entries Number of entries in coordinate nonzero pattern
1833    @param[out] rows        Row number for each entry
1834    @param[out] cols        Column number for each entry
1835 
1836    @ref User
1837 **/
1838 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1839   CeedInt       num_suboperators, single_entries;
1840   CeedOperator *sub_operators;
1841   bool          is_composite;
1842   CeedCall(CeedOperatorCheckReady(op));
1843   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1844 
1845   if (op->LinearAssembleSymbolic) {
1846     // Backend version
1847     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1848     return CEED_ERROR_SUCCESS;
1849   } else {
1850     // Operator fallback
1851     CeedOperator op_fallback;
1852 
1853     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1854     if (op_fallback) {
1855       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1856       return CEED_ERROR_SUCCESS;
1857     }
1858   }
1859 
1860   // Default interface implementation
1861 
1862   // count entries and allocate rows, cols arrays
1863   *num_entries = 0;
1864   if (is_composite) {
1865     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1866     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1867     for (CeedInt k = 0; k < num_suboperators; ++k) {
1868       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1869       *num_entries += single_entries;
1870     }
1871   } else {
1872     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1873     *num_entries += single_entries;
1874   }
1875   CeedCall(CeedCalloc(*num_entries, rows));
1876   CeedCall(CeedCalloc(*num_entries, cols));
1877 
1878   // assemble nonzero locations
1879   CeedInt offset = 0;
1880   if (is_composite) {
1881     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1882     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1883     for (CeedInt k = 0; k < num_suboperators; ++k) {
1884       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1885       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1886       offset += single_entries;
1887     }
1888   } else {
1889     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1890   }
1891 
1892   return CEED_ERROR_SUCCESS;
1893 }
1894 
1895 /**
1896    @brief Fully assemble the nonzero entries of a linear operator.
1897 
1898    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1899 
1900    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1901 matrix in entry (i, j).
1902   Note that the (i, j) pairs are not unique and may repeat.
1903   This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1904 
1905    This will generally be slow unless your operator is low-order.
1906 
1907    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1908 
1909    @param[in]  op     CeedOperator to assemble
1910    @param[out] values Values to assemble into matrix
1911 
1912    @ref User
1913 **/
1914 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1915   CeedInt       num_suboperators, single_entries = 0;
1916   CeedOperator *sub_operators;
1917   bool          is_composite;
1918   CeedCall(CeedOperatorCheckReady(op));
1919   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1920 
1921   // Early exit for empty operator
1922   if (!is_composite) {
1923     CeedInt num_elem = 0;
1924 
1925     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1926     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1927   }
1928 
1929   if (op->LinearAssemble) {
1930     // Backend version
1931     CeedCall(op->LinearAssemble(op, values));
1932     return CEED_ERROR_SUCCESS;
1933   } else {
1934     // Operator fallback
1935     CeedOperator op_fallback;
1936 
1937     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1938     if (op_fallback) {
1939       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
1940       return CEED_ERROR_SUCCESS;
1941     }
1942   }
1943 
1944   // Default interface implementation
1945   CeedInt offset = 0;
1946   CeedCall(CeedVectorSetValue(values, 0.0));
1947   if (is_composite) {
1948     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1949     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1950     for (CeedInt k = 0; k < num_suboperators; k++) {
1951       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
1952       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1953       offset += single_entries;
1954     }
1955   } else {
1956     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
1957   }
1958 
1959   return CEED_ERROR_SUCCESS;
1960 }
1961 
1962 /**
1963   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
1964 
1965   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1966 
1967   @param[in]  op               Composite CeedOperator
1968   @param[in]  num_skip_indices Number of suboperators to skip
1969   @param[in]  skip_indices     Array of indices of suboperators to skip
1970   @param[out] mult             Vector to store multiplicity (of size l_size)
1971 
1972   @return An error code: 0 - success, otherwise - failure
1973 
1974   @ref User
1975 **/
1976 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
1977   CeedCall(CeedOperatorCheckReady(op));
1978 
1979   Ceed                ceed;
1980   CeedInt             num_suboperators;
1981   CeedSize            l_vec_len;
1982   CeedScalar         *mult_array;
1983   CeedVector          ones_l_vec;
1984   CeedElemRestriction elem_rstr;
1985   CeedOperator       *sub_operators;
1986 
1987   CeedCall(CeedOperatorGetCeed(op, &ceed));
1988 
1989   // Zero mult vector
1990   CeedCall(CeedVectorSetValue(mult, 0.0));
1991 
1992   // Get suboperators
1993   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1994   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1995   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
1996 
1997   // Work vector
1998   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
1999   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2000   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2001   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2002 
2003   // Compute multiplicity across suboperators
2004   for (CeedInt i = 0; i < num_suboperators; i++) {
2005     const CeedScalar *sub_mult_array;
2006     CeedVector        sub_mult_l_vec, ones_e_vec;
2007 
2008     // -- Check for suboperator to skip
2009     for (CeedInt j = 0; j < num_skip_indices; j++) {
2010       if (skip_indices[j] == i) continue;
2011     }
2012 
2013     // -- Sub operator multiplicity
2014     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2015     CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2016     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2017     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2018     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2019     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2020     // ---- Flag every node present in the current suboperator
2021     for (CeedInt j = 0; j < l_vec_len; j++) {
2022       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2023     }
2024     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2025     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2026     CeedCall(CeedVectorDestroy(&ones_e_vec));
2027   }
2028   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2029   CeedCall(CeedVectorDestroy(&ones_l_vec));
2030 
2031   return CEED_ERROR_SUCCESS;
2032 }
2033 
2034 /**
2035   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2036 grid interpolation
2037 
2038   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2039 
2040   @param[in]  op_fine      Fine grid operator
2041   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2042   @param[in]  rstr_coarse  Coarse grid restriction
2043   @param[in]  basis_coarse Coarse grid active vector basis
2044   @param[out] op_coarse    Coarse grid operator
2045   @param[out] op_prolong   Coarse to fine operator, or NULL
2046   @param[out] op_restrict  Fine to coarse operator, or NULL
2047 
2048   @return An error code: 0 - success, otherwise - failure
2049 
2050   @ref User
2051 **/
2052 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2053                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2054   CeedCall(CeedOperatorCheckReady(op_fine));
2055 
2056   // Build prolongation matrix, if required
2057   CeedBasis basis_c_to_f = NULL;
2058   if (op_prolong || op_restrict) {
2059     CeedBasis basis_fine;
2060     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2061     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2062   }
2063 
2064   // Core code
2065   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2066 
2067   return CEED_ERROR_SUCCESS;
2068 }
2069 
2070 /**
2071   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2072 
2073   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2074 
2075   @param[in]  op_fine       Fine grid operator
2076   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2077   @param[in]  rstr_coarse   Coarse grid restriction
2078   @param[in]  basis_coarse  Coarse grid active vector basis
2079   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2080   @param[out] op_coarse     Coarse grid operator
2081   @param[out] op_prolong    Coarse to fine operator, or NULL
2082   @param[out] op_restrict   Fine to coarse operator, or NULL
2083 
2084   @return An error code: 0 - success, otherwise - failure
2085 
2086   @ref User
2087 **/
2088 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2089                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2090                                              CeedOperator *op_restrict) {
2091   CeedCall(CeedOperatorCheckReady(op_fine));
2092   Ceed ceed;
2093   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2094 
2095   // Check for compatible quadrature spaces
2096   CeedBasis basis_fine;
2097   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2098   CeedInt Q_f, Q_c;
2099   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2100   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2101   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2102 
2103   // Create coarse to fine basis, if required
2104   CeedBasis basis_c_to_f = NULL;
2105   if (op_prolong || op_restrict) {
2106     // Check if interpolation matrix is provided
2107     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2108               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2109     CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2110     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2111     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2112     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2113     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2114     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2115     CeedScalar *q_ref, *q_weight, *grad;
2116     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2117     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2118     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2119     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2120     CeedCall(CeedFree(&q_ref));
2121     CeedCall(CeedFree(&q_weight));
2122     CeedCall(CeedFree(&grad));
2123   }
2124 
2125   // Core code
2126   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2127   return CEED_ERROR_SUCCESS;
2128 }
2129 
2130 /**
2131   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2132 
2133   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2134 
2135   @param[in]  op_fine       Fine grid operator
2136   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2137   @param[in]  rstr_coarse   Coarse grid restriction
2138   @param[in]  basis_coarse  Coarse grid active vector basis
2139   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2140   @param[out] op_coarse     Coarse grid operator
2141   @param[out] op_prolong    Coarse to fine operator, or NULL
2142   @param[out] op_restrict   Fine to coarse operator, or NULL
2143 
2144   @return An error code: 0 - success, otherwise - failure
2145 
2146   @ref User
2147 **/
2148 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2149                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2150                                        CeedOperator *op_restrict) {
2151   CeedCall(CeedOperatorCheckReady(op_fine));
2152   Ceed ceed;
2153   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2154 
2155   // Check for compatible quadrature spaces
2156   CeedBasis basis_fine;
2157   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2158   CeedInt Q_f, Q_c;
2159   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2160   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2161   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2162 
2163   // Coarse to fine basis
2164   CeedBasis basis_c_to_f = NULL;
2165   if (op_prolong || op_restrict) {
2166     // Check if interpolation matrix is provided
2167     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2168               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2169     CeedElemTopology topo;
2170     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2171     CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2172     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2173     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2174     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2175     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2176     CeedScalar *q_ref, *q_weight, *grad;
2177     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2178     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2179     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2180     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2181     CeedCall(CeedFree(&q_ref));
2182     CeedCall(CeedFree(&q_weight));
2183     CeedCall(CeedFree(&grad));
2184   }
2185 
2186   // Core code
2187   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2188   return CEED_ERROR_SUCCESS;
2189 }
2190 
2191 /**
2192   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2193 
2194   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2195   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2196   The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T
2197 \hat S V\f$.
2198   The CeedOperator must be linear and non-composite.
2199   The associated CeedQFunction must therefore also be linear.
2200 
2201   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2202 
2203   @param[in]  op      CeedOperator to create element inverses
2204   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2205   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2206 
2207   @return An error code: 0 - success, otherwise - failure
2208 
2209   @ref User
2210 **/
2211 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2212   CeedCall(CeedOperatorCheckReady(op));
2213 
2214   if (op->CreateFDMElementInverse) {
2215     // Backend version
2216     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2217     return CEED_ERROR_SUCCESS;
2218   } else {
2219     // Operator fallback
2220     CeedOperator op_fallback;
2221 
2222     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2223     if (op_fallback) {
2224       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2225       return CEED_ERROR_SUCCESS;
2226     }
2227   }
2228 
2229   // Default interface implementation
2230   Ceed ceed, ceed_parent;
2231   CeedCall(CeedOperatorGetCeed(op, &ceed));
2232   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2233   ceed_parent = ceed_parent ? ceed_parent : ceed;
2234   CeedQFunction qf;
2235   CeedCall(CeedOperatorGetQFunction(op, &qf));
2236 
2237   // Determine active input basis
2238   bool                interp = false, grad = false;
2239   CeedBasis           basis = NULL;
2240   CeedElemRestriction rstr  = NULL;
2241   CeedOperatorField  *op_fields;
2242   CeedQFunctionField *qf_fields;
2243   CeedInt             num_input_fields;
2244   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2245   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2246   for (CeedInt i = 0; i < num_input_fields; i++) {
2247     CeedVector vec;
2248     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2249     if (vec == CEED_VECTOR_ACTIVE) {
2250       CeedEvalMode eval_mode;
2251       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2252       interp = interp || eval_mode == CEED_EVAL_INTERP;
2253       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2254       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2255       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2256     }
2257   }
2258   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2259   CeedSize l_size = 1;
2260   CeedInt  P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2261   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2262   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2263   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2264   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2265   CeedCall(CeedBasisGetDimension(basis, &dim));
2266   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2267   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2268   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2269 
2270   // Build and diagonalize 1D Mass and Laplacian
2271   bool is_tensor_basis;
2272   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2273   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2274   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2275   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2276   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2277   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2278   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2279   CeedCall(CeedCalloc(P_1d, &lambda));
2280   // -- Build matrices
2281   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2282   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2283   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2284   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2285   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2286 
2287   // -- Diagonalize
2288   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2289   CeedCall(CeedFree(&mass));
2290   CeedCall(CeedFree(&laplace));
2291   for (CeedInt i = 0; i < P_1d; i++) {
2292     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2293   }
2294   CeedCall(CeedFree(&x));
2295 
2296   // Assemble QFunction
2297   CeedVector          assembled;
2298   CeedElemRestriction rstr_qf;
2299   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2300   CeedInt layout[3];
2301   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2302   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2303   CeedScalar max_norm = 0;
2304   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2305 
2306   // Calculate element averages
2307   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2308   CeedScalar       *elem_avg;
2309   const CeedScalar *assembled_array, *q_weight_array;
2310   CeedVector        q_weight;
2311   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2312   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2313   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2314   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2315   CeedCall(CeedCalloc(num_elem, &elem_avg));
2316   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2317   for (CeedInt e = 0; e < num_elem; e++) {
2318     CeedInt count = 0;
2319     for (CeedInt q = 0; q < num_qpts; q++) {
2320       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2321         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2322           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2323           count++;
2324         }
2325       }
2326     }
2327     if (count) {
2328       elem_avg[e] /= count;
2329     } else {
2330       elem_avg[e] = 1.0;
2331     }
2332   }
2333   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2334   CeedCall(CeedVectorDestroy(&assembled));
2335   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2336   CeedCall(CeedVectorDestroy(&q_weight));
2337 
2338   // Build FDM diagonal
2339   CeedVector  q_data;
2340   CeedScalar *q_data_array, *fdm_diagonal;
2341   CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2342   const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2343   for (CeedInt c = 0; c < num_comp; c++) {
2344     for (CeedInt n = 0; n < num_nodes; n++) {
2345       if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2346       if (grad) {
2347         for (CeedInt d = 0; d < dim; d++) {
2348           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2349           fdm_diagonal[c * num_nodes + n] += lambda[i];
2350         }
2351       }
2352       if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2353     }
2354   }
2355   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2356   CeedCall(CeedVectorSetValue(q_data, 0.0));
2357   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2358   for (CeedInt e = 0; e < num_elem; e++) {
2359     for (CeedInt c = 0; c < num_comp; c++) {
2360       for (CeedInt n = 0; n < num_nodes; n++) q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2361     }
2362   }
2363   CeedCall(CeedFree(&elem_avg));
2364   CeedCall(CeedFree(&fdm_diagonal));
2365   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2366 
2367   // Setup FDM operator
2368   // -- Basis
2369   CeedBasis   fdm_basis;
2370   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2371   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2372   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2373   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2374   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2375   CeedCall(CeedFree(&fdm_interp));
2376   CeedCall(CeedFree(&grad_dummy));
2377   CeedCall(CeedFree(&q_ref_dummy));
2378   CeedCall(CeedFree(&q_weight_dummy));
2379   CeedCall(CeedFree(&lambda));
2380 
2381   // -- Restriction
2382   CeedElemRestriction rstr_qd_i;
2383   CeedInt             strides[3] = {1, num_nodes, num_nodes * num_comp};
2384   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i));
2385   // -- QFunction
2386   CeedQFunction qf_fdm;
2387   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2388   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2389   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2390   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2391   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2392   // -- QFunction context
2393   CeedInt *num_comp_data;
2394   CeedCall(CeedCalloc(1, &num_comp_data));
2395   num_comp_data[0] = num_comp;
2396   CeedQFunctionContext ctx_fdm;
2397   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2398   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2399   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2400   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2401   // -- Operator
2402   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2403   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2404   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2405   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2406 
2407   // Cleanup
2408   CeedCall(CeedVectorDestroy(&q_data));
2409   CeedCall(CeedBasisDestroy(&fdm_basis));
2410   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2411   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2412 
2413   return CEED_ERROR_SUCCESS;
2414 }
2415 
2416 /// @}
2417