xref: /libCEED/interface/ceed-preconditioning.c (revision c5f45aeaba028f514c71cfc279e5c99f2dd834d5)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) {
128       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     }
130     // Cleanup
131     CeedCall(CeedQFunctionDestroy(&qf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
133     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
134   }
135   CeedCall(CeedOperatorSetName(op_fallback, op->name));
136   CeedCall(CeedOperatorCheckReady(op_fallback));
137   op->op_fallback = op_fallback;
138 
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
144 
145   @param[in]  op          CeedOperator to retrieve fallback for
146   @param[out] op_fallback Fallback CeedOperator
147 
148   @return An error code: 0 - success, otherwise - failure
149 
150   @ref Developer
151 **/
152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
153   // Create if needed
154   if (!op->op_fallback) {
155     CeedCall(CeedOperatorCreateFallback(op));
156   }
157   if (op->op_fallback) {
158     bool is_debug;
159 
160     CeedCall(CeedIsDebug(op->ceed, &is_debug));
161     if (is_debug) {
162       Ceed        ceed, ceed_fallback;
163       const char *resource, *resource_fallback;
164 
165       CeedCall(CeedOperatorGetCeed(op, &ceed));
166       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
167       CeedCall(CeedGetResource(ceed, &resource));
168       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
169 
170       CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n");
171       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
172                 op->op_fallback);
173     }
174   }
175   *op_fallback = op->op_fallback;
176 
177   return CEED_ERROR_SUCCESS;
178 }
179 
180 /**
181   @brief Select correct basis matrix pointer based on CeedEvalMode
182 
183   @param[in]  basis     CeedBasis from which to get the basis matrix
184   @param[in]  eval_mode Current basis evaluation mode
185   @param[in]  identity  Pointer to identity matrix
186   @param[out] basis_ptr Basis pointer to set
187 
188   @ref Developer
189 **/
190 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
191   switch (eval_mode) {
192     case CEED_EVAL_NONE:
193       *basis_ptr = identity;
194       break;
195     case CEED_EVAL_INTERP:
196       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
197       break;
198     case CEED_EVAL_GRAD:
199       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
200       break;
201     case CEED_EVAL_DIV:
202       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
203       break;
204     case CEED_EVAL_CURL:
205       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
206       break;
207     case CEED_EVAL_WEIGHT:
208       break;  // Caught by QF Assembly
209   }
210   assert(*basis_ptr != NULL);
211 
212   return CEED_ERROR_SUCCESS;
213 }
214 
215 /**
216   @brief Create point block restriction for active operator field
217 
218   @param[in]  rstr            Original CeedElemRestriction for active field
219   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
220 
221   @return An error code: 0 - success, otherwise - failure
222 
223   @ref Developer
224 **/
225 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
226   Ceed ceed;
227   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
228   const CeedInt *offsets;
229   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
230 
231   // Expand offsets
232   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
233   CeedSize l_size;
234   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
235   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
236   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
237   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
238   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
239   CeedInt shift = num_comp;
240   if (comp_stride != 1) shift *= num_comp;
241   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
242   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
243     pointblock_offsets[i] = offsets[i] * shift;
244   }
245 
246   // Create new restriction
247   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
248                                      pointblock_offsets, pointblock_rstr));
249 
250   // Cleanup
251   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
252 
253   return CEED_ERROR_SUCCESS;
254 }
255 
256 /**
257   @brief Core logic for assembling operator diagonal or point block diagonal
258 
259   @param[in]  op            CeedOperator to assemble point block diagonal
260   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
261   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
262   @param[out] assembled     CeedVector to store assembled diagonal
263 
264   @return An error code: 0 - success, otherwise - failure
265 
266   @ref Developer
267 **/
268 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
269   Ceed ceed;
270   CeedCall(CeedOperatorGetCeed(op, &ceed));
271 
272   // Assemble QFunction
273   CeedQFunction       qf;
274   const CeedScalar   *assembled_qf_array;
275   CeedVector          assembled_qf        = NULL;
276   CeedElemRestriction assembled_elem_rstr = NULL;
277   CeedInt             num_input_fields, num_output_fields;
278   CeedInt             layout[3];
279 
280   CeedCall(CeedOperatorGetQFunction(op, &qf));
281   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
282   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
283   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout));
284   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
285   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
286 
287   // Get assembly data
288   CeedOperatorAssemblyData data;
289   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
290   CeedInt                 *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
291   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
292   CeedBasis               *active_bases;
293   CeedElemRestriction     *active_elem_rstrs;
294   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
295   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
296                                                 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components));
297   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL));
298   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs));
299 
300   // Loop over all active bases
301   for (CeedInt b = 0; b < num_active_bases; b++) {
302     // Assemble point block diagonal restriction, if needed
303     CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b];
304 
305     if (is_pointblock) {
306       CeedElemRestriction point_block_elem_rstr;
307 
308       CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr));
309       diag_elem_rstr = point_block_elem_rstr;
310     }
311 
312     // Create diagonal vector
313     CeedVector elem_diag;
314     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
315 
316     // Assemble element operator diagonals
317     CeedScalar *elem_diag_array;
318     CeedInt     num_elem, num_nodes, num_qpts, num_components;
319 
320     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
321     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
322     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
323     CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes));
324     CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components));
325     CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts));
326 
327     // Construct identity matrix for basis if required
328     bool        has_eval_none = false;
329     CeedScalar *identity      = NULL;
330     for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) {
331       has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE);
332     }
333     for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) {
334       has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE);
335     }
336     if (has_eval_none) {
337       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
338       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
339     }
340 
341     // Compute the diagonal of B^T D B
342     // Each element
343     for (CeedInt e = 0; e < num_elem; e++) {
344       // Each basis eval mode pair
345       CeedInt      d_out              = 0, q_comp_out;
346       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
347       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) {
348         const CeedScalar *B_t = NULL;
349         CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t);
350         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out));
351         if (q_comp_out > 1) {
352           if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
353           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
354         }
355         eval_mode_out_prev = eval_modes_out[b][e_out];
356 
357         CeedInt      d_in              = 0, q_comp_in;
358         CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
359         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) {
360           const CeedScalar *B = NULL;
361           CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B);
362           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in));
363           if (q_comp_in > 1) {
364             if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
365             else B = &B[(++d_in) * num_qpts * num_nodes];
366           }
367           eval_mode_in_prev = eval_modes_in[b][e_in];
368 
369           // Each component
370           for (CeedInt c_out = 0; c_out < num_components; c_out++) {
371             // Each qpt/node pair
372             for (CeedInt q = 0; q < num_qpts; q++) {
373               if (is_pointblock) {
374                 // Point Block Diagonal
375                 for (CeedInt c_in = 0; c_in < num_components; c_in++) {
376                   const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
377                   const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
378                   for (CeedInt n = 0; n < num_nodes; n++) {
379                     elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] +=
380                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
381                   }
382                 }
383               } else {
384                 // Diagonal Only
385                 const CeedInt    c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
386                 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
387                 for (CeedInt n = 0; n < num_nodes; n++) {
388                   elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
389                 }
390               }
391             }
392           }
393         }
394       }
395     }
396     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
397 
398     // Assemble local operator diagonal
399     CeedCall(CeedElemRestrictionApplyUnsigned(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
400 
401     // Cleanup
402     if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
403     CeedCall(CeedVectorDestroy(&elem_diag));
404     CeedCall(CeedFree(&identity));
405   }
406   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
407   CeedCall(CeedVectorDestroy(&assembled_qf));
408 
409   return CEED_ERROR_SUCCESS;
410 }
411 
412 /**
413   @brief Core logic for assembling composite operator diagonal
414 
415   @param[in]  op            CeedOperator to assemble point block diagonal
416   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
417   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
418   @param[out] assembled     CeedVector to store assembled diagonal
419 
420   @return An error code: 0 - success, otherwise - failure
421 
422   @ref Developer
423 **/
424 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
425                                                                  CeedVector assembled) {
426   CeedInt       num_sub;
427   CeedOperator *suboperators;
428   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
429   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
430   for (CeedInt i = 0; i < num_sub; i++) {
431     if (is_pointblock) {
432       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
433     } else {
434       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
435     }
436   }
437   return CEED_ERROR_SUCCESS;
438 }
439 
440 /**
441   @brief Build nonzero pattern for non-composite operator
442 
443   Users should generally use CeedOperatorLinearAssembleSymbolic()
444 
445   Note: For operators using oriented element restrictions, entries in rows or cols may be negative indicating the assembled value at this nonzero
446 should be negated
447 
448   @param[in]  op     CeedOperator to assemble nonzero pattern
449   @param[in]  offset Offset for number of entries
450   @param[out] rows   Row number for each entry
451   @param[out] cols   Column number for each entry
452 
453   @return An error code: 0 - success, otherwise - failure
454 
455   @ref Developer
456 **/
457 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
458   Ceed ceed;
459   bool is_composite;
460   CeedCall(CeedOperatorGetCeed(op, &ceed));
461   CeedCall(CeedOperatorIsComposite(op, &is_composite));
462 
463   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
464 
465   CeedSize num_nodes;
466   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
467   CeedElemRestriction rstr_in;
468   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
469   CeedInt num_elem, elem_size, num_comp;
470   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
471   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
472   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
473   CeedInt layout_er[3];
474   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
475 
476   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
477 
478   // Determine elem_dof relation
479   CeedVector index_vec;
480   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
481   CeedScalar *array;
482   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
483   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
484   CeedCall(CeedVectorRestoreArray(index_vec, &array));
485   CeedVector elem_dof;
486   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
487   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
488   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
489   const CeedScalar *elem_dof_a;
490   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
491   CeedCall(CeedVectorDestroy(&index_vec));
492 
493   // Determine i, j locations for element matrices
494   CeedInt count = 0;
495   for (CeedInt e = 0; e < num_elem; e++) {
496     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
497       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
498         for (CeedInt i = 0; i < elem_size; i++) {
499           for (CeedInt j = 0; j < elem_size; j++) {
500             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
501             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
502 
503             const CeedInt row = elem_dof_a[elem_dof_index_row];
504             const CeedInt col = elem_dof_a[elem_dof_index_col];
505 
506             rows[offset + count] = row;
507             cols[offset + count] = col;
508             count++;
509           }
510         }
511       }
512     }
513   }
514   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
515   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
516   CeedCall(CeedVectorDestroy(&elem_dof));
517 
518   return CEED_ERROR_SUCCESS;
519 }
520 
521 /**
522   @brief Assemble nonzero entries for non-composite operator
523 
524   Users should generally use CeedOperatorLinearAssemble()
525 
526   @param[in]  op     CeedOperator to assemble
527   @param[in]  offset Offset for number of entries
528   @param[out] values Values to assemble into matrix
529 
530   @return An error code: 0 - success, otherwise - failure
531 
532   @ref Developer
533 **/
534 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
535   Ceed ceed;
536   bool is_composite;
537   CeedCall(CeedOperatorGetCeed(op, &ceed));
538   CeedCall(CeedOperatorIsComposite(op, &is_composite));
539 
540   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
541 
542   // Early exit for empty operator
543   {
544     CeedInt num_elem = 0;
545 
546     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
547     if (num_elem == 0) return CEED_ERROR_SUCCESS;
548   }
549 
550   if (op->LinearAssembleSingle) {
551     // Backend version
552     CeedCall(op->LinearAssembleSingle(op, offset, values));
553     return CEED_ERROR_SUCCESS;
554   } else {
555     // Operator fallback
556     CeedOperator op_fallback;
557 
558     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
559     if (op_fallback) {
560       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
561       return CEED_ERROR_SUCCESS;
562     }
563   }
564 
565   // Assemble QFunction
566   CeedQFunction qf;
567   CeedCall(CeedOperatorGetQFunction(op, &qf));
568   CeedVector          assembled_qf = NULL;
569   CeedElemRestriction rstr_q       = NULL;
570   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
571   CeedSize qf_length;
572   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
573 
574   CeedInt            num_input_fields, num_output_fields;
575   CeedOperatorField *input_fields;
576   CeedOperatorField *output_fields;
577   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
578 
579   // Get assembly data
580   CeedOperatorAssemblyData data;
581   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
582   const CeedEvalMode **eval_modes_in, **eval_modes_out;
583   CeedInt             *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
584   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out,
585                                                 &eval_modes_out, NULL, NULL));
586   CeedBasis *bases;
587   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL));
588   CeedBasis basis_in = bases[0];
589 
590   CeedCheck(num_active_bases == 1, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases");
591   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
592 
593   CeedElemRestriction active_rstr;
594   CeedInt             num_elem, elem_size, num_qpts, num_comp;
595   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
596   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
597   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
598   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
599   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
600 
601   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
602 
603   // loop over elements and put in data structure
604   const CeedScalar *assembled_qf_array;
605   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
606 
607   CeedInt layout_qf[3];
608   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
609   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
610 
611   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
612   const CeedScalar **B_mats_in, **B_mats_out;
613   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out));
614   const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0];
615   CeedScalar        BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]];
616   CeedScalar        elem_mat[elem_size * elem_size];
617   CeedInt           count = 0;
618   CeedScalar       *vals;
619   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
620   for (CeedInt e = 0; e < num_elem; e++) {
621     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
622       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
623         // Compute B^T*D
624         for (CeedInt n = 0; n < elem_size; n++) {
625           for (CeedInt q = 0; q < num_qpts; q++) {
626             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
627               const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in);
628               CeedScalar    sum       = 0.0;
629               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
630                 const CeedInt b_out_index     = (num_eval_modes_out[0] * q + e_out) * elem_size + n;
631                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out;
632                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
633                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
634               }
635               BTD_mat[btd_index] = sum;
636             }
637           }
638         }
639         // form element matrix itself (for each block component)
640         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0]));
641 
642         // put element matrix in coordinate data structure
643         for (CeedInt i = 0; i < elem_size; i++) {
644           for (CeedInt j = 0; j < elem_size; j++) {
645             vals[offset + count] = elem_mat[i * elem_size + j];
646             count++;
647           }
648         }
649       }
650     }
651   }
652   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries");
653   CeedCall(CeedVectorRestoreArray(values, &vals));
654 
655   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
656   CeedCall(CeedVectorDestroy(&assembled_qf));
657 
658   return CEED_ERROR_SUCCESS;
659 }
660 
661 /**
662   @brief Count number of entries for assembled CeedOperator
663 
664   @param[in]  op          CeedOperator to assemble
665   @param[out] num_entries Number of entries in assembled representation
666 
667   @return An error code: 0 - success, otherwise - failure
668 
669   @ref Utility
670 **/
671 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
672   bool                is_composite;
673   CeedElemRestriction rstr;
674   CeedInt             num_elem, elem_size, num_comp;
675 
676   CeedCall(CeedOperatorIsComposite(op, &is_composite));
677   CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
678   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
679   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
680   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
681   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
682   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
683 
684   return CEED_ERROR_SUCCESS;
685 }
686 
687 /**
688   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
689 
690   @param[in]  op_fine      Fine grid operator
691   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
692   @param[in]  rstr_coarse  Coarse grid restriction
693   @param[in]  basis_coarse Coarse grid active vector basis
694   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
695   @param[out] op_coarse    Coarse grid operator
696   @param[out] op_prolong   Coarse to fine operator, or NULL
697   @param[out] op_restrict  Fine to coarse operator, or NULL
698 
699   @return An error code: 0 - success, otherwise - failure
700 
701   @ref Developer
702 **/
703 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
704                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
705   Ceed                ceed;
706   CeedVector          mult_vec         = NULL;
707   CeedElemRestriction rstr_p_mult_fine = NULL;
708   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
709 
710   // Check for composite operator
711   bool is_composite;
712   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
713   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
714 
715   // Coarse Grid
716   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
717   CeedElemRestriction rstr_fine = NULL;
718   // -- Clone input fields
719   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
720     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
721       rstr_fine = op_fine->input_fields[i]->elem_rstr;
722       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
723     } else {
724       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
725                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
726     }
727   }
728   // -- Clone output fields
729   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
730     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
731       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
732     } else {
733       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
734                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
735     }
736   }
737   // -- Clone QFunctionAssemblyData
738   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
739 
740   // Multiplicity vector
741   if (op_restrict || op_prolong) {
742     CeedVector mult_e_vec;
743 
744     CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine));
745     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
746     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
747     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
748     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
749     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
750     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
751     CeedCall(CeedVectorDestroy(&mult_e_vec));
752     CeedCall(CeedVectorReciprocal(mult_vec));
753   }
754 
755   // Clone name
756   bool   has_name = op_fine->name;
757   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
758   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
759 
760   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
761   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
762             "Prolongation or restriction operator creation requires coarse-to-fine basis");
763 
764   // Restriction/Prolongation Operators
765   CeedInt num_comp;
766   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
767 
768   // Restriction
769   if (op_restrict) {
770     CeedInt             *num_comp_r_data;
771     CeedQFunction        qf_restrict;
772     CeedQFunctionContext ctx_r;
773 
774     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
775     CeedCall(CeedCalloc(1, &num_comp_r_data));
776     num_comp_r_data[0] = num_comp;
777     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
778     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
779     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
780     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
781     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
782     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
783     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
784     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
785 
786     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
787     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
788     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec));
789     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
790 
791     // Set name
792     char *restriction_name;
793     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
794     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
795     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
796     CeedCall(CeedFree(&restriction_name));
797 
798     // Check
799     CeedCall(CeedOperatorCheckReady(*op_restrict));
800 
801     // Cleanup
802     CeedCall(CeedQFunctionDestroy(&qf_restrict));
803   }
804 
805   // Prolongation
806   if (op_prolong) {
807     CeedInt             *num_comp_p_data;
808     CeedQFunction        qf_prolong;
809     CeedQFunctionContext ctx_p;
810 
811     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
812     CeedCall(CeedCalloc(1, &num_comp_p_data));
813     num_comp_p_data[0] = num_comp;
814     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
815     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
816     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
817     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
818     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
819     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
820     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
821     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
822 
823     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
824     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
825     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec));
826     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
827 
828     // Set name
829     char *prolongation_name;
830     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
831     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
832     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
833     CeedCall(CeedFree(&prolongation_name));
834 
835     // Check
836     CeedCall(CeedOperatorCheckReady(*op_prolong));
837 
838     // Cleanup
839     CeedCall(CeedQFunctionDestroy(&qf_prolong));
840   }
841 
842   // Check
843   CeedCall(CeedOperatorCheckReady(*op_coarse));
844 
845   // Cleanup
846   CeedCall(CeedVectorDestroy(&mult_vec));
847   CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine));
848   CeedCall(CeedBasisDestroy(&basis_c_to_f));
849 
850   return CEED_ERROR_SUCCESS;
851 }
852 
853 /**
854   @brief Build 1D mass matrix and Laplacian with perturbation
855 
856   @param[in]  interp_1d   Interpolation matrix in one dimension
857   @param[in]  grad_1d     Gradient matrix in one dimension
858   @param[in]  q_weight_1d Quadrature weights in one dimension
859   @param[in]  P_1d        Number of basis nodes in one dimension
860   @param[in]  Q_1d        Number of quadrature points in one dimension
861   @param[in]  dim         Dimension of basis
862   @param[out] mass        Assembled mass matrix in one dimension
863   @param[out] laplace     Assembled perturbed Laplacian in one dimension
864 
865   @return An error code: 0 - success, otherwise - failure
866 
867   @ref Developer
868 **/
869 CeedPragmaOptimizeOff
870 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
871                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
872   for (CeedInt i = 0; i < P_1d; i++) {
873     for (CeedInt j = 0; j < P_1d; j++) {
874       CeedScalar sum = 0.0;
875       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
876       mass[i + j * P_1d] = sum;
877     }
878   }
879   // -- Laplacian
880   for (CeedInt i = 0; i < P_1d; i++) {
881     for (CeedInt j = 0; j < P_1d; j++) {
882       CeedScalar sum = 0.0;
883       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
884       laplace[i + j * P_1d] = sum;
885     }
886   }
887   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
888   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
889   return CEED_ERROR_SUCCESS;
890 }
891 CeedPragmaOptimizeOn
892 
893 /// @}
894 
895 /// ----------------------------------------------------------------------------
896 /// CeedOperator Backend API
897 /// ----------------------------------------------------------------------------
898 /// @addtogroup CeedOperatorBackend
899 /// @{
900 
901 /**
902   @brief Create object holding CeedQFunction assembly data for CeedOperator
903 
904   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
905   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
906 
907   @return An error code: 0 - success, otherwise - failure
908 
909   @ref Backend
910 **/
911 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
912   CeedCall(CeedCalloc(1, data));
913   (*data)->ref_count = 1;
914   (*data)->ceed      = ceed;
915   CeedCall(CeedReference(ceed));
916 
917   return CEED_ERROR_SUCCESS;
918 }
919 
920 /**
921   @brief Increment the reference counter for a CeedQFunctionAssemblyData
922 
923   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
924 
925   @return An error code: 0 - success, otherwise - failure
926 
927   @ref Backend
928 **/
929 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
930   data->ref_count++;
931   return CEED_ERROR_SUCCESS;
932 }
933 
934 /**
935   @brief Set re-use of CeedQFunctionAssemblyData
936 
937   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
938   @param[in]     reuse_data Boolean flag indicating data re-use
939 
940   @return An error code: 0 - success, otherwise - failure
941 
942   @ref Backend
943 **/
944 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
945   data->reuse_data        = reuse_data;
946   data->needs_data_update = true;
947   return CEED_ERROR_SUCCESS;
948 }
949 
950 /**
951   @brief Mark QFunctionAssemblyData as stale
952 
953   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
954   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
955 
956   @return An error code: 0 - success, otherwise - failure
957 
958   @ref Backend
959 **/
960 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
961   data->needs_data_update = needs_data_update;
962   return CEED_ERROR_SUCCESS;
963 }
964 
965 /**
966   @brief Determine if QFunctionAssemblyData needs update
967 
968   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
969   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
970 
971   @return An error code: 0 - success, otherwise - failure
972 
973   @ref Backend
974 **/
975 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
976   *is_update_needed = !data->reuse_data || data->needs_data_update;
977   return CEED_ERROR_SUCCESS;
978 }
979 
980 /**
981   @brief Copy the pointer to a CeedQFunctionAssemblyData.
982 
983   Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
984 
985   Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
986         CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
987         CeedQFunctionAssemblyData.
988 
989   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
990   @param[in,out] data_copy Variable to store copied reference
991 
992   @return An error code: 0 - success, otherwise - failure
993 
994   @ref Backend
995 **/
996 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
997   CeedCall(CeedQFunctionAssemblyDataReference(data));
998   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
999   *data_copy = data;
1000   return CEED_ERROR_SUCCESS;
1001 }
1002 
1003 /**
1004   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1005 
1006   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1007   @param[out] is_setup Boolean flag for setup status
1008 
1009   @return An error code: 0 - success, otherwise - failure
1010 
1011   @ref Backend
1012 **/
1013 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1014   *is_setup = data->is_setup;
1015   return CEED_ERROR_SUCCESS;
1016 }
1017 
1018 /**
1019   @brief Set internal objects for CeedQFunctionAssemblyData
1020 
1021   @param[in,out] data CeedQFunctionAssemblyData to set objects
1022   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1023   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1024 
1025   @return An error code: 0 - success, otherwise - failure
1026 
1027   @ref Backend
1028 **/
1029 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1030   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1031   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1032 
1033   data->is_setup = true;
1034   return CEED_ERROR_SUCCESS;
1035 }
1036 
1037 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1038   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1039 
1040   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1041   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1042 
1043   return CEED_ERROR_SUCCESS;
1044 }
1045 
1046 /**
1047   @brief Destroy CeedQFunctionAssemblyData
1048 
1049   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1050 
1051   @return An error code: 0 - success, otherwise - failure
1052 
1053   @ref Backend
1054 **/
1055 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1056   if (!*data || --(*data)->ref_count > 0) {
1057     *data = NULL;
1058     return CEED_ERROR_SUCCESS;
1059   }
1060   CeedCall(CeedDestroy(&(*data)->ceed));
1061   CeedCall(CeedVectorDestroy(&(*data)->vec));
1062   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1063 
1064   CeedCall(CeedFree(data));
1065   return CEED_ERROR_SUCCESS;
1066 }
1067 
1068 /**
1069   @brief Get CeedOperatorAssemblyData
1070 
1071   @param[in]  op   CeedOperator to assemble
1072   @param[out] data CeedQFunctionAssemblyData
1073 
1074   @return An error code: 0 - success, otherwise - failure
1075 
1076   @ref Backend
1077 **/
1078 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1079   if (!op->op_assembled) {
1080     CeedOperatorAssemblyData data;
1081 
1082     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1083     op->op_assembled = data;
1084   }
1085   *data = op->op_assembled;
1086 
1087   return CEED_ERROR_SUCCESS;
1088 }
1089 
1090 /**
1091   @brief Create object holding CeedOperator assembly data.
1092 
1093   The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1094   An array with references to the corresponding active CeedElemRestrictions is also stored.
1095   For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1096   The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1097 CeedEvalMode.
1098   The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1099   Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1100 
1101   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1102   @param[in]  op   CeedOperator to be assembled
1103   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1104 
1105   @return An error code: 0 - success, otherwise - failure
1106 
1107   @ref Backend
1108 **/
1109 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1110   CeedInt num_active_bases = 0;
1111 
1112   // Allocate
1113   CeedCall(CeedCalloc(1, data));
1114   (*data)->ceed = ceed;
1115   CeedCall(CeedReference(ceed));
1116 
1117   // Build OperatorAssembly data
1118   CeedQFunction       qf;
1119   CeedQFunctionField *qf_fields;
1120   CeedOperatorField  *op_fields;
1121   CeedInt             num_input_fields;
1122   CeedCall(CeedOperatorGetQFunction(op, &qf));
1123   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1124   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1125 
1126   // Determine active input basis
1127   CeedInt       *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0;
1128   CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL;
1129   CeedSize     **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1130   for (CeedInt i = 0; i < num_input_fields; i++) {
1131     CeedVector vec;
1132     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1133     if (vec == CEED_VECTOR_ACTIVE) {
1134       CeedBasis    basis_in = NULL;
1135       CeedEvalMode eval_mode;
1136       CeedInt      index = -1, dim, num_comp, q_comp;
1137       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1138       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1139       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1140       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1141       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1142       for (CeedInt i = 0; i < num_active_bases; i++) {
1143         if ((*data)->active_bases[i] == basis_in) index = i;
1144       }
1145       if (index == -1) {
1146         CeedElemRestriction elem_rstr_in;
1147         index = num_active_bases;
1148         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1149         (*data)->active_bases[num_active_bases] = NULL;
1150         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases]));
1151         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1152         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1153         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1154         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases]));
1155         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1156         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1157         num_eval_modes_in[index]  = 0;
1158         num_eval_modes_out[index] = 0;
1159         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1160         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1161         eval_modes_in[index]  = NULL;
1162         eval_modes_out[index] = NULL;
1163         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1164         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1165         eval_mode_offsets_in[index]  = NULL;
1166         eval_mode_offsets_out[index] = NULL;
1167         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1168         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1169         (*data)->assembled_bases_in[index]  = NULL;
1170         (*data)->assembled_bases_out[index] = NULL;
1171         num_active_bases++;
1172       }
1173       if (eval_mode != CEED_EVAL_WEIGHT) {
1174         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1175         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1176         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1177         for (CeedInt d = 0; d < q_comp; d++) {
1178           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1179           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1180           offset += num_comp;
1181         }
1182         num_eval_modes_in[index] += q_comp;
1183       }
1184     }
1185   }
1186   (*data)->num_eval_modes_in    = num_eval_modes_in;
1187   (*data)->eval_modes_in        = eval_modes_in;
1188   (*data)->eval_mode_offsets_in = eval_mode_offsets_in;
1189 
1190   // Determine active output basis
1191   CeedInt num_output_fields;
1192   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1193   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1194   offset = 0;
1195   for (CeedInt i = 0; i < num_output_fields; i++) {
1196     CeedVector vec;
1197     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1198     if (vec == CEED_VECTOR_ACTIVE) {
1199       CeedBasis    basis_out = NULL;
1200       CeedEvalMode eval_mode;
1201       CeedInt      index = -1, dim, num_comp, q_comp;
1202       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1203       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1204       CeedCall(CeedBasisGetDimension(basis_out, &dim));
1205       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1206       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1207       for (CeedInt i = 0; i < num_active_bases; i++) {
1208         if ((*data)->active_bases[i] == basis_out) index = i;
1209       }
1210       if (index == -1) {
1211         CeedElemRestriction elem_rstr_out;
1212 
1213         index = num_active_bases;
1214         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1215         (*data)->active_bases[num_active_bases] = NULL;
1216         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases]));
1217         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1218         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1219         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1220         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases]));
1221         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1222         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1223         num_eval_modes_in[index]  = 0;
1224         num_eval_modes_out[index] = 0;
1225         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1226         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1227         eval_modes_in[index]  = NULL;
1228         eval_modes_out[index] = NULL;
1229         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1230         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1231         eval_mode_offsets_in[index]  = NULL;
1232         eval_mode_offsets_out[index] = NULL;
1233         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1234         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1235         (*data)->assembled_bases_in[index]  = NULL;
1236         (*data)->assembled_bases_out[index] = NULL;
1237         num_active_bases++;
1238       }
1239       if (eval_mode != CEED_EVAL_WEIGHT) {
1240         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1241         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1242         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1243         for (CeedInt d = 0; d < q_comp; d++) {
1244           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1245           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1246           offset += num_comp;
1247         }
1248         num_eval_modes_out[index] += q_comp;
1249       }
1250     }
1251   }
1252   (*data)->num_output_components = offset;
1253   (*data)->num_eval_modes_out    = num_eval_modes_out;
1254   (*data)->eval_modes_out        = eval_modes_out;
1255   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1256   (*data)->num_active_bases      = num_active_bases;
1257 
1258   return CEED_ERROR_SUCCESS;
1259 }
1260 
1261 /**
1262   @brief Get CeedOperator CeedEvalModes for assembly.
1263 
1264   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1265 
1266   @param[in]  data                  CeedOperatorAssemblyData
1267   @param[out] num_active_bases      Total number of active bases
1268   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1269                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1270   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1271   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1272   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1273   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1274   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1275   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1276                                       including contributions of all active bases
1277 
1278   @return An error code: 0 - success, otherwise - failure
1279 
1280 
1281   @ref Backend
1282 **/
1283 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in,
1284                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out,
1285                                          const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) {
1286   if (num_active_bases) *num_active_bases = data->num_active_bases;
1287   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1288   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1289   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1290   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1291   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1292   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1293   if (num_output_components) *num_output_components = data->num_output_components;
1294 
1295   return CEED_ERROR_SUCCESS;
1296 }
1297 
1298 /**
1299   @brief Get CeedOperator CeedBasis data for assembly.
1300 
1301   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1302 
1303   @param[in]  data                CeedOperatorAssemblyData
1304   @param[out] num_active_bases    Number of active bases, or NULL
1305   @param[out] active_bases        Pointer to hold active CeedBasis, or NULL
1306   @param[out] assembled_bases_in  Pointer to hold assembled active input B, or NULL
1307   @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL
1308 
1309   @return An error code: 0 - success, otherwise - failure
1310 
1311   @ref Backend
1312 **/
1313 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases,
1314                                      const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) {
1315   // Assemble B_in, B_out if needed
1316   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1317     CeedInt num_qpts;
1318 
1319     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1320     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1321       CeedInt     num_nodes;
1322       CeedScalar *B_in = NULL, *identity = NULL;
1323       bool        has_eval_none = false;
1324 
1325       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1326       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1327 
1328       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1329         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1330       }
1331       if (has_eval_none) {
1332         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1333         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1334           identity[i * num_nodes + i] = 1.0;
1335         }
1336       }
1337 
1338       for (CeedInt q = 0; q < num_qpts; q++) {
1339         for (CeedInt n = 0; n < num_nodes; n++) {
1340           CeedInt      d_in              = 0, q_comp_in;
1341           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1342           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1343             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1344             const CeedScalar *B  = NULL;
1345             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B);
1346             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in));
1347             if (q_comp_in > 1) {
1348               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1349               else B = &B[(++d_in) * num_qpts * num_nodes];
1350             }
1351             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1352             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1353           }
1354         }
1355       }
1356       if (identity) CeedCall(CeedFree(identity));
1357       data->assembled_bases_in[b] = B_in;
1358     }
1359   }
1360 
1361   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1362     CeedInt num_qpts;
1363 
1364     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1365     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1366       CeedInt     num_nodes;
1367       bool        has_eval_none = false;
1368       CeedScalar *B_out = NULL, *identity = NULL;
1369 
1370       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1371       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1372 
1373       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1374         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1375       }
1376       if (has_eval_none) {
1377         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1378         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1379           identity[i * num_nodes + i] = 1.0;
1380         }
1381       }
1382 
1383       for (CeedInt q = 0; q < num_qpts; q++) {
1384         for (CeedInt n = 0; n < num_nodes; n++) {
1385           CeedInt      d_out              = 0, q_comp_out;
1386           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1387           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1388             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1389             const CeedScalar *B  = NULL;
1390             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B);
1391             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out));
1392             if (q_comp_out > 1) {
1393               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1394               else B = &B[(++d_out) * num_qpts * num_nodes];
1395             }
1396             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1397             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1398           }
1399         }
1400       }
1401       if (identity) CeedCall(CeedFree(identity));
1402       data->assembled_bases_out[b] = B_out;
1403     }
1404   }
1405 
1406   // Pass out assembled data
1407   if (active_bases) *active_bases = data->active_bases;
1408   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1409   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1410 
1411   return CEED_ERROR_SUCCESS;
1412 }
1413 
1414 /**
1415   @brief Get CeedOperator CeedBasis data for assembly.
1416 
1417   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1418 
1419   @param[in]  data                  CeedOperatorAssemblyData
1420   @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL
1421   @param[out] active_elem_rstrs     Pointer to hold active CeedElemRestrictions, or NULL
1422 
1423   @return An error code: 0 - success, otherwise - failure
1424 
1425   @ref Backend
1426 **/
1427 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs,
1428                                                 CeedElemRestriction **active_elem_rstrs) {
1429   if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases;
1430   if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs;
1431 
1432   return CEED_ERROR_SUCCESS;
1433 }
1434 
1435 /**
1436   @brief Destroy CeedOperatorAssemblyData
1437 
1438   @param[in,out] data CeedOperatorAssemblyData to destroy
1439 
1440   @return An error code: 0 - success, otherwise - failure
1441 
1442   @ref Backend
1443 **/
1444 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1445   if (!*data) {
1446     *data = NULL;
1447     return CEED_ERROR_SUCCESS;
1448   }
1449   CeedCall(CeedDestroy(&(*data)->ceed));
1450   for (CeedInt b = 0; b < (*data)->num_active_bases; b++) {
1451     CeedCall(CeedBasisDestroy(&(*data)->active_bases[b]));
1452     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b]));
1453     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1454     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1455     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1456     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1457     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1458     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1459   }
1460   CeedCall(CeedFree(&(*data)->active_bases));
1461   CeedCall(CeedFree(&(*data)->active_elem_rstrs));
1462   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1463   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1464   CeedCall(CeedFree(&(*data)->eval_modes_in));
1465   CeedCall(CeedFree(&(*data)->eval_modes_out));
1466   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1467   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1468   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1469   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1470 
1471   CeedCall(CeedFree(data));
1472   return CEED_ERROR_SUCCESS;
1473 }
1474 
1475 /// @}
1476 
1477 /// ----------------------------------------------------------------------------
1478 /// CeedOperator Public API
1479 /// ----------------------------------------------------------------------------
1480 /// @addtogroup CeedOperatorUser
1481 /// @{
1482 
1483 /**
1484   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1485 
1486   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1487   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices
1488 representing the action of the CeedQFunction for a corresponding quadrature point on an element.
1489 
1490   Inputs and outputs are in the order provided by the user when adding CeedOperator fields.
1491   For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction
1492 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1493 
1494   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1495 
1496   @param[in]  op        CeedOperator to assemble CeedQFunction
1497   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1498   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1499   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1500 
1501   @return An error code: 0 - success, otherwise - failure
1502 
1503   @ref User
1504 **/
1505 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1506   CeedCall(CeedOperatorCheckReady(op));
1507 
1508   if (op->LinearAssembleQFunction) {
1509     // Backend version
1510     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1511   } else {
1512     // Operator fallback
1513     CeedOperator op_fallback;
1514 
1515     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1516     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1517     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1518   }
1519   return CEED_ERROR_SUCCESS;
1520 }
1521 
1522 /**
1523   @brief Assemble CeedQFunction and store result internally.
1524 
1525   Return copied references of stored data to the caller.
1526   Caller is responsible for ownership and destruction of the copied references.
1527   See also @ref CeedOperatorLinearAssembleQFunction
1528 
1529   Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers.
1530         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
1531 
1532   @param[in]  op        CeedOperator to assemble CeedQFunction
1533   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1534   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1535   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1536 
1537   @return An error code: 0 - success, otherwise - failure
1538 
1539   @ref User
1540 **/
1541 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1542   CeedCall(CeedOperatorCheckReady(op));
1543 
1544   if (op->LinearAssembleQFunctionUpdate) {
1545     // Backend version
1546     bool                qf_assembled_is_setup;
1547     CeedVector          assembled_vec  = NULL;
1548     CeedElemRestriction assembled_rstr = NULL;
1549 
1550     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1551     if (qf_assembled_is_setup) {
1552       bool update_needed;
1553 
1554       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1555       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1556       if (update_needed) CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1557     } else {
1558       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1559       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1560     }
1561     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1562 
1563     // Copy reference from internally held copy
1564     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1565     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1566     CeedCall(CeedVectorDestroy(&assembled_vec));
1567     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1568   } else {
1569     // Operator fallback
1570     CeedOperator op_fallback;
1571 
1572     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1573     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1574     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1575   }
1576 
1577   return CEED_ERROR_SUCCESS;
1578 }
1579 
1580 /**
1581   @brief Assemble the diagonal of a square linear CeedOperator
1582 
1583   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1584 
1585   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1586 
1587   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1588 
1589   @param[in]  op        CeedOperator to assemble CeedQFunction
1590   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1591   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1592 
1593   @return An error code: 0 - success, otherwise - failure
1594 
1595   @ref User
1596 **/
1597 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1598   bool is_composite;
1599   CeedCall(CeedOperatorCheckReady(op));
1600   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1601 
1602   CeedSize input_size = 0, output_size = 0;
1603   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1604   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1605 
1606   // Early exit for empty operator
1607   if (!is_composite) {
1608     CeedInt num_elem = 0;
1609 
1610     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1611     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1612   }
1613 
1614   if (op->LinearAssembleDiagonal) {
1615     // Backend version
1616     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1617     return CEED_ERROR_SUCCESS;
1618   } else if (op->LinearAssembleAddDiagonal) {
1619     // Backend version with zeroing first
1620     CeedCall(CeedVectorSetValue(assembled, 0.0));
1621     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1622     return CEED_ERROR_SUCCESS;
1623   } else {
1624     // Operator fallback
1625     CeedOperator op_fallback;
1626 
1627     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1628     if (op_fallback) {
1629       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1630       return CEED_ERROR_SUCCESS;
1631     }
1632   }
1633   // Default interface implementation
1634   CeedCall(CeedVectorSetValue(assembled, 0.0));
1635   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1636 
1637   return CEED_ERROR_SUCCESS;
1638 }
1639 
1640 /**
1641   @brief Assemble the diagonal of a square linear CeedOperator
1642 
1643   This sums into a CeedVector the diagonal of a linear CeedOperator.
1644 
1645   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1646 
1647   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1648 
1649   @param[in]  op        CeedOperator to assemble CeedQFunction
1650   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1651   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1652 
1653   @return An error code: 0 - success, otherwise - failure
1654 
1655   @ref User
1656 **/
1657 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1658   bool is_composite;
1659   CeedCall(CeedOperatorCheckReady(op));
1660   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1661 
1662   CeedSize input_size = 0, output_size = 0;
1663   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1664   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1665 
1666   // Early exit for empty operator
1667   if (!is_composite) {
1668     CeedInt num_elem = 0;
1669 
1670     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1671     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1672   }
1673 
1674   if (op->LinearAssembleAddDiagonal) {
1675     // Backend version
1676     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1677     return CEED_ERROR_SUCCESS;
1678   } else {
1679     // Operator fallback
1680     CeedOperator op_fallback;
1681 
1682     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1683     if (op_fallback) {
1684       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1685       return CEED_ERROR_SUCCESS;
1686     }
1687   }
1688   // Default interface implementation
1689   if (is_composite) {
1690     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1691   } else {
1692     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1693   }
1694 
1695   return CEED_ERROR_SUCCESS;
1696 }
1697 
1698 /**
1699   @brief Assemble the point block diagonal of a square linear CeedOperator
1700 
1701   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1702 
1703   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1704 
1705   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1706 
1707   @param[in]  op        CeedOperator to assemble CeedQFunction
1708   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1709 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1710 component in].
1711   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1712 
1713   @return An error code: 0 - success, otherwise - failure
1714 
1715   @ref User
1716 **/
1717 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1718   bool is_composite;
1719   CeedCall(CeedOperatorCheckReady(op));
1720   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1721 
1722   CeedSize input_size = 0, output_size = 0;
1723   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1724   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1725 
1726   // Early exit for empty operator
1727   if (!is_composite) {
1728     CeedInt num_elem = 0;
1729 
1730     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1731     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1732   }
1733 
1734   if (op->LinearAssemblePointBlockDiagonal) {
1735     // Backend version
1736     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1737     return CEED_ERROR_SUCCESS;
1738   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1739     // Backend version with zeroing first
1740     CeedCall(CeedVectorSetValue(assembled, 0.0));
1741     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1742     return CEED_ERROR_SUCCESS;
1743   } else {
1744     // Operator fallback
1745     CeedOperator op_fallback;
1746 
1747     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1748     if (op_fallback) {
1749       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1750       return CEED_ERROR_SUCCESS;
1751     }
1752   }
1753   // Default interface implementation
1754   CeedCall(CeedVectorSetValue(assembled, 0.0));
1755   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1756 
1757   return CEED_ERROR_SUCCESS;
1758 }
1759 
1760 /**
1761   @brief Assemble the point block diagonal of a square linear CeedOperator
1762 
1763   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1764 
1765   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1766 
1767   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1768 
1769   @param[in]  op        CeedOperator to assemble CeedQFunction
1770   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1771 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1772 component in].
1773   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1774 
1775   @return An error code: 0 - success, otherwise - failure
1776 
1777   @ref User
1778 **/
1779 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1780   bool is_composite;
1781   CeedCall(CeedOperatorCheckReady(op));
1782   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1783 
1784   CeedSize input_size = 0, output_size = 0;
1785   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1786   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1787 
1788   // Early exit for empty operator
1789   if (!is_composite) {
1790     CeedInt num_elem = 0;
1791 
1792     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1793     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1794   }
1795 
1796   if (op->LinearAssembleAddPointBlockDiagonal) {
1797     // Backend version
1798     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1799     return CEED_ERROR_SUCCESS;
1800   } else {
1801     // Operator fallback
1802     CeedOperator op_fallback;
1803 
1804     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1805     if (op_fallback) {
1806       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1807       return CEED_ERROR_SUCCESS;
1808     }
1809   }
1810   // Default interface implementation
1811   if (is_composite) {
1812     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1813   } else {
1814     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1815   }
1816 
1817   return CEED_ERROR_SUCCESS;
1818 }
1819 
1820 /**
1821    @brief Fully assemble the nonzero pattern of a linear operator.
1822 
1823    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1824 
1825    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1826 matrix in entry (i, j).
1827   Note that the (i, j) pairs are not unique and may repeat.
1828   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1829 
1830    This will generally be slow unless your operator is low-order.
1831 
1832    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1833 
1834    @param[in]  op          CeedOperator to assemble
1835    @param[out] num_entries Number of entries in coordinate nonzero pattern
1836    @param[out] rows        Row number for each entry
1837    @param[out] cols        Column number for each entry
1838 
1839    @ref User
1840 **/
1841 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1842   CeedInt       num_suboperators, single_entries;
1843   CeedOperator *sub_operators;
1844   bool          is_composite;
1845   CeedCall(CeedOperatorCheckReady(op));
1846   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1847 
1848   if (op->LinearAssembleSymbolic) {
1849     // Backend version
1850     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1851     return CEED_ERROR_SUCCESS;
1852   } else {
1853     // Operator fallback
1854     CeedOperator op_fallback;
1855 
1856     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1857     if (op_fallback) {
1858       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1859       return CEED_ERROR_SUCCESS;
1860     }
1861   }
1862 
1863   // Default interface implementation
1864 
1865   // count entries and allocate rows, cols arrays
1866   *num_entries = 0;
1867   if (is_composite) {
1868     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1869     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1870     for (CeedInt k = 0; k < num_suboperators; ++k) {
1871       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1872       *num_entries += single_entries;
1873     }
1874   } else {
1875     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1876     *num_entries += single_entries;
1877   }
1878   CeedCall(CeedCalloc(*num_entries, rows));
1879   CeedCall(CeedCalloc(*num_entries, cols));
1880 
1881   // assemble nonzero locations
1882   CeedInt offset = 0;
1883   if (is_composite) {
1884     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1885     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1886     for (CeedInt k = 0; k < num_suboperators; ++k) {
1887       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1888       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1889       offset += single_entries;
1890     }
1891   } else {
1892     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1893   }
1894 
1895   return CEED_ERROR_SUCCESS;
1896 }
1897 
1898 /**
1899    @brief Fully assemble the nonzero entries of a linear operator.
1900 
1901    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1902 
1903    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1904 matrix in entry (i, j).
1905   Note that the (i, j) pairs are not unique and may repeat.
1906   This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1907 
1908    This will generally be slow unless your operator is low-order.
1909 
1910    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1911 
1912    @param[in]  op     CeedOperator to assemble
1913    @param[out] values Values to assemble into matrix
1914 
1915    @ref User
1916 **/
1917 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1918   CeedInt       num_suboperators, single_entries = 0;
1919   CeedOperator *sub_operators;
1920   bool          is_composite;
1921   CeedCall(CeedOperatorCheckReady(op));
1922   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1923 
1924   // Early exit for empty operator
1925   if (!is_composite) {
1926     CeedInt num_elem = 0;
1927 
1928     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1929     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1930   }
1931 
1932   if (op->LinearAssemble) {
1933     // Backend version
1934     CeedCall(op->LinearAssemble(op, values));
1935     return CEED_ERROR_SUCCESS;
1936   } else {
1937     // Operator fallback
1938     CeedOperator op_fallback;
1939 
1940     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1941     if (op_fallback) {
1942       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
1943       return CEED_ERROR_SUCCESS;
1944     }
1945   }
1946 
1947   // Default interface implementation
1948   CeedInt offset = 0;
1949   CeedCall(CeedVectorSetValue(values, 0.0));
1950   if (is_composite) {
1951     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1952     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1953     for (CeedInt k = 0; k < num_suboperators; k++) {
1954       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
1955       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1956       offset += single_entries;
1957     }
1958   } else {
1959     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
1960   }
1961 
1962   return CEED_ERROR_SUCCESS;
1963 }
1964 
1965 /**
1966   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
1967 
1968   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1969 
1970   @param[in]  op               Composite CeedOperator
1971   @param[in]  num_skip_indices Number of suboperators to skip
1972   @param[in]  skip_indices     Array of indices of suboperators to skip
1973   @param[out] mult             Vector to store multiplicity (of size l_size)
1974 
1975   @return An error code: 0 - success, otherwise - failure
1976 
1977   @ref User
1978 **/
1979 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
1980   CeedCall(CeedOperatorCheckReady(op));
1981 
1982   Ceed                ceed;
1983   CeedInt             num_suboperators;
1984   CeedSize            l_vec_len;
1985   CeedScalar         *mult_array;
1986   CeedVector          ones_l_vec;
1987   CeedElemRestriction elem_rstr;
1988   CeedOperator       *sub_operators;
1989 
1990   CeedCall(CeedOperatorGetCeed(op, &ceed));
1991 
1992   // Zero mult vector
1993   CeedCall(CeedVectorSetValue(mult, 0.0));
1994 
1995   // Get suboperators
1996   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1997   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1998   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
1999 
2000   // Work vector
2001   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2002   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2003   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2004   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2005 
2006   // Compute multiplicity across suboperators
2007   for (CeedInt i = 0; i < num_suboperators; i++) {
2008     const CeedScalar *sub_mult_array;
2009     CeedVector        sub_mult_l_vec, ones_e_vec;
2010 
2011     // -- Check for suboperator to skip
2012     for (CeedInt j = 0; j < num_skip_indices; j++) {
2013       if (skip_indices[j] == i) continue;
2014     }
2015 
2016     // -- Sub operator multiplicity
2017     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2018     CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2019     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2020     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2021     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2022     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2023     // ---- Flag every node present in the current suboperator
2024     for (CeedInt j = 0; j < l_vec_len; j++) {
2025       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2026     }
2027     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2028     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2029     CeedCall(CeedVectorDestroy(&ones_e_vec));
2030   }
2031   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2032   CeedCall(CeedVectorDestroy(&ones_l_vec));
2033 
2034   return CEED_ERROR_SUCCESS;
2035 }
2036 
2037 /**
2038   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2039 grid interpolation
2040 
2041   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2042 
2043   @param[in]  op_fine      Fine grid operator
2044   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2045   @param[in]  rstr_coarse  Coarse grid restriction
2046   @param[in]  basis_coarse Coarse grid active vector basis
2047   @param[out] op_coarse    Coarse grid operator
2048   @param[out] op_prolong   Coarse to fine operator, or NULL
2049   @param[out] op_restrict  Fine to coarse operator, or NULL
2050 
2051   @return An error code: 0 - success, otherwise - failure
2052 
2053   @ref User
2054 **/
2055 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2056                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2057   CeedCall(CeedOperatorCheckReady(op_fine));
2058 
2059   // Build prolongation matrix, if required
2060   CeedBasis basis_c_to_f = NULL;
2061   if (op_prolong || op_restrict) {
2062     CeedBasis basis_fine;
2063     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2064     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2065   }
2066 
2067   // Core code
2068   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2069 
2070   return CEED_ERROR_SUCCESS;
2071 }
2072 
2073 /**
2074   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2075 
2076   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2077 
2078   @param[in]  op_fine       Fine grid operator
2079   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2080   @param[in]  rstr_coarse   Coarse grid restriction
2081   @param[in]  basis_coarse  Coarse grid active vector basis
2082   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2083   @param[out] op_coarse     Coarse grid operator
2084   @param[out] op_prolong    Coarse to fine operator, or NULL
2085   @param[out] op_restrict   Fine to coarse operator, or NULL
2086 
2087   @return An error code: 0 - success, otherwise - failure
2088 
2089   @ref User
2090 **/
2091 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2092                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2093                                              CeedOperator *op_restrict) {
2094   CeedCall(CeedOperatorCheckReady(op_fine));
2095   Ceed ceed;
2096   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2097 
2098   // Check for compatible quadrature spaces
2099   CeedBasis basis_fine;
2100   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2101   CeedInt Q_f, Q_c;
2102   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2103   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2104   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2105 
2106   // Create coarse to fine basis, if required
2107   CeedBasis basis_c_to_f = NULL;
2108   if (op_prolong || op_restrict) {
2109     // Check if interpolation matrix is provided
2110     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2111               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2112     CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2113     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2114     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2115     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2116     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2117     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2118     CeedScalar *q_ref, *q_weight, *grad;
2119     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2120     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2121     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2122     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2123     CeedCall(CeedFree(&q_ref));
2124     CeedCall(CeedFree(&q_weight));
2125     CeedCall(CeedFree(&grad));
2126   }
2127 
2128   // Core code
2129   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2130   return CEED_ERROR_SUCCESS;
2131 }
2132 
2133 /**
2134   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2135 
2136   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2137 
2138   @param[in]  op_fine       Fine grid operator
2139   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2140   @param[in]  rstr_coarse   Coarse grid restriction
2141   @param[in]  basis_coarse  Coarse grid active vector basis
2142   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2143   @param[out] op_coarse     Coarse grid operator
2144   @param[out] op_prolong    Coarse to fine operator, or NULL
2145   @param[out] op_restrict   Fine to coarse operator, or NULL
2146 
2147   @return An error code: 0 - success, otherwise - failure
2148 
2149   @ref User
2150 **/
2151 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2152                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2153                                        CeedOperator *op_restrict) {
2154   CeedCall(CeedOperatorCheckReady(op_fine));
2155   Ceed ceed;
2156   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2157 
2158   // Check for compatible quadrature spaces
2159   CeedBasis basis_fine;
2160   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2161   CeedInt Q_f, Q_c;
2162   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2163   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2164   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2165 
2166   // Coarse to fine basis
2167   CeedBasis basis_c_to_f = NULL;
2168   if (op_prolong || op_restrict) {
2169     // Check if interpolation matrix is provided
2170     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2171               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2172     CeedElemTopology topo;
2173     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2174     CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2175     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2176     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2177     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2178     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2179     CeedScalar *q_ref, *q_weight, *grad;
2180     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2181     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2182     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2183     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2184     CeedCall(CeedFree(&q_ref));
2185     CeedCall(CeedFree(&q_weight));
2186     CeedCall(CeedFree(&grad));
2187   }
2188 
2189   // Core code
2190   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2191   return CEED_ERROR_SUCCESS;
2192 }
2193 
2194 /**
2195   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2196 
2197   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2198   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2199   The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T
2200 \hat S V\f$.
2201   The CeedOperator must be linear and non-composite.
2202   The associated CeedQFunction must therefore also be linear.
2203 
2204   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2205 
2206   @param[in]  op      CeedOperator to create element inverses
2207   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2208   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2209 
2210   @return An error code: 0 - success, otherwise - failure
2211 
2212   @ref User
2213 **/
2214 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2215   CeedCall(CeedOperatorCheckReady(op));
2216 
2217   if (op->CreateFDMElementInverse) {
2218     // Backend version
2219     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2220     return CEED_ERROR_SUCCESS;
2221   } else {
2222     // Operator fallback
2223     CeedOperator op_fallback;
2224 
2225     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2226     if (op_fallback) {
2227       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2228       return CEED_ERROR_SUCCESS;
2229     }
2230   }
2231 
2232   // Default interface implementation
2233   Ceed ceed, ceed_parent;
2234   CeedCall(CeedOperatorGetCeed(op, &ceed));
2235   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2236   ceed_parent = ceed_parent ? ceed_parent : ceed;
2237   CeedQFunction qf;
2238   CeedCall(CeedOperatorGetQFunction(op, &qf));
2239 
2240   // Determine active input basis
2241   bool                interp = false, grad = false;
2242   CeedBasis           basis = NULL;
2243   CeedElemRestriction rstr  = NULL;
2244   CeedOperatorField  *op_fields;
2245   CeedQFunctionField *qf_fields;
2246   CeedInt             num_input_fields;
2247   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2248   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2249   for (CeedInt i = 0; i < num_input_fields; i++) {
2250     CeedVector vec;
2251     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2252     if (vec == CEED_VECTOR_ACTIVE) {
2253       CeedEvalMode eval_mode;
2254       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2255       interp = interp || eval_mode == CEED_EVAL_INTERP;
2256       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2257       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2258       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2259     }
2260   }
2261   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2262   CeedSize l_size = 1;
2263   CeedInt  P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2264   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2265   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2266   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2267   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2268   CeedCall(CeedBasisGetDimension(basis, &dim));
2269   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2270   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2271   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2272 
2273   // Build and diagonalize 1D Mass and Laplacian
2274   bool is_tensor_basis;
2275   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2276   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2277   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2278   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2279   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2280   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2281   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2282   CeedCall(CeedCalloc(P_1d, &lambda));
2283   // -- Build matrices
2284   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2285   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2286   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2287   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2288   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2289 
2290   // -- Diagonalize
2291   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2292   CeedCall(CeedFree(&mass));
2293   CeedCall(CeedFree(&laplace));
2294   for (CeedInt i = 0; i < P_1d; i++) {
2295     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2296   }
2297   CeedCall(CeedFree(&x));
2298 
2299   // Assemble QFunction
2300   CeedVector          assembled = NULL;
2301   CeedElemRestriction rstr_qf   = NULL;
2302   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2303   CeedInt layout[3];
2304   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2305   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2306   CeedScalar max_norm = 0;
2307   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2308 
2309   // Calculate element averages
2310   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2311   CeedScalar       *elem_avg;
2312   const CeedScalar *assembled_array, *q_weight_array;
2313   CeedVector        q_weight;
2314   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2315   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2316   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2317   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2318   CeedCall(CeedCalloc(num_elem, &elem_avg));
2319   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2320   for (CeedInt e = 0; e < num_elem; e++) {
2321     CeedInt count = 0;
2322     for (CeedInt q = 0; q < num_qpts; q++) {
2323       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2324         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2325           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2326           count++;
2327         }
2328       }
2329     }
2330     if (count) {
2331       elem_avg[e] /= count;
2332     } else {
2333       elem_avg[e] = 1.0;
2334     }
2335   }
2336   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2337   CeedCall(CeedVectorDestroy(&assembled));
2338   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2339   CeedCall(CeedVectorDestroy(&q_weight));
2340 
2341   // Build FDM diagonal
2342   CeedVector  q_data;
2343   CeedScalar *q_data_array, *fdm_diagonal;
2344   CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2345   const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2346   for (CeedInt c = 0; c < num_comp; c++) {
2347     for (CeedInt n = 0; n < num_nodes; n++) {
2348       if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2349       if (grad) {
2350         for (CeedInt d = 0; d < dim; d++) {
2351           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2352           fdm_diagonal[c * num_nodes + n] += lambda[i];
2353         }
2354       }
2355       if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2356     }
2357   }
2358   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2359   CeedCall(CeedVectorSetValue(q_data, 0.0));
2360   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2361   for (CeedInt e = 0; e < num_elem; e++) {
2362     for (CeedInt c = 0; c < num_comp; c++) {
2363       for (CeedInt n = 0; n < num_nodes; n++) q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2364     }
2365   }
2366   CeedCall(CeedFree(&elem_avg));
2367   CeedCall(CeedFree(&fdm_diagonal));
2368   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2369 
2370   // Setup FDM operator
2371   // -- Basis
2372   CeedBasis   fdm_basis;
2373   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2374   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2375   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2376   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2377   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2378   CeedCall(CeedFree(&fdm_interp));
2379   CeedCall(CeedFree(&grad_dummy));
2380   CeedCall(CeedFree(&q_ref_dummy));
2381   CeedCall(CeedFree(&q_weight_dummy));
2382   CeedCall(CeedFree(&lambda));
2383 
2384   // -- Restriction
2385   CeedElemRestriction rstr_qd_i;
2386   CeedInt             strides[3] = {1, num_nodes, num_nodes * num_comp};
2387   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i));
2388   // -- QFunction
2389   CeedQFunction qf_fdm;
2390   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2391   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2392   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2393   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2394   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2395   // -- QFunction context
2396   CeedInt *num_comp_data;
2397   CeedCall(CeedCalloc(1, &num_comp_data));
2398   num_comp_data[0] = num_comp;
2399   CeedQFunctionContext ctx_fdm;
2400   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2401   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2402   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2403   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2404   // -- Operator
2405   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2406   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2407   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2408   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2409 
2410   // Cleanup
2411   CeedCall(CeedVectorDestroy(&q_data));
2412   CeedCall(CeedBasisDestroy(&fdm_basis));
2413   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2414   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2415 
2416   return CEED_ERROR_SUCCESS;
2417 }
2418 
2419 /// @}
2420