xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision 859c15bb6fe60f9064a52dbb084dbd4855ef54f7)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <assert.h>
9 #include <ceed-impl.h>
10 #include <ceed/backend.h>
11 #include <ceed/ceed.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) {
128       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     }
130     // Cleanup
131     CeedCall(CeedQFunctionDestroy(&qf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
133     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
134   }
135   CeedCall(CeedOperatorSetName(op_fallback, op->name));
136   CeedCall(CeedOperatorCheckReady(op_fallback));
137   op->op_fallback = op_fallback;
138 
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
144 
145   @param[in]  op          CeedOperator to retrieve fallback for
146   @param[out] op_fallback Fallback CeedOperator
147 
148   @return An error code: 0 - success, otherwise - failure
149 
150   @ref Developer
151 **/
152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
153   // Create if needed
154   if (!op->op_fallback) {
155     CeedCall(CeedOperatorCreateFallback(op));
156   }
157   if (op->op_fallback) {
158     bool is_debug;
159 
160     CeedCall(CeedIsDebug(op->ceed, &is_debug));
161     if (is_debug) {
162       Ceed        ceed, ceed_fallback;
163       const char *resource, *resource_fallback;
164 
165       CeedCall(CeedOperatorGetCeed(op, &ceed));
166       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
167       CeedCall(CeedGetResource(ceed, &resource));
168       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
169 
170       CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n");
171       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
172                 op->op_fallback);
173     }
174   }
175   *op_fallback = op->op_fallback;
176 
177   return CEED_ERROR_SUCCESS;
178 }
179 
180 /**
181   @brief Select correct basis matrix pointer based on CeedEvalMode
182 
183   @param[in]  eval_mode Current basis evaluation mode
184   @param[in]  identity  Pointer to identity matrix
185   @param[in]  interp    Pointer to interpolation matrix
186   @param[in]  grad      Pointer to gradient matrix
187   @param[out] basis_ptr Basis pointer to set
188 
189   @ref Developer
190 **/
191 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad,
192                                                const CeedScalar **basis_ptr) {
193   switch (eval_mode) {
194     case CEED_EVAL_NONE:
195       *basis_ptr = identity;
196       break;
197     case CEED_EVAL_INTERP:
198       *basis_ptr = interp;
199       break;
200     case CEED_EVAL_GRAD:
201       *basis_ptr = grad;
202       break;
203     case CEED_EVAL_WEIGHT:
204     case CEED_EVAL_DIV:
205     case CEED_EVAL_CURL:
206       break;  // Caught by QF Assembly
207   }
208   assert(*basis_ptr != NULL);
209 }
210 
211 /**
212   @brief Create point block restriction for active operator field
213 
214   @param[in]  rstr            Original CeedElemRestriction for active field
215   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
216 
217   @return An error code: 0 - success, otherwise - failure
218 
219   @ref Developer
220 **/
221 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
222   Ceed ceed;
223   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
224   const CeedInt *offsets;
225   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
226 
227   // Expand offsets
228   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
229   CeedSize l_size;
230   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
231   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
232   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
233   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
234   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
235   CeedInt shift = num_comp;
236   if (comp_stride != 1) shift *= num_comp;
237   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
238   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
239     pointblock_offsets[i] = offsets[i] * shift;
240   }
241 
242   // Create new restriction
243   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
244                                      pointblock_offsets, pointblock_rstr));
245 
246   // Cleanup
247   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
248 
249   return CEED_ERROR_SUCCESS;
250 }
251 
252 /**
253   @brief Core logic for assembling operator diagonal or point block diagonal
254 
255   @param[in]  op            CeedOperator to assemble point block diagonal
256   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
257   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
258   @param[out] assembled     CeedVector to store assembled diagonal
259 
260   @return An error code: 0 - success, otherwise - failure
261 
262   @ref Developer
263 **/
264 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
265   Ceed ceed;
266   CeedCall(CeedOperatorGetCeed(op, &ceed));
267 
268   // Assemble QFunction
269   CeedQFunction       qf;
270   const CeedScalar   *assembled_qf_array;
271   CeedVector          assembled_qf;
272   CeedElemRestriction assembled_elem_rstr;
273   CeedInt             num_input_fields, num_output_fields;
274   CeedInt             layout[3];
275 
276   CeedCall(CeedOperatorGetQFunction(op, &qf));
277   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
278   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
279   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout));
280   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
281   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
282 
283   // Get assembly data
284   CeedOperatorAssemblyData data;
285   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
286   CeedInt                 *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
287   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
288   CeedBasis               *active_bases;
289   CeedElemRestriction     *active_elem_rstrs;
290 
291   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
292   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
293                                                 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components));
294   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL));
295   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs));
296 
297   // Loop over all active bases
298   for (CeedInt b = 0; b < num_active_bases; b++) {
299     // Assemble point block diagonal restriction, if needed
300     CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b];
301 
302     if (is_pointblock) {
303       CeedElemRestriction point_block_elem_rstr;
304 
305       CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr));
306       diag_elem_rstr = point_block_elem_rstr;
307     }
308 
309     // Create diagonal vector
310     CeedVector elem_diag;
311     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
312 
313     // Assemble element operator diagonals
314     CeedScalar *elem_diag_array;
315     CeedInt     num_elem, num_nodes, num_qpts, num_components;
316 
317     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
318     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
319     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
320     CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes));
321     CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components));
322     CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts));
323 
324     // Basis matrices
325     const CeedScalar *interp, *grad;
326     CeedScalar       *identity      = NULL;
327     bool              has_eval_none = false;
328     for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) {
329       has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE);
330     }
331     for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) {
332       has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE);
333     }
334     if (has_eval_none) {
335       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
336       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
337     }
338     CeedCall(CeedBasisGetInterp(active_bases[b], &interp));
339     CeedCall(CeedBasisGetGrad(active_bases[b], &grad));
340     // Compute the diagonal of B^T D B
341     // Each element
342     for (CeedInt e = 0; e < num_elem; e++) {
343       CeedInt d_out = -1;
344       // Each basis eval mode pair
345       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) {
346         const CeedScalar *B_t = NULL;
347         if (eval_modes_out[b][e_out] == CEED_EVAL_GRAD) d_out += 1;
348         CeedOperatorGetBasisPointer(eval_modes_out[b][e_out], identity, interp, &grad[d_out * num_qpts * num_nodes], &B_t);
349         CeedInt d_in = -1;
350         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) {
351           const CeedScalar *B = NULL;
352           if (eval_modes_in[b][e_in] == CEED_EVAL_GRAD) d_in += 1;
353           CeedOperatorGetBasisPointer(eval_modes_in[b][e_in], identity, interp, &grad[d_in * num_qpts * num_nodes], &B);
354           // Each component
355           for (CeedInt c_out = 0; c_out < num_components; c_out++) {
356             // Each qpt/node pair
357             for (CeedInt q = 0; q < num_qpts; q++) {
358               if (is_pointblock) {
359                 // Point Block Diagonal
360                 for (CeedInt c_in = 0; c_in < num_components; c_in++) {
361                   const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
362                   const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
363                   for (CeedInt n = 0; n < num_nodes; n++) {
364                     elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] +=
365                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
366                   }
367                 }
368               } else {
369                 // Diagonal Only
370                 const CeedInt    c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
371                 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
372                 for (CeedInt n = 0; n < num_nodes; n++) {
373                   elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
374                 }
375               }
376             }
377           }
378         }
379       }
380     }
381     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
382 
383     // Assemble local operator diagonal
384     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
385 
386     // Cleanup
387     if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
388     CeedCall(CeedVectorDestroy(&elem_diag));
389     CeedCall(CeedFree(&identity));
390   }
391   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
392   CeedCall(CeedVectorDestroy(&assembled_qf));
393 
394   return CEED_ERROR_SUCCESS;
395 }
396 
397 /**
398   @brief Core logic for assembling composite operator diagonal
399 
400   @param[in]  op            CeedOperator to assemble point block diagonal
401   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
402   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
403   @param[out] assembled     CeedVector to store assembled diagonal
404 
405   @return An error code: 0 - success, otherwise - failure
406 
407   @ref Developer
408 **/
409 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
410                                                                  CeedVector assembled) {
411   CeedInt       num_sub;
412   CeedOperator *suboperators;
413   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
414   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
415   for (CeedInt i = 0; i < num_sub; i++) {
416     if (is_pointblock) {
417       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
418     } else {
419       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
420     }
421   }
422   return CEED_ERROR_SUCCESS;
423 }
424 
425 /**
426   @brief Build nonzero pattern for non-composite operator
427 
428   Users should generally use CeedOperatorLinearAssembleSymbolic()
429 
430   @param[in]  op     CeedOperator to assemble nonzero pattern
431   @param[in]  offset Offset for number of entries
432   @param[out] rows   Row number for each entry
433   @param[out] cols   Column number for each entry
434 
435   @return An error code: 0 - success, otherwise - failure
436 
437   @ref Developer
438 **/
439 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
440   Ceed ceed;
441   bool is_composite;
442   CeedCall(CeedOperatorGetCeed(op, &ceed));
443   CeedCall(CeedOperatorIsComposite(op, &is_composite));
444 
445   if (is_composite) {
446     // LCOV_EXCL_START
447     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
448     // LCOV_EXCL_STOP
449   }
450 
451   CeedSize num_nodes;
452   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
453   CeedElemRestriction rstr_in;
454   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
455   CeedInt num_elem, elem_size, num_comp;
456   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
457   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
458   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
459   CeedInt layout_er[3];
460   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
461 
462   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
463 
464   // Determine elem_dof relation
465   CeedVector index_vec;
466   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
467   CeedScalar *array;
468   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
469   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
470   CeedCall(CeedVectorRestoreArray(index_vec, &array));
471   CeedVector elem_dof;
472   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
473   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
474   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
475   const CeedScalar *elem_dof_a;
476   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
477   CeedCall(CeedVectorDestroy(&index_vec));
478 
479   // Determine i, j locations for element matrices
480   CeedInt count = 0;
481   for (CeedInt e = 0; e < num_elem; e++) {
482     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
483       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
484         for (CeedInt i = 0; i < elem_size; i++) {
485           for (CeedInt j = 0; j < elem_size; j++) {
486             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
487             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
488 
489             const CeedInt row = elem_dof_a[elem_dof_index_row];
490             const CeedInt col = elem_dof_a[elem_dof_index_col];
491 
492             rows[offset + count] = row;
493             cols[offset + count] = col;
494             count++;
495           }
496         }
497       }
498     }
499   }
500   if (count != local_num_entries) {
501     // LCOV_EXCL_START
502     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
503     // LCOV_EXCL_STOP
504   }
505   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
506   CeedCall(CeedVectorDestroy(&elem_dof));
507 
508   return CEED_ERROR_SUCCESS;
509 }
510 
511 /**
512   @brief Assemble nonzero entries for non-composite operator
513 
514   Users should generally use CeedOperatorLinearAssemble()
515 
516   @param[in]  op     CeedOperator to assemble
517   @param[in]  offset Offset for number of entries
518   @param[out] values Values to assemble into matrix
519 
520   @return An error code: 0 - success, otherwise - failure
521 
522   @ref Developer
523 **/
524 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
525   Ceed ceed;
526   bool is_composite;
527   CeedCall(CeedOperatorGetCeed(op, &ceed));
528   CeedCall(CeedOperatorIsComposite(op, &is_composite));
529 
530   if (is_composite) {
531     // LCOV_EXCL_START
532     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
533     // LCOV_EXCL_STOP
534   }
535 
536   // Early exit for empty operator
537   {
538     CeedInt num_elem = 0;
539 
540     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
541     if (num_elem == 0) return CEED_ERROR_SUCCESS;
542   }
543 
544   if (op->LinearAssembleSingle) {
545     // Backend version
546     CeedCall(op->LinearAssembleSingle(op, offset, values));
547     return CEED_ERROR_SUCCESS;
548   } else {
549     // Operator fallback
550     CeedOperator op_fallback;
551 
552     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
553     if (op_fallback) {
554       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
555       return CEED_ERROR_SUCCESS;
556     }
557   }
558 
559   // Assemble QFunction
560   CeedQFunction qf;
561   CeedCall(CeedOperatorGetQFunction(op, &qf));
562   CeedVector          assembled_qf;
563   CeedElemRestriction rstr_q;
564   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
565   CeedSize qf_length;
566   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
567 
568   CeedInt            num_input_fields, num_output_fields;
569   CeedOperatorField *input_fields;
570   CeedOperatorField *output_fields;
571   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
572 
573   // Get assembly data
574   CeedOperatorAssemblyData data;
575   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
576   const CeedEvalMode **eval_modes_in, **eval_modes_out;
577   CeedInt             *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
578   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out,
579                                                 &eval_modes_out, NULL, NULL));
580   CeedBasis *bases;
581   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL));
582   CeedBasis basis_in = bases[0];
583 
584   if (num_active_bases > 1) {
585     // LCOV_EXCL_START
586     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases");
587     // LCOV_EXCL_STOP
588   }
589   if (num_eval_modes_in[0] == 0 || num_eval_modes_out[0] == 0) {
590     // LCOV_EXCL_START
591     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
592     // LCOV_EXCL_STOP
593   }
594 
595   CeedElemRestriction active_rstr;
596   CeedInt             num_elem, elem_size, num_qpts, num_comp;
597   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
598   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
599   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
600   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
601   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
602 
603   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
604 
605   // loop over elements and put in data structure
606   const CeedScalar *interp_in, *grad_in;
607   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
608   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
609 
610   const CeedScalar *assembled_qf_array;
611   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
612 
613   CeedInt layout_qf[3];
614   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
615   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
616 
617   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
618   const CeedScalar **B_mats_in, **B_mats_out;
619   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out));
620   const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0];
621   CeedScalar        BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]];
622   CeedScalar        elem_mat[elem_size * elem_size];
623   CeedInt           count = 0;
624   CeedScalar       *vals;
625   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
626   for (CeedInt e = 0; e < num_elem; e++) {
627     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
628       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
629         // Compute B^T*D
630         for (CeedInt n = 0; n < elem_size; n++) {
631           for (CeedInt q = 0; q < num_qpts; q++) {
632             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
633               const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in);
634               CeedScalar    sum       = 0.0;
635               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
636                 const CeedInt b_out_index     = (num_eval_modes_out[0] * q + e_out) * elem_size + n;
637                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out;
638                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
639                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
640               }
641               BTD_mat[btd_index] = sum;
642             }
643           }
644         }
645         // form element matrix itself (for each block component)
646         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0]));
647 
648         // put element matrix in coordinate data structure
649         for (CeedInt i = 0; i < elem_size; i++) {
650           for (CeedInt j = 0; j < elem_size; j++) {
651             vals[offset + count] = elem_mat[i * elem_size + j];
652             count++;
653           }
654         }
655       }
656     }
657   }
658   if (count != local_num_entries) {
659     // LCOV_EXCL_START
660     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
661     // LCOV_EXCL_STOP
662   }
663   CeedCall(CeedVectorRestoreArray(values, &vals));
664 
665   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
666   CeedCall(CeedVectorDestroy(&assembled_qf));
667 
668   return CEED_ERROR_SUCCESS;
669 }
670 
671 /**
672   @brief Count number of entries for assembled CeedOperator
673 
674   @param[in]  op          CeedOperator to assemble
675   @param[out] num_entries Number of entries in assembled representation
676 
677   @return An error code: 0 - success, otherwise - failure
678 
679   @ref Utility
680 **/
681 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
682   bool                is_composite;
683   CeedElemRestriction rstr;
684   CeedInt             num_elem, elem_size, num_comp;
685 
686   CeedCall(CeedOperatorIsComposite(op, &is_composite));
687   if (is_composite) {
688     // LCOV_EXCL_START
689     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
690     // LCOV_EXCL_STOP
691   }
692   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
693   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
694   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
695   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
696   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
697 
698   return CEED_ERROR_SUCCESS;
699 }
700 
701 /**
702   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
703 
704   @param[in]  op_fine      Fine grid operator
705   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
706   @param[in]  rstr_coarse  Coarse grid restriction
707   @param[in]  basis_coarse Coarse grid active vector basis
708   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
709   @param[out] op_coarse    Coarse grid operator
710   @param[out] op_prolong   Coarse to fine operator, or NULL
711   @param[out] op_restrict  Fine to coarse operator, or NULL
712 
713   @return An error code: 0 - success, otherwise - failure
714 
715   @ref Developer
716 **/
717 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
718                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
719   Ceed       ceed;
720   CeedVector mult_vec = NULL;
721   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
722 
723   // Check for composite operator
724   bool is_composite;
725   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
726   if (is_composite) {
727     // LCOV_EXCL_START
728     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
729     // LCOV_EXCL_STOP
730   }
731 
732   // Coarse Grid
733   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
734   CeedElemRestriction rstr_fine = NULL;
735   // -- Clone input fields
736   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
737     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
738       rstr_fine = op_fine->input_fields[i]->elem_rstr;
739       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
740     } else {
741       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
742                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
743     }
744   }
745   // -- Clone output fields
746   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
747     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
748       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
749     } else {
750       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
751                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
752     }
753   }
754   // -- Clone QFunctionAssemblyData
755   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
756 
757   // Multiplicity vector
758   if (op_restrict || op_prolong) {
759     CeedVector mult_e_vec;
760 
761     if (!p_mult_fine) {
762       // LCOV_EXCL_START
763       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
764       // LCOV_EXCL_STOP
765     }
766     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
767     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
768     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
769     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
770     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
771     CeedCall(CeedVectorDestroy(&mult_e_vec));
772     CeedCall(CeedVectorReciprocal(mult_vec));
773   }
774 
775   // Clone name
776   bool   has_name = op_fine->name;
777   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
778   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
779 
780   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
781   if ((op_restrict || op_prolong) && !basis_c_to_f) {
782     // LCOV_EXCL_START
783     return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine basis");
784     // LCOV_EXCL_STOP
785   }
786 
787   // Restriction/Prolongation Operators
788   CeedInt num_comp;
789   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
790 
791   // Restriction
792   if (op_restrict) {
793     CeedInt             *num_comp_r_data;
794     CeedQFunction        qf_restrict;
795     CeedQFunctionContext ctx_r;
796 
797     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
798     CeedCall(CeedCalloc(1, &num_comp_r_data));
799     num_comp_r_data[0] = num_comp;
800     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
801     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
802     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
803     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
804     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
805     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
806     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
807     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
808 
809     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
810     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
811     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
812     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
813 
814     // Set name
815     char *restriction_name;
816     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
817     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
818     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
819     CeedCall(CeedFree(&restriction_name));
820 
821     // Check
822     CeedCall(CeedOperatorCheckReady(*op_restrict));
823 
824     // Cleanup
825     CeedCall(CeedQFunctionDestroy(&qf_restrict));
826   }
827 
828   // Prolongation
829   if (op_prolong) {
830     CeedInt             *num_comp_p_data;
831     CeedQFunction        qf_prolong;
832     CeedQFunctionContext ctx_p;
833 
834     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
835     CeedCall(CeedCalloc(1, &num_comp_p_data));
836     num_comp_p_data[0] = num_comp;
837     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
838     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
839     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
840     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
841     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
842     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
843     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
844     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
845 
846     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
847     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
848     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
849     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
850 
851     // Set name
852     char *prolongation_name;
853     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
854     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
855     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
856     CeedCall(CeedFree(&prolongation_name));
857 
858     // Check
859     CeedCall(CeedOperatorCheckReady(*op_prolong));
860 
861     // Cleanup
862     CeedCall(CeedQFunctionDestroy(&qf_prolong));
863   }
864 
865   // Check
866   CeedCall(CeedOperatorCheckReady(*op_coarse));
867 
868   // Cleanup
869   CeedCall(CeedVectorDestroy(&mult_vec));
870   CeedCall(CeedBasisDestroy(&basis_c_to_f));
871 
872   return CEED_ERROR_SUCCESS;
873 }
874 
875 /**
876   @brief Build 1D mass matrix and Laplacian with perturbation
877 
878   @param[in]  interp_1d   Interpolation matrix in one dimension
879   @param[in]  grad_1d     Gradient matrix in one dimension
880   @param[in]  q_weight_1d Quadrature weights in one dimension
881   @param[in]  P_1d        Number of basis nodes in one dimension
882   @param[in]  Q_1d        Number of quadrature points in one dimension
883   @param[in]  dim         Dimension of basis
884   @param[out] mass        Assembled mass matrix in one dimension
885   @param[out] laplace     Assembled perturbed Laplacian in one dimension
886 
887   @return An error code: 0 - success, otherwise - failure
888 
889   @ref Developer
890 **/
891 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d,
892                                                       CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
893   for (CeedInt i = 0; i < P_1d; i++) {
894     for (CeedInt j = 0; j < P_1d; j++) {
895       CeedScalar sum = 0.0;
896       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
897       mass[i + j * P_1d] = sum;
898     }
899   }
900   // -- Laplacian
901   for (CeedInt i = 0; i < P_1d; i++) {
902     for (CeedInt j = 0; j < P_1d; j++) {
903       CeedScalar sum = 0.0;
904       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
905       laplace[i + j * P_1d] = sum;
906     }
907   }
908   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
909   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
910   return CEED_ERROR_SUCCESS;
911 }
912 CeedPragmaOptimizeOn;
913 
914 /// @}
915 
916 /// ----------------------------------------------------------------------------
917 /// CeedOperator Backend API
918 /// ----------------------------------------------------------------------------
919 /// @addtogroup CeedOperatorBackend
920 /// @{
921 
922 /**
923   @brief Create object holding CeedQFunction assembly data for CeedOperator
924 
925   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
926   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
927 
928   @return An error code: 0 - success, otherwise - failure
929 
930   @ref Backend
931 **/
932 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
933   CeedCall(CeedCalloc(1, data));
934   (*data)->ref_count = 1;
935   (*data)->ceed      = ceed;
936   CeedCall(CeedReference(ceed));
937 
938   return CEED_ERROR_SUCCESS;
939 }
940 
941 /**
942   @brief Increment the reference counter for a CeedQFunctionAssemblyData
943 
944   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
945 
946   @return An error code: 0 - success, otherwise - failure
947 
948   @ref Backend
949 **/
950 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
951   data->ref_count++;
952   return CEED_ERROR_SUCCESS;
953 }
954 
955 /**
956   @brief Set re-use of CeedQFunctionAssemblyData
957 
958   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
959   @param[in]     reuse_data Boolean flag indicating data re-use
960 
961   @return An error code: 0 - success, otherwise - failure
962 
963   @ref Backend
964 **/
965 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
966   data->reuse_data        = reuse_data;
967   data->needs_data_update = true;
968   return CEED_ERROR_SUCCESS;
969 }
970 
971 /**
972   @brief Mark QFunctionAssemblyData as stale
973 
974   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
975   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
976 
977   @return An error code: 0 - success, otherwise - failure
978 
979   @ref Backend
980 **/
981 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
982   data->needs_data_update = needs_data_update;
983   return CEED_ERROR_SUCCESS;
984 }
985 
986 /**
987   @brief Determine if QFunctionAssemblyData needs update
988 
989   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
990   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
991 
992   @return An error code: 0 - success, otherwise - failure
993 
994   @ref Backend
995 **/
996 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
997   *is_update_needed = !data->reuse_data || data->needs_data_update;
998   return CEED_ERROR_SUCCESS;
999 }
1000 
1001 /**
1002   @brief Copy the pointer to a CeedQFunctionAssemblyData.
1003            Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
1004 
1005            Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
1006              CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
1007              CeedQFunctionAssemblyData.
1008 
1009   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
1010   @param[in,out] data_copy Variable to store copied reference
1011 
1012   @return An error code: 0 - success, otherwise - failure
1013 
1014   @ref Backend
1015 **/
1016 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1017   CeedCall(CeedQFunctionAssemblyDataReference(data));
1018   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1019   *data_copy = data;
1020   return CEED_ERROR_SUCCESS;
1021 }
1022 
1023 /**
1024   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1025 
1026   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1027   @param[out] is_setup Boolean flag for setup status
1028 
1029   @return An error code: 0 - success, otherwise - failure
1030 
1031   @ref Backend
1032 **/
1033 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1034   *is_setup = data->is_setup;
1035   return CEED_ERROR_SUCCESS;
1036 }
1037 
1038 /**
1039   @brief Set internal objects for CeedQFunctionAssemblyData
1040 
1041   @param[in,out] data CeedQFunctionAssemblyData to set objects
1042   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1043   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1044 
1045   @return An error code: 0 - success, otherwise - failure
1046 
1047   @ref Backend
1048 **/
1049 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1050   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1051   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1052 
1053   data->is_setup = true;
1054   return CEED_ERROR_SUCCESS;
1055 }
1056 
1057 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1058   if (!data->is_setup) {
1059     // LCOV_EXCL_START
1060     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1061     // LCOV_EXCL_STOP
1062   }
1063 
1064   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1065   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1066 
1067   return CEED_ERROR_SUCCESS;
1068 }
1069 
1070 /**
1071   @brief Destroy CeedQFunctionAssemblyData
1072 
1073   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1074 
1075   @return An error code: 0 - success, otherwise - failure
1076 
1077   @ref Backend
1078 **/
1079 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1080   if (!*data || --(*data)->ref_count > 0) {
1081     *data = NULL;
1082     return CEED_ERROR_SUCCESS;
1083   }
1084   CeedCall(CeedDestroy(&(*data)->ceed));
1085   CeedCall(CeedVectorDestroy(&(*data)->vec));
1086   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1087 
1088   CeedCall(CeedFree(data));
1089   return CEED_ERROR_SUCCESS;
1090 }
1091 
1092 /**
1093   @brief Get CeedOperatorAssemblyData
1094 
1095   @param[in]  op   CeedOperator to assemble
1096   @param[out] data CeedQFunctionAssemblyData
1097 
1098   @return An error code: 0 - success, otherwise - failure
1099 
1100   @ref Backend
1101 **/
1102 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1103   if (!op->op_assembled) {
1104     CeedOperatorAssemblyData data;
1105 
1106     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1107     op->op_assembled = data;
1108   }
1109   *data = op->op_assembled;
1110 
1111   return CEED_ERROR_SUCCESS;
1112 }
1113 
1114 /**
1115   @brief Create object holding CeedOperator assembly data.
1116 
1117     The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1118     An array with references to the corresponding active CeedElemRestrictions is also stored.
1119     For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1120     The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1121       CeedEvalMode.
1122     The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1123     Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1124 
1125   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1126   @param[in]  op   CeedOperator to be assembled
1127   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1128 
1129   @return An error code: 0 - success, otherwise - failure
1130 
1131   @ref Backend
1132 **/
1133 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1134   CeedInt num_active_bases = 0;
1135 
1136   // Allocate
1137   CeedCall(CeedCalloc(1, data));
1138   (*data)->ceed = ceed;
1139   CeedCall(CeedReference(ceed));
1140 
1141   // Build OperatorAssembly data
1142   CeedQFunction       qf;
1143   CeedQFunctionField *qf_fields;
1144   CeedOperatorField  *op_fields;
1145   CeedInt             num_input_fields;
1146   CeedCall(CeedOperatorGetQFunction(op, &qf));
1147   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1148   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1149 
1150   // Determine active input basis
1151   CeedInt       *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0;
1152   CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL;
1153   CeedSize     **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1154 
1155   for (CeedInt i = 0; i < num_input_fields; i++) {
1156     CeedVector vec;
1157 
1158     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1159     if (vec == CEED_VECTOR_ACTIVE) {
1160       CeedInt      index = -1, dim = 1, num_components;
1161       CeedBasis    basis_in = NULL;
1162       CeedEvalMode eval_mode;
1163 
1164       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1165       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1166       CeedCall(CeedBasisGetNumComponents(basis_in, &num_components));
1167       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1168       for (CeedInt i = 0; i < num_active_bases; i++) {
1169         if ((*data)->active_bases[i] == basis_in) index = i;
1170       }
1171       if (index == -1) {
1172         CeedElemRestriction elem_rstr_in;
1173 
1174         index = num_active_bases;
1175         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1176         (*data)->active_bases[num_active_bases] = NULL;
1177         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases]));
1178         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1179         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1180         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1181         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases]));
1182         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1183         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1184         num_eval_modes_in[index]  = 0;
1185         num_eval_modes_out[index] = 0;
1186         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1187         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1188         eval_modes_in[index]  = NULL;
1189         eval_modes_out[index] = NULL;
1190         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1191         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1192         eval_mode_offsets_in[index]  = NULL;
1193         eval_mode_offsets_out[index] = NULL;
1194         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1195         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1196         (*data)->assembled_bases_in[index]  = NULL;
1197         (*data)->assembled_bases_out[index] = NULL;
1198         num_active_bases++;
1199       }
1200       switch (eval_mode) {
1201         case CEED_EVAL_NONE:
1202         case CEED_EVAL_INTERP:
1203           CeedCall(CeedRealloc(num_eval_modes_in[index] + 1, &eval_modes_in[index]));
1204           CeedCall(CeedRealloc(num_eval_modes_in[index] + 1, &eval_mode_offsets_in[index]));
1205           eval_modes_in[index][num_eval_modes_in[index]]        = eval_mode;
1206           eval_mode_offsets_in[index][num_eval_modes_in[index]] = offset;
1207           offset += num_components;
1208           num_eval_modes_in[index] += 1;
1209           break;
1210         case CEED_EVAL_GRAD:
1211           CeedCall(CeedRealloc(num_eval_modes_in[index] + dim, &eval_modes_in[index]));
1212           CeedCall(CeedRealloc(num_eval_modes_in[index] + dim, &eval_mode_offsets_in[index]));
1213           for (CeedInt d = 0; d < dim; d++) {
1214             eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1215             eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1216             offset += num_components;
1217           }
1218           num_eval_modes_in[index] += dim;
1219           break;
1220         case CEED_EVAL_WEIGHT:
1221         case CEED_EVAL_DIV:
1222         case CEED_EVAL_CURL:
1223           break;  // Caught by QF Assembly
1224       }
1225     }
1226   }
1227   (*data)->num_eval_modes_in    = num_eval_modes_in;
1228   (*data)->eval_modes_in        = eval_modes_in;
1229   (*data)->eval_mode_offsets_in = eval_mode_offsets_in;
1230 
1231   // Determine active output basis
1232   CeedInt num_output_fields;
1233 
1234   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1235   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1236   offset = 0;
1237   for (CeedInt i = 0; i < num_output_fields; i++) {
1238     CeedVector vec;
1239 
1240     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1241     if (vec == CEED_VECTOR_ACTIVE) {
1242       CeedInt      index = -1, dim = 1, num_components;
1243       CeedBasis    basis_out = NULL;
1244       CeedEvalMode eval_mode;
1245 
1246       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1247       CeedCall(CeedBasisGetDimension(basis_out, &dim));
1248       CeedCall(CeedBasisGetNumComponents(basis_out, &num_components));
1249       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1250       for (CeedInt i = 0; i < num_active_bases; i++) {
1251         if ((*data)->active_bases[i] == basis_out) index = i;
1252       }
1253       if (index == -1) {
1254         CeedElemRestriction elem_rstr_out;
1255 
1256         index = num_active_bases;
1257         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1258         (*data)->active_bases[num_active_bases] = NULL;
1259         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases]));
1260         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1261         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1262         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1263         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases]));
1264         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1265         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1266         num_eval_modes_in[index]  = 0;
1267         num_eval_modes_out[index] = 0;
1268         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1269         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1270         eval_modes_in[index]  = NULL;
1271         eval_modes_out[index] = NULL;
1272         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1273         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1274         eval_mode_offsets_in[index]  = NULL;
1275         eval_mode_offsets_out[index] = NULL;
1276         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1277         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1278         (*data)->assembled_bases_in[index]  = NULL;
1279         (*data)->assembled_bases_out[index] = NULL;
1280         num_active_bases++;
1281       }
1282       switch (eval_mode) {
1283         case CEED_EVAL_NONE:
1284         case CEED_EVAL_INTERP:
1285           CeedCall(CeedRealloc(num_eval_modes_out[index] + 1, &eval_modes_out[index]));
1286           CeedCall(CeedRealloc(num_eval_modes_out[index] + 1, &eval_mode_offsets_out[index]));
1287           eval_modes_out[index][num_eval_modes_out[index]]        = eval_mode;
1288           eval_mode_offsets_out[index][num_eval_modes_out[index]] = offset;
1289           offset += num_components;
1290           num_eval_modes_out[index] += 1;
1291           break;
1292         case CEED_EVAL_GRAD:
1293           CeedCall(CeedRealloc(num_eval_modes_out[index] + dim, &eval_modes_out[index]));
1294           CeedCall(CeedRealloc(num_eval_modes_out[index] + dim, &eval_mode_offsets_out[index]));
1295           for (CeedInt d = 0; d < dim; d++) {
1296             eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1297             eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1298             offset += num_components;
1299           }
1300           num_eval_modes_out[index] += dim;
1301           break;
1302         case CEED_EVAL_WEIGHT:
1303         case CEED_EVAL_DIV:
1304         case CEED_EVAL_CURL:
1305           break;  // Caught by QF Assembly
1306       }
1307     }
1308   }
1309   (*data)->num_output_components = offset;
1310   (*data)->num_eval_modes_out    = num_eval_modes_out;
1311   (*data)->eval_modes_out        = eval_modes_out;
1312   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1313   (*data)->num_active_bases      = num_active_bases;
1314 
1315   return CEED_ERROR_SUCCESS;
1316 }
1317 
1318 /**
1319   @brief Get CeedOperator CeedEvalModes for assembly.
1320 
1321     Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1322 
1323   @param[in]  data                  CeedOperatorAssemblyData
1324   @param[out] num_active_bases      Total number of active bases
1325   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1326                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1327   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1328   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1329   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1330   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1331   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1332   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1333                                       including contributions of all active bases
1334 
1335   @return An error code: 0 - success, otherwise - failure
1336 
1337 
1338   @ref Backend
1339 **/
1340 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in,
1341                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out,
1342                                          const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) {
1343   if (num_active_bases) *num_active_bases = data->num_active_bases;
1344   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1345   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1346   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1347   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1348   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1349   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1350   if (num_output_components) *num_output_components = data->num_output_components;
1351 
1352   return CEED_ERROR_SUCCESS;
1353 }
1354 
1355 /**
1356   @brief Get CeedOperator CeedBasis data for assembly.
1357 
1358     Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1359 
1360   @param[in]  data                CeedOperatorAssemblyData
1361   @param[out] num_active_bases    Number of active bases, or NULL
1362   @param[out] active_bases        Pointer to hold active CeedBasis, or NULL
1363   @param[out] assembled_bases_in  Pointer to hold assembled active input B, or NULL
1364   @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL
1365 
1366   @return An error code: 0 - success, otherwise - failure
1367 
1368   @ref Backend
1369 **/
1370 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases,
1371                                      const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) {
1372   // Assemble B_in, B_out if needed
1373   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1374     CeedInt num_qpts;
1375 
1376     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1377     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1378       CeedInt           elem_size;
1379       CeedScalar       *B_in = NULL, *identity = NULL;
1380       const CeedScalar *interp_in, *grad_in;
1381       bool              has_eval_none = false;
1382 
1383       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &elem_size));
1384       CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_modes_in[b], &B_in));
1385 
1386       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1387         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1388       }
1389       if (has_eval_none) {
1390         CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1391         for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1392           identity[i * elem_size + i] = 1.0;
1393         }
1394       }
1395       CeedCall(CeedBasisGetInterp(data->active_bases[b], &interp_in));
1396       CeedCall(CeedBasisGetGrad(data->active_bases[b], &grad_in));
1397 
1398       for (CeedInt q = 0; q < num_qpts; q++) {
1399         for (CeedInt n = 0; n < elem_size; n++) {
1400           CeedInt d_in = -1;
1401           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1402             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1403             const CeedScalar *B  = NULL;
1404 
1405             if (data->eval_modes_in[b][e_in] == CEED_EVAL_GRAD) d_in++;
1406             CeedOperatorGetBasisPointer(data->eval_modes_in[b][e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &B);
1407             B_in[(qq + e_in) * elem_size + n] = B[q * elem_size + n];
1408           }
1409         }
1410       }
1411       if (identity) CeedCall(CeedFree(identity));
1412       data->assembled_bases_in[b] = B_in;
1413     }
1414   }
1415 
1416   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1417     CeedInt num_qpts;
1418 
1419     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1420     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1421       CeedInt           elem_size;
1422       const CeedScalar *interp_out, *grad_out;
1423       bool              has_eval_none = false;
1424       CeedScalar       *B_out = NULL, *identity = NULL;
1425 
1426       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &elem_size));
1427       CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_modes_out[b], &B_out));
1428 
1429       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1430         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1431       }
1432       if (has_eval_none) {
1433         CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1434         for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1435           identity[i * elem_size + i] = 1.0;
1436         }
1437       }
1438       CeedCall(CeedBasisGetInterp(data->active_bases[b], &interp_out));
1439       CeedCall(CeedBasisGetGrad(data->active_bases[b], &grad_out));
1440 
1441       for (CeedInt q = 0; q < num_qpts; q++) {
1442         for (CeedInt n = 0; n < elem_size; n++) {
1443           CeedInt d_out = -1;
1444           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1445             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1446             const CeedScalar *B  = NULL;
1447 
1448             if (data->eval_modes_out[b][e_out] == CEED_EVAL_GRAD) d_out++;
1449             CeedOperatorGetBasisPointer(data->eval_modes_out[b][e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &B);
1450             B_out[(qq + e_out) * elem_size + n] = B[q * elem_size + n];
1451           }
1452         }
1453       }
1454       if (identity) CeedCall(CeedFree(identity));
1455       data->assembled_bases_out[b] = B_out;
1456     }
1457   }
1458 
1459   // Pass out assembled data
1460   if (active_bases) *active_bases = data->active_bases;
1461   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1462   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1463 
1464   return CEED_ERROR_SUCCESS;
1465 }
1466 
1467 /**
1468   @brief Get CeedOperator CeedBasis data for assembly.
1469 
1470   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1471 
1472   @param[in]  data                  CeedOperatorAssemblyData
1473   @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL
1474   @param[out] active_elem_rstrs     Pointer to hold active CeedElemRestrictions, or NULL
1475 
1476   @return An error code: 0 - success, otherwise - failure
1477 
1478   @ref Backend
1479 **/
1480 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs,
1481                                                 CeedElemRestriction **active_elem_rstrs) {
1482   if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases;
1483   if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs;
1484 
1485   return CEED_ERROR_SUCCESS;
1486 }
1487 
1488 /**
1489   @brief Destroy CeedOperatorAssemblyData
1490 
1491   @param[in,out] data CeedOperatorAssemblyData to destroy
1492 
1493   @return An error code: 0 - success, otherwise - failure
1494 
1495   @ref Backend
1496 **/
1497 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1498   if (!*data) {
1499     *data = NULL;
1500     return CEED_ERROR_SUCCESS;
1501   }
1502   CeedCall(CeedDestroy(&(*data)->ceed));
1503   for (CeedInt b = 0; b < (*data)->num_active_bases; b++) {
1504     CeedCall(CeedBasisDestroy(&(*data)->active_bases[b]));
1505     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b]));
1506     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1507     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1508     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1509     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1510     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1511     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1512   }
1513   CeedCall(CeedFree(&(*data)->active_bases));
1514   CeedCall(CeedFree(&(*data)->active_elem_rstrs));
1515   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1516   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1517   CeedCall(CeedFree(&(*data)->eval_modes_in));
1518   CeedCall(CeedFree(&(*data)->eval_modes_out));
1519   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1520   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1521   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1522   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1523 
1524   CeedCall(CeedFree(data));
1525   return CEED_ERROR_SUCCESS;
1526 }
1527 
1528 /// @}
1529 
1530 /// ----------------------------------------------------------------------------
1531 /// CeedOperator Public API
1532 /// ----------------------------------------------------------------------------
1533 /// @addtogroup CeedOperatorUser
1534 /// @{
1535 
1536 /**
1537   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1538 
1539   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1540     The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices
1541 representing the action of the CeedQFunction for a corresponding quadrature point on an element.
1542 
1543   Inputs and outputs are in the order provided by the
1544 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order,
1545 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1]
1546 and producing the output [dv_0, dv_1, v].
1547 
1548   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1549 
1550   @param[in]  op        CeedOperator to assemble CeedQFunction
1551   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1552   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1553   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1554 
1555   @return An error code: 0 - success, otherwise - failure
1556 
1557   @ref User
1558 **/
1559 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1560   CeedCall(CeedOperatorCheckReady(op));
1561 
1562   if (op->LinearAssembleQFunction) {
1563     // Backend version
1564     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1565   } else {
1566     // Operator fallback
1567     CeedOperator op_fallback;
1568 
1569     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1570     if (op_fallback) {
1571       CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1572     } else {
1573       // LCOV_EXCL_START
1574       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1575       // LCOV_EXCL_STOP
1576     }
1577   }
1578   return CEED_ERROR_SUCCESS;
1579 }
1580 
1581 /**
1582   @brief Assemble CeedQFunction and store result internally.
1583            Return copied references of stored data to the caller.
1584            Caller is responsible for ownership and destruction of the copied references.
1585            See also @ref CeedOperatorLinearAssembleQFunction
1586 
1587   @param[in]  op        CeedOperator to assemble CeedQFunction
1588   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1589   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1590   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1591 
1592   @return An error code: 0 - success, otherwise - failure
1593 
1594   @ref User
1595 **/
1596 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1597   CeedCall(CeedOperatorCheckReady(op));
1598 
1599   if (op->LinearAssembleQFunctionUpdate) {
1600     // Backend version
1601     bool                qf_assembled_is_setup;
1602     CeedVector          assembled_vec  = NULL;
1603     CeedElemRestriction assembled_rstr = NULL;
1604 
1605     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1606     if (qf_assembled_is_setup) {
1607       bool update_needed;
1608 
1609       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1610       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1611       if (update_needed) {
1612         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1613       }
1614     } else {
1615       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1616       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1617     }
1618     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1619 
1620     // Copy reference from internally held copy
1621     *assembled = NULL;
1622     *rstr      = NULL;
1623     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1624     CeedCall(CeedVectorDestroy(&assembled_vec));
1625     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1626     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1627   } else {
1628     // Operator fallback
1629     CeedOperator op_fallback;
1630 
1631     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1632     if (op_fallback) {
1633       CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1634     } else {
1635       // LCOV_EXCL_START
1636       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1637       // LCOV_EXCL_STOP
1638     }
1639   }
1640 
1641   return CEED_ERROR_SUCCESS;
1642 }
1643 
1644 /**
1645   @brief Assemble the diagonal of a square linear CeedOperator
1646 
1647   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1648 
1649   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1650 
1651   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1652 
1653   @param[in]  op        CeedOperator to assemble CeedQFunction
1654   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1655   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1656 
1657   @return An error code: 0 - success, otherwise - failure
1658 
1659   @ref User
1660 **/
1661 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1662   bool is_composite;
1663   CeedCall(CeedOperatorCheckReady(op));
1664   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1665 
1666   CeedSize input_size = 0, output_size = 0;
1667   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1668   if (input_size != output_size) {
1669     // LCOV_EXCL_START
1670     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1671     // LCOV_EXCL_STOP
1672   }
1673 
1674   // Early exit for empty operator
1675   if (!is_composite) {
1676     CeedInt num_elem = 0;
1677 
1678     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1679     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1680   }
1681 
1682   if (op->LinearAssembleDiagonal) {
1683     // Backend version
1684     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1685     return CEED_ERROR_SUCCESS;
1686   } else if (op->LinearAssembleAddDiagonal) {
1687     // Backend version with zeroing first
1688     CeedCall(CeedVectorSetValue(assembled, 0.0));
1689     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1690     return CEED_ERROR_SUCCESS;
1691   } else {
1692     // Operator fallback
1693     CeedOperator op_fallback;
1694 
1695     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1696     if (op_fallback) {
1697       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1698       return CEED_ERROR_SUCCESS;
1699     }
1700   }
1701   // Default interface implementation
1702   CeedCall(CeedVectorSetValue(assembled, 0.0));
1703   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1704 
1705   return CEED_ERROR_SUCCESS;
1706 }
1707 
1708 /**
1709   @brief Assemble the diagonal of a square linear CeedOperator
1710 
1711   This sums into a CeedVector the diagonal of a linear CeedOperator.
1712 
1713   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1714 
1715   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1716 
1717   @param[in]  op        CeedOperator to assemble CeedQFunction
1718   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1719   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1720 
1721   @return An error code: 0 - success, otherwise - failure
1722 
1723   @ref User
1724 **/
1725 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1726   bool is_composite;
1727   CeedCall(CeedOperatorCheckReady(op));
1728   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1729 
1730   CeedSize input_size = 0, output_size = 0;
1731   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1732   if (input_size != output_size) {
1733     // LCOV_EXCL_START
1734     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1735     // LCOV_EXCL_STOP
1736   }
1737 
1738   // Early exit for empty operator
1739   if (!is_composite) {
1740     CeedInt num_elem = 0;
1741 
1742     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1743     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1744   }
1745 
1746   if (op->LinearAssembleAddDiagonal) {
1747     // Backend version
1748     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1749     return CEED_ERROR_SUCCESS;
1750   } else {
1751     // Operator fallback
1752     CeedOperator op_fallback;
1753 
1754     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1755     if (op_fallback) {
1756       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1757       return CEED_ERROR_SUCCESS;
1758     }
1759   }
1760   // Default interface implementation
1761   if (is_composite) {
1762     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1763   } else {
1764     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1765   }
1766 
1767   return CEED_ERROR_SUCCESS;
1768 }
1769 
1770 /**
1771   @brief Assemble the point block diagonal of a square linear CeedOperator
1772 
1773   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1774 
1775   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1776 
1777   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1778 
1779   @param[in]  op        CeedOperator to assemble CeedQFunction
1780   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1781 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1782 component in].
1783   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1784 
1785   @return An error code: 0 - success, otherwise - failure
1786 
1787   @ref User
1788 **/
1789 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1790   bool is_composite;
1791   CeedCall(CeedOperatorCheckReady(op));
1792   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1793 
1794   CeedSize input_size = 0, output_size = 0;
1795   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1796   if (input_size != output_size) {
1797     // LCOV_EXCL_START
1798     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1799     // LCOV_EXCL_STOP
1800   }
1801 
1802   // Early exit for empty operator
1803   if (!is_composite) {
1804     CeedInt num_elem = 0;
1805 
1806     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1807     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1808   }
1809 
1810   if (op->LinearAssemblePointBlockDiagonal) {
1811     // Backend version
1812     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1813     return CEED_ERROR_SUCCESS;
1814   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1815     // Backend version with zeroing first
1816     CeedCall(CeedVectorSetValue(assembled, 0.0));
1817     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1818     return CEED_ERROR_SUCCESS;
1819   } else {
1820     // Operator fallback
1821     CeedOperator op_fallback;
1822 
1823     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1824     if (op_fallback) {
1825       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1826       return CEED_ERROR_SUCCESS;
1827     }
1828   }
1829   // Default interface implementation
1830   CeedCall(CeedVectorSetValue(assembled, 0.0));
1831   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1832 
1833   return CEED_ERROR_SUCCESS;
1834 }
1835 
1836 /**
1837   @brief Assemble the point block diagonal of a square linear CeedOperator
1838 
1839   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1840 
1841   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1842 
1843   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1844 
1845   @param[in]  op        CeedOperator to assemble CeedQFunction
1846   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1847 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1848 component in].
1849   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1850 
1851   @return An error code: 0 - success, otherwise - failure
1852 
1853   @ref User
1854 **/
1855 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1856   bool is_composite;
1857   CeedCall(CeedOperatorCheckReady(op));
1858   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1859 
1860   CeedSize input_size = 0, output_size = 0;
1861   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1862   if (input_size != output_size) {
1863     // LCOV_EXCL_START
1864     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1865     // LCOV_EXCL_STOP
1866   }
1867 
1868   // Early exit for empty operator
1869   if (!is_composite) {
1870     CeedInt num_elem = 0;
1871 
1872     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1873     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1874   }
1875 
1876   if (op->LinearAssembleAddPointBlockDiagonal) {
1877     // Backend version
1878     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1879     return CEED_ERROR_SUCCESS;
1880   } else {
1881     // Operator fallback
1882     CeedOperator op_fallback;
1883 
1884     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1885     if (op_fallback) {
1886       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1887       return CEED_ERROR_SUCCESS;
1888     }
1889   }
1890   // Default interface implementation
1891   if (is_composite) {
1892     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1893   } else {
1894     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1895   }
1896 
1897   return CEED_ERROR_SUCCESS;
1898 }
1899 
1900 /**
1901    @brief Fully assemble the nonzero pattern of a linear operator.
1902 
1903    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1904 
1905    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1906 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j)
1907 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1908 
1909    This will generally be slow unless your operator is low-order.
1910 
1911    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1912 
1913    @param[in]  op          CeedOperator to assemble
1914    @param[out] num_entries Number of entries in coordinate nonzero pattern
1915    @param[out] rows        Row number for each entry
1916    @param[out] cols        Column number for each entry
1917 
1918    @ref User
1919 **/
1920 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1921   CeedInt       num_suboperators, single_entries;
1922   CeedOperator *sub_operators;
1923   bool          is_composite;
1924   CeedCall(CeedOperatorCheckReady(op));
1925   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1926 
1927   if (op->LinearAssembleSymbolic) {
1928     // Backend version
1929     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1930     return CEED_ERROR_SUCCESS;
1931   } else {
1932     // Operator fallback
1933     CeedOperator op_fallback;
1934 
1935     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1936     if (op_fallback) {
1937       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1938       return CEED_ERROR_SUCCESS;
1939     }
1940   }
1941 
1942   // Default interface implementation
1943 
1944   // count entries and allocate rows, cols arrays
1945   *num_entries = 0;
1946   if (is_composite) {
1947     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1948     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1949     for (CeedInt k = 0; k < num_suboperators; ++k) {
1950       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1951       *num_entries += single_entries;
1952     }
1953   } else {
1954     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1955     *num_entries += single_entries;
1956   }
1957   CeedCall(CeedCalloc(*num_entries, rows));
1958   CeedCall(CeedCalloc(*num_entries, cols));
1959 
1960   // assemble nonzero locations
1961   CeedInt offset = 0;
1962   if (is_composite) {
1963     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1964     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1965     for (CeedInt k = 0; k < num_suboperators; ++k) {
1966       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1967       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1968       offset += single_entries;
1969     }
1970   } else {
1971     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1972   }
1973 
1974   return CEED_ERROR_SUCCESS;
1975 }
1976 
1977 /**
1978    @brief Fully assemble the nonzero entries of a linear operator.
1979 
1980    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1981 
1982    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1983 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added,
1984 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1985 
1986    This will generally be slow unless your operator is low-order.
1987 
1988    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1989 
1990    @param[in]  op     CeedOperator to assemble
1991    @param[out] values Values to assemble into matrix
1992 
1993    @ref User
1994 **/
1995 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1996   CeedInt       num_suboperators, single_entries = 0;
1997   CeedOperator *sub_operators;
1998   bool          is_composite;
1999   CeedCall(CeedOperatorCheckReady(op));
2000   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2001 
2002   // Early exit for empty operator
2003   if (!is_composite) {
2004     CeedInt num_elem = 0;
2005 
2006     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2007     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2008   }
2009 
2010   if (op->LinearAssemble) {
2011     // Backend version
2012     CeedCall(op->LinearAssemble(op, values));
2013     return CEED_ERROR_SUCCESS;
2014   } else {
2015     // Operator fallback
2016     CeedOperator op_fallback;
2017 
2018     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2019     if (op_fallback) {
2020       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2021       return CEED_ERROR_SUCCESS;
2022     }
2023   }
2024 
2025   // Default interface implementation
2026   CeedInt offset = 0;
2027   CeedCall(CeedVectorSetValue(values, 0.0));
2028   if (is_composite) {
2029     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2030     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2031     for (CeedInt k = 0; k < num_suboperators; k++) {
2032       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2033       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2034       offset += single_entries;
2035     }
2036   } else {
2037     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2038   }
2039 
2040   return CEED_ERROR_SUCCESS;
2041 }
2042 
2043 /**
2044   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
2045 
2046   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2047 
2048   @param[in]  op               Composite CeedOperator
2049   @param[in]  num_skip_indices Number of suboperators to skip
2050   @param[in]  skip_indices     Array of indices of suboperators to skip
2051   @param[out] mult             Vector to store multiplicity (of size l_size)
2052 
2053   @return An error code: 0 - success, otherwise - failure
2054 
2055   @ref User
2056 **/
2057 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2058   CeedCall(CeedOperatorCheckReady(op));
2059 
2060   Ceed                ceed;
2061   CeedInt             num_suboperators;
2062   CeedSize            l_vec_len;
2063   CeedScalar         *mult_array;
2064   CeedVector          ones_l_vec;
2065   CeedElemRestriction elem_rstr;
2066   CeedOperator       *sub_operators;
2067 
2068   CeedCall(CeedOperatorGetCeed(op, &ceed));
2069 
2070   // Zero mult vector
2071   CeedCall(CeedVectorSetValue(mult, 0.0));
2072 
2073   // Get suboperators
2074   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2075   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2076   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2077 
2078   // Work vector
2079   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2080   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2081   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2082   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2083 
2084   // Compute multiplicity across suboperators
2085   for (CeedInt i = 0; i < num_suboperators; i++) {
2086     const CeedScalar *sub_mult_array;
2087     CeedVector        sub_mult_l_vec, ones_e_vec;
2088 
2089     // -- Check for suboperator to skip
2090     for (CeedInt j = 0; j < num_skip_indices; j++) {
2091       if (skip_indices[j] == i) continue;
2092     }
2093 
2094     // -- Sub operator multiplicity
2095     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2096     CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2097     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2098     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2099     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2100     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2101     // ---- Flag every node present in the current suboperator
2102     for (CeedInt j = 0; j < l_vec_len; j++) {
2103       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2104     }
2105     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2106     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2107     CeedCall(CeedVectorDestroy(&ones_e_vec));
2108   }
2109   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2110   CeedCall(CeedVectorDestroy(&ones_l_vec));
2111 
2112   return CEED_ERROR_SUCCESS;
2113 }
2114 
2115 /**
2116   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2117 grid interpolation
2118 
2119   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2120 
2121   @param[in]  op_fine      Fine grid operator
2122   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2123   @param[in]  rstr_coarse  Coarse grid restriction
2124   @param[in]  basis_coarse Coarse grid active vector basis
2125   @param[out] op_coarse    Coarse grid operator
2126   @param[out] op_prolong   Coarse to fine operator, or NULL
2127   @param[out] op_restrict  Fine to coarse operator, or NULL
2128 
2129   @return An error code: 0 - success, otherwise - failure
2130 
2131   @ref User
2132 **/
2133 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2134                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2135   CeedCall(CeedOperatorCheckReady(op_fine));
2136 
2137   // Build prolongation matrix, if required
2138   CeedBasis basis_c_to_f = NULL;
2139   if (op_prolong || op_restrict) {
2140     CeedBasis basis_fine;
2141     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2142     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2143   }
2144 
2145   // Core code
2146   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2147 
2148   return CEED_ERROR_SUCCESS;
2149 }
2150 
2151 /**
2152   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2153 
2154   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2155 
2156   @param[in]  op_fine       Fine grid operator
2157   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2158   @param[in]  rstr_coarse   Coarse grid restriction
2159   @param[in]  basis_coarse  Coarse grid active vector basis
2160   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2161   @param[out] op_coarse     Coarse grid operator
2162   @param[out] op_prolong    Coarse to fine operator, or NULL
2163   @param[out] op_restrict   Fine to coarse operator, or NULL
2164 
2165   @return An error code: 0 - success, otherwise - failure
2166 
2167   @ref User
2168 **/
2169 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2170                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2171                                              CeedOperator *op_restrict) {
2172   CeedCall(CeedOperatorCheckReady(op_fine));
2173   Ceed ceed;
2174   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2175 
2176   // Check for compatible quadrature spaces
2177   CeedBasis basis_fine;
2178   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2179   CeedInt Q_f, Q_c;
2180   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2181   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2182   if (Q_f != Q_c) {
2183     // LCOV_EXCL_START
2184     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2185     // LCOV_EXCL_STOP
2186   }
2187 
2188   // Create coarse to fine basis, if required
2189   CeedBasis basis_c_to_f = NULL;
2190   if (op_prolong || op_restrict) {
2191     // Check if interpolation matrix is provided
2192     if (!interp_c_to_f) {
2193       // LCOV_EXCL_START
2194       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2195       // LCOV_EXCL_STOP
2196     }
2197     CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2198     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2199     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2200     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2201     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2202     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2203     CeedScalar *q_ref, *q_weight, *grad;
2204     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2205     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2206     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2207     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2208     CeedCall(CeedFree(&q_ref));
2209     CeedCall(CeedFree(&q_weight));
2210     CeedCall(CeedFree(&grad));
2211   }
2212 
2213   // Core code
2214   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2215   return CEED_ERROR_SUCCESS;
2216 }
2217 
2218 /**
2219   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2220 
2221   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2222 
2223   @param[in]  op_fine       Fine grid operator
2224   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2225   @param[in]  rstr_coarse   Coarse grid restriction
2226   @param[in]  basis_coarse  Coarse grid active vector basis
2227   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2228   @param[out] op_coarse     Coarse grid operator
2229   @param[out] op_prolong    Coarse to fine operator, or NULL
2230   @param[out] op_restrict   Fine to coarse operator, or NULL
2231 
2232   @return An error code: 0 - success, otherwise - failure
2233 
2234   @ref User
2235 **/
2236 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2237                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2238                                        CeedOperator *op_restrict) {
2239   CeedCall(CeedOperatorCheckReady(op_fine));
2240   Ceed ceed;
2241   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2242 
2243   // Check for compatible quadrature spaces
2244   CeedBasis basis_fine;
2245   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2246   CeedInt Q_f, Q_c;
2247   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2248   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2249   if (Q_f != Q_c) {
2250     // LCOV_EXCL_START
2251     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2252     // LCOV_EXCL_STOP
2253   }
2254 
2255   // Coarse to fine basis
2256   CeedBasis basis_c_to_f = NULL;
2257   if (op_prolong || op_restrict) {
2258     // Check if interpolation matrix is provided
2259     if (!interp_c_to_f) {
2260       // LCOV_EXCL_START
2261       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2262       // LCOV_EXCL_STOP
2263     }
2264     CeedElemTopology topo;
2265     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2266     CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2267     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2268     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2269     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2270     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2271     CeedScalar *q_ref, *q_weight, *grad;
2272     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2273     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2274     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2275     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2276     CeedCall(CeedFree(&q_ref));
2277     CeedCall(CeedFree(&q_weight));
2278     CeedCall(CeedFree(&grad));
2279   }
2280 
2281   // Core code
2282   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2283   return CEED_ERROR_SUCCESS;
2284 }
2285 
2286 /**
2287   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2288 
2289   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2290     This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2291     The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T
2292 \hat S V\f$. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear.
2293 
2294   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2295 
2296   @param[in]  op      CeedOperator to create element inverses
2297   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2298   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2299 
2300   @return An error code: 0 - success, otherwise - failure
2301 
2302   @ref User
2303 **/
2304 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2305   CeedCall(CeedOperatorCheckReady(op));
2306 
2307   if (op->CreateFDMElementInverse) {
2308     // Backend version
2309     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2310     return CEED_ERROR_SUCCESS;
2311   } else {
2312     // Operator fallback
2313     CeedOperator op_fallback;
2314 
2315     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2316     if (op_fallback) {
2317       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2318       return CEED_ERROR_SUCCESS;
2319     }
2320   }
2321 
2322   // Default interface implementation
2323   Ceed ceed, ceed_parent;
2324   CeedCall(CeedOperatorGetCeed(op, &ceed));
2325   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2326   ceed_parent = ceed_parent ? ceed_parent : ceed;
2327   CeedQFunction qf;
2328   CeedCall(CeedOperatorGetQFunction(op, &qf));
2329 
2330   // Determine active input basis
2331   bool                interp = false, grad = false;
2332   CeedBasis           basis = NULL;
2333   CeedElemRestriction rstr  = NULL;
2334   CeedOperatorField  *op_fields;
2335   CeedQFunctionField *qf_fields;
2336   CeedInt             num_input_fields;
2337   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2338   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2339   for (CeedInt i = 0; i < num_input_fields; i++) {
2340     CeedVector vec;
2341     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2342     if (vec == CEED_VECTOR_ACTIVE) {
2343       CeedEvalMode eval_mode;
2344       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2345       interp = interp || eval_mode == CEED_EVAL_INTERP;
2346       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2347       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2348       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2349     }
2350   }
2351   if (!basis) {
2352     // LCOV_EXCL_START
2353     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2354     // LCOV_EXCL_STOP
2355   }
2356   CeedSize l_size = 1;
2357   CeedInt  P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
2358   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2359   CeedCall(CeedBasisGetNumNodes(basis, &elem_size));
2360   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2361   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2362   CeedCall(CeedBasisGetDimension(basis, &dim));
2363   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2364   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2365   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2366 
2367   // Build and diagonalize 1D Mass and Laplacian
2368   bool tensor_basis;
2369   CeedCall(CeedBasisIsTensor(basis, &tensor_basis));
2370   if (!tensor_basis) {
2371     // LCOV_EXCL_START
2372     return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2373     // LCOV_EXCL_STOP
2374   }
2375   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2376   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2377   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2378   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2379   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2380   CeedCall(CeedCalloc(P_1d, &lambda));
2381   // -- Build matrices
2382   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2383   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2384   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2385   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2386   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2387 
2388   // -- Diagonalize
2389   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2390   CeedCall(CeedFree(&mass));
2391   CeedCall(CeedFree(&laplace));
2392   for (CeedInt i = 0; i < P_1d; i++) {
2393     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2394   }
2395   CeedCall(CeedFree(&x));
2396 
2397   // Assemble QFunction
2398   CeedVector          assembled;
2399   CeedElemRestriction rstr_qf;
2400   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2401   CeedInt layout[3];
2402   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2403   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2404   CeedScalar max_norm = 0;
2405   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2406 
2407   // Calculate element averages
2408   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2409   CeedScalar       *elem_avg;
2410   const CeedScalar *assembled_array, *q_weight_array;
2411   CeedVector        q_weight;
2412   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2413   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2414   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2415   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2416   CeedCall(CeedCalloc(num_elem, &elem_avg));
2417   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2418   for (CeedInt e = 0; e < num_elem; e++) {
2419     CeedInt count = 0;
2420     for (CeedInt q = 0; q < num_qpts; q++) {
2421       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2422         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2423           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2424           count++;
2425         }
2426       }
2427     }
2428     if (count) {
2429       elem_avg[e] /= count;
2430     } else {
2431       elem_avg[e] = 1.0;
2432     }
2433   }
2434   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2435   CeedCall(CeedVectorDestroy(&assembled));
2436   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2437   CeedCall(CeedVectorDestroy(&q_weight));
2438 
2439   // Build FDM diagonal
2440   CeedVector  q_data;
2441   CeedScalar *q_data_array, *fdm_diagonal;
2442   CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal));
2443   const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON;
2444   for (CeedInt c = 0; c < num_comp; c++) {
2445     for (CeedInt n = 0; n < elem_size; n++) {
2446       if (interp) fdm_diagonal[c * elem_size + n] = 1.0;
2447       if (grad) {
2448         for (CeedInt d = 0; d < dim; d++) {
2449           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2450           fdm_diagonal[c * elem_size + n] += lambda[i];
2451         }
2452       }
2453       if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound;
2454     }
2455   }
2456   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data));
2457   CeedCall(CeedVectorSetValue(q_data, 0.0));
2458   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2459   for (CeedInt e = 0; e < num_elem; e++) {
2460     for (CeedInt c = 0; c < num_comp; c++) {
2461       for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]);
2462     }
2463   }
2464   CeedCall(CeedFree(&elem_avg));
2465   CeedCall(CeedFree(&fdm_diagonal));
2466   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2467 
2468   // Setup FDM operator
2469   // -- Basis
2470   CeedBasis   fdm_basis;
2471   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2472   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2473   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2474   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2475   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2476   CeedCall(CeedFree(&fdm_interp));
2477   CeedCall(CeedFree(&grad_dummy));
2478   CeedCall(CeedFree(&q_ref_dummy));
2479   CeedCall(CeedFree(&q_weight_dummy));
2480   CeedCall(CeedFree(&lambda));
2481 
2482   // -- Restriction
2483   CeedElemRestriction rstr_qd_i;
2484   CeedInt             strides[3] = {1, elem_size, elem_size * num_comp};
2485   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i));
2486   // -- QFunction
2487   CeedQFunction qf_fdm;
2488   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2489   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2490   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2491   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2492   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2493   // -- QFunction context
2494   CeedInt *num_comp_data;
2495   CeedCall(CeedCalloc(1, &num_comp_data));
2496   num_comp_data[0] = num_comp;
2497   CeedQFunctionContext ctx_fdm;
2498   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2499   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2500   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2501   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2502   // -- Operator
2503   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2504   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2505   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2506   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2507 
2508   // Cleanup
2509   CeedCall(CeedVectorDestroy(&q_data));
2510   CeedCall(CeedBasisDestroy(&fdm_basis));
2511   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2512   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2513 
2514   return CEED_ERROR_SUCCESS;
2515 }
2516 
2517 /// @}
2518