xref: /libCEED/interface/ceed-preconditioning.c (revision a2e5d304d0c7d96eecfcbbd32f1ea5194beb84ca)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <assert.h>
9 #include <ceed-impl.h>
10 #include <ceed/backend.h>
11 #include <ceed/ceed.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) {
128       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     }
130     // Cleanup
131     CeedCall(CeedQFunctionDestroy(&qf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
133     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
134   }
135   CeedCall(CeedOperatorSetName(op_fallback, op->name));
136   CeedCall(CeedOperatorCheckReady(op_fallback));
137   op->op_fallback = op_fallback;
138 
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
144 
145   @param[in]  op          CeedOperator to retrieve fallback for
146   @param[out] op_fallback Fallback CeedOperator
147 
148   @return An error code: 0 - success, otherwise - failure
149 
150   @ref Developer
151 **/
152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
153   // Create if needed
154   if (!op->op_fallback) {
155     CeedCall(CeedOperatorCreateFallback(op));
156   }
157   if (op->op_fallback) {
158     bool is_debug;
159 
160     CeedCall(CeedIsDebug(op->ceed, &is_debug));
161     if (is_debug) {
162       Ceed        ceed, ceed_fallback;
163       const char *resource, *resource_fallback;
164 
165       CeedCall(CeedOperatorGetCeed(op, &ceed));
166       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
167       CeedCall(CeedGetResource(ceed, &resource));
168       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
169 
170       CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n");
171       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
172                 op->op_fallback);
173     }
174   }
175   *op_fallback = op->op_fallback;
176 
177   return CEED_ERROR_SUCCESS;
178 }
179 
180 /**
181   @brief Select correct basis matrix pointer based on CeedEvalMode
182 
183   @param[in]  eval_mode Current basis evaluation mode
184   @param[in]  identity  Pointer to identity matrix
185   @param[in]  interp    Pointer to interpolation matrix
186   @param[in]  grad      Pointer to gradient matrix
187   @param[out] basis_ptr Basis pointer to set
188 
189   @ref Developer
190 **/
191 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad,
192                                                const CeedScalar **basis_ptr) {
193   switch (eval_mode) {
194     case CEED_EVAL_NONE:
195       *basis_ptr = identity;
196       break;
197     case CEED_EVAL_INTERP:
198       *basis_ptr = interp;
199       break;
200     case CEED_EVAL_GRAD:
201       *basis_ptr = grad;
202       break;
203     case CEED_EVAL_WEIGHT:
204     case CEED_EVAL_DIV:
205     case CEED_EVAL_CURL:
206       break;  // Caught by QF Assembly
207   }
208   assert(*basis_ptr != NULL);
209 }
210 
211 /**
212   @brief Create point block restriction for active operator field
213 
214   @param[in]  rstr            Original CeedElemRestriction for active field
215   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
216 
217   @return An error code: 0 - success, otherwise - failure
218 
219   @ref Developer
220 **/
221 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
222   Ceed ceed;
223   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
224   const CeedInt *offsets;
225   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
226 
227   // Expand offsets
228   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
229   CeedSize l_size;
230   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
231   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
232   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
233   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
234   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
235   CeedInt shift = num_comp;
236   if (comp_stride != 1) shift *= num_comp;
237   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
238   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
239     pointblock_offsets[i] = offsets[i] * shift;
240   }
241 
242   // Create new restriction
243   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
244                                      pointblock_offsets, pointblock_rstr));
245 
246   // Cleanup
247   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
248 
249   return CEED_ERROR_SUCCESS;
250 }
251 
252 /**
253   @brief Core logic for assembling operator diagonal or point block diagonal
254 
255   @param[in]  op            CeedOperator to assemble point block diagonal
256   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
257   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
258   @param[out] assembled     CeedVector to store assembled diagonal
259 
260   @return An error code: 0 - success, otherwise - failure
261 
262   @ref Developer
263 **/
264 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
265   Ceed ceed;
266   CeedCall(CeedOperatorGetCeed(op, &ceed));
267 
268   // Assemble QFunction
269   CeedQFunction       qf;
270   const CeedScalar   *assembled_qf_array;
271   CeedVector          assembled_qf;
272   CeedElemRestriction assembled_elem_rstr;
273   CeedInt             num_input_fields, num_output_fields;
274   CeedInt             layout[3];
275 
276   CeedCall(CeedOperatorGetQFunction(op, &qf));
277   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
278   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
279   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout));
280   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
281   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
282 
283   // Get assembly data
284   CeedOperatorAssemblyData data;
285   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
286   CeedInt                 *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
287   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
288   CeedBasis               *active_bases;
289   CeedElemRestriction     *active_elem_rstrs;
290 
291   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
292   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
293                                                 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components));
294   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL));
295   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs));
296 
297   // Loop over all active bases
298   for (CeedInt b = 0; b < num_active_bases; b++) {
299     // Assemble point block diagonal restriction, if needed
300     CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b];
301 
302     if (is_pointblock) {
303       CeedElemRestriction point_block_elem_rstr;
304 
305       CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr));
306       diag_elem_rstr = point_block_elem_rstr;
307     }
308 
309     // Create diagonal vector
310     CeedVector elem_diag;
311     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
312 
313     // Assemble element operator diagonals
314     CeedScalar *elem_diag_array;
315     CeedInt     num_elem, num_nodes, num_qpts, num_components;
316 
317     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
318     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
319     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
320     CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes));
321     CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components));
322     CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts));
323 
324     // Basis matrices
325     const CeedScalar *interp, *grad;
326     CeedScalar       *identity      = NULL;
327     bool              has_eval_none = false;
328     for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) {
329       has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE);
330     }
331     for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) {
332       has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE);
333     }
334     if (has_eval_none) {
335       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
336       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
337     }
338     CeedCall(CeedBasisGetInterp(active_bases[b], &interp));
339     CeedCall(CeedBasisGetGrad(active_bases[b], &grad));
340     // Compute the diagonal of B^T D B
341     // Each element
342     for (CeedInt e = 0; e < num_elem; e++) {
343       CeedInt d_out = -1;
344       // Each basis eval mode pair
345       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) {
346         const CeedScalar *B_t = NULL;
347         if (eval_modes_out[b][e_out] == CEED_EVAL_GRAD) d_out += 1;
348         CeedOperatorGetBasisPointer(eval_modes_out[b][e_out], identity, interp, &grad[d_out * num_qpts * num_nodes], &B_t);
349         CeedInt d_in = -1;
350         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) {
351           const CeedScalar *B = NULL;
352           if (eval_modes_in[b][e_in] == CEED_EVAL_GRAD) d_in += 1;
353           CeedOperatorGetBasisPointer(eval_modes_in[b][e_in], identity, interp, &grad[d_in * num_qpts * num_nodes], &B);
354           // Each component
355           for (CeedInt c_out = 0; c_out < num_components; c_out++) {
356             // Each qpt/node pair
357             for (CeedInt q = 0; q < num_qpts; q++) {
358               if (is_pointblock) {
359                 // Point Block Diagonal
360                 for (CeedInt c_in = 0; c_in < num_components; c_in++) {
361                   const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
362                   const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
363                   for (CeedInt n = 0; n < num_nodes; n++) {
364                     elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] +=
365                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
366                   }
367                 }
368               } else {
369                 // Diagonal Only
370                 const CeedInt    c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
371                 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
372                 for (CeedInt n = 0; n < num_nodes; n++) {
373                   elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
374                 }
375               }
376             }
377           }
378         }
379       }
380     }
381     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
382 
383     // Assemble local operator diagonal
384     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
385 
386     // Cleanup
387     if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
388     CeedCall(CeedVectorDestroy(&elem_diag));
389     CeedCall(CeedFree(&identity));
390   }
391   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
392   CeedCall(CeedVectorDestroy(&assembled_qf));
393 
394   return CEED_ERROR_SUCCESS;
395 }
396 
397 /**
398   @brief Core logic for assembling composite operator diagonal
399 
400   @param[in]  op            CeedOperator to assemble point block diagonal
401   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
402   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
403   @param[out] assembled     CeedVector to store assembled diagonal
404 
405   @return An error code: 0 - success, otherwise - failure
406 
407   @ref Developer
408 **/
409 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
410                                                                  CeedVector assembled) {
411   CeedInt       num_sub;
412   CeedOperator *suboperators;
413   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
414   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
415   for (CeedInt i = 0; i < num_sub; i++) {
416     if (is_pointblock) {
417       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
418     } else {
419       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
420     }
421   }
422   return CEED_ERROR_SUCCESS;
423 }
424 
425 /**
426   @brief Build nonzero pattern for non-composite operator
427 
428   Users should generally use CeedOperatorLinearAssembleSymbolic()
429 
430   @param[in]  op     CeedOperator to assemble nonzero pattern
431   @param[in]  offset Offset for number of entries
432   @param[out] rows   Row number for each entry
433   @param[out] cols   Column number for each entry
434 
435   @return An error code: 0 - success, otherwise - failure
436 
437   @ref Developer
438 **/
439 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
440   Ceed ceed;
441   bool is_composite;
442   CeedCall(CeedOperatorGetCeed(op, &ceed));
443   CeedCall(CeedOperatorIsComposite(op, &is_composite));
444 
445   if (is_composite) {
446     // LCOV_EXCL_START
447     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
448     // LCOV_EXCL_STOP
449   }
450 
451   CeedSize num_nodes;
452   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
453   CeedElemRestriction rstr_in;
454   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
455   CeedInt num_elem, elem_size, num_comp;
456   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
457   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
458   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
459   CeedInt layout_er[3];
460   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
461 
462   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
463 
464   // Determine elem_dof relation
465   CeedVector index_vec;
466   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
467   CeedScalar *array;
468   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
469   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
470   CeedCall(CeedVectorRestoreArray(index_vec, &array));
471   CeedVector elem_dof;
472   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
473   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
474   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
475   const CeedScalar *elem_dof_a;
476   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
477   CeedCall(CeedVectorDestroy(&index_vec));
478 
479   // Determine i, j locations for element matrices
480   CeedInt count = 0;
481   for (CeedInt e = 0; e < num_elem; e++) {
482     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
483       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
484         for (CeedInt i = 0; i < elem_size; i++) {
485           for (CeedInt j = 0; j < elem_size; j++) {
486             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
487             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
488 
489             const CeedInt row = elem_dof_a[elem_dof_index_row];
490             const CeedInt col = elem_dof_a[elem_dof_index_col];
491 
492             rows[offset + count] = row;
493             cols[offset + count] = col;
494             count++;
495           }
496         }
497       }
498     }
499   }
500   if (count != local_num_entries) {
501     // LCOV_EXCL_START
502     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
503     // LCOV_EXCL_STOP
504   }
505   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
506   CeedCall(CeedVectorDestroy(&elem_dof));
507 
508   return CEED_ERROR_SUCCESS;
509 }
510 
511 /**
512   @brief Assemble nonzero entries for non-composite operator
513 
514   Users should generally use CeedOperatorLinearAssemble()
515 
516   @param[in]  op     CeedOperator to assemble
517   @param[in]  offset Offset for number of entries
518   @param[out] values Values to assemble into matrix
519 
520   @return An error code: 0 - success, otherwise - failure
521 
522   @ref Developer
523 **/
524 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
525   Ceed ceed;
526   bool is_composite;
527   CeedCall(CeedOperatorGetCeed(op, &ceed));
528   CeedCall(CeedOperatorIsComposite(op, &is_composite));
529 
530   if (is_composite) {
531     // LCOV_EXCL_START
532     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
533     // LCOV_EXCL_STOP
534   }
535 
536   // Early exit for empty operator
537   {
538     CeedInt num_elem = 0;
539 
540     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
541     if (num_elem == 0) return CEED_ERROR_SUCCESS;
542   }
543 
544   if (op->LinearAssembleSingle) {
545     // Backend version
546     CeedCall(op->LinearAssembleSingle(op, offset, values));
547     return CEED_ERROR_SUCCESS;
548   } else {
549     // Operator fallback
550     CeedOperator op_fallback;
551 
552     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
553     if (op_fallback) {
554       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
555       return CEED_ERROR_SUCCESS;
556     }
557   }
558 
559   // Assemble QFunction
560   CeedQFunction qf;
561   CeedCall(CeedOperatorGetQFunction(op, &qf));
562   CeedVector          assembled_qf;
563   CeedElemRestriction rstr_q;
564   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
565   CeedSize qf_length;
566   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
567 
568   CeedInt            num_input_fields, num_output_fields;
569   CeedOperatorField *input_fields;
570   CeedOperatorField *output_fields;
571   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
572 
573   // Get assembly data
574   CeedOperatorAssemblyData data;
575   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
576   const CeedEvalMode **eval_modes_in, **eval_modes_out;
577   CeedInt             *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
578   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out,
579                                                 &eval_modes_out, NULL, NULL));
580   CeedBasis *bases;
581   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL));
582   CeedBasis basis_in = bases[0];
583 
584   if (num_active_bases > 1) {
585     // LCOV_EXCL_START
586     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases");
587     // LCOV_EXCL_STOP
588   }
589   if (num_eval_modes_in[0] == 0 || num_eval_modes_out[0] == 0) {
590     // LCOV_EXCL_START
591     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
592     // LCOV_EXCL_STOP
593   }
594 
595   CeedElemRestriction active_rstr;
596   CeedInt             num_elem, elem_size, num_qpts, num_comp;
597   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
598   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
599   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
600   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
601   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
602 
603   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
604 
605   // loop over elements and put in data structure
606   const CeedScalar *interp_in, *grad_in;
607   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
608   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
609 
610   const CeedScalar *assembled_qf_array;
611   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
612 
613   CeedInt layout_qf[3];
614   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
615   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
616 
617   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
618   const CeedScalar **B_mats_in, **B_mats_out;
619   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out));
620   const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0];
621   CeedScalar        BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]];
622   CeedScalar        elem_mat[elem_size * elem_size];
623   CeedInt           count = 0;
624   CeedScalar       *vals;
625   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
626   for (CeedInt e = 0; e < num_elem; e++) {
627     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
628       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
629         // Compute B^T*D
630         for (CeedInt n = 0; n < elem_size; n++) {
631           for (CeedInt q = 0; q < num_qpts; q++) {
632             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
633               const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in);
634               CeedScalar    sum       = 0.0;
635               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
636                 const CeedInt b_out_index     = (num_eval_modes_out[0] * q + e_out) * elem_size + n;
637                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out;
638                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
639                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
640               }
641               BTD_mat[btd_index] = sum;
642             }
643           }
644         }
645         // form element matrix itself (for each block component)
646         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0]));
647 
648         // put element matrix in coordinate data structure
649         for (CeedInt i = 0; i < elem_size; i++) {
650           for (CeedInt j = 0; j < elem_size; j++) {
651             vals[offset + count] = elem_mat[i * elem_size + j];
652             count++;
653           }
654         }
655       }
656     }
657   }
658   if (count != local_num_entries) {
659     // LCOV_EXCL_START
660     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
661     // LCOV_EXCL_STOP
662   }
663   CeedCall(CeedVectorRestoreArray(values, &vals));
664 
665   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
666   CeedCall(CeedVectorDestroy(&assembled_qf));
667 
668   return CEED_ERROR_SUCCESS;
669 }
670 
671 /**
672   @brief Count number of entries for assembled CeedOperator
673 
674   @param[in]  op          CeedOperator to assemble
675   @param[out] num_entries Number of entries in assembled representation
676 
677   @return An error code: 0 - success, otherwise - failure
678 
679   @ref Utility
680 **/
681 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
682   bool                is_composite;
683   CeedElemRestriction rstr;
684   CeedInt             num_elem, elem_size, num_comp;
685 
686   CeedCall(CeedOperatorIsComposite(op, &is_composite));
687   if (is_composite) {
688     // LCOV_EXCL_START
689     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
690     // LCOV_EXCL_STOP
691   }
692   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
693   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
694   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
695   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
696   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
697 
698   return CEED_ERROR_SUCCESS;
699 }
700 
701 /**
702   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
703 
704   @param[in]  op_fine      Fine grid operator
705   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
706   @param[in]  rstr_coarse  Coarse grid restriction
707   @param[in]  basis_coarse Coarse grid active vector basis
708   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
709   @param[out] op_coarse    Coarse grid operator
710   @param[out] op_prolong   Coarse to fine operator, or NULL
711   @param[out] op_restrict  Fine to coarse operator, or NULL
712 
713   @return An error code: 0 - success, otherwise - failure
714 
715   @ref Developer
716 **/
717 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
718                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
719   Ceed       ceed;
720   CeedVector mult_vec = NULL;
721   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
722 
723   // Check for composite operator
724   bool is_composite;
725   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
726   if (is_composite) {
727     // LCOV_EXCL_START
728     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
729     // LCOV_EXCL_STOP
730   }
731 
732   // Coarse Grid
733   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
734   CeedElemRestriction rstr_fine = NULL;
735   // -- Clone input fields
736   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
737     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
738       rstr_fine = op_fine->input_fields[i]->elem_rstr;
739       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
740     } else {
741       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
742                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
743     }
744   }
745   // -- Clone output fields
746   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
747     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
748       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
749     } else {
750       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
751                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
752     }
753   }
754   // -- Clone QFunctionAssemblyData
755   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
756 
757   // Multiplicity vector
758   if (op_restrict || op_prolong) {
759     CeedVector mult_e_vec;
760 
761     if (!p_mult_fine) {
762       // LCOV_EXCL_START
763       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
764       // LCOV_EXCL_STOP
765     }
766     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
767     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
768     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
769     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
770     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
771     CeedCall(CeedVectorDestroy(&mult_e_vec));
772     CeedCall(CeedVectorReciprocal(mult_vec));
773   }
774 
775   // Clone name
776   bool   has_name = op_fine->name;
777   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
778   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
779 
780   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
781   if ((op_restrict || op_prolong) && !basis_c_to_f) {
782     // LCOV_EXCL_START
783     return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine basis");
784     // LCOV_EXCL_STOP
785   }
786 
787   // Restriction/Prolongation Operators
788   CeedInt num_comp;
789   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
790 
791   // Restriction
792   if (op_restrict) {
793     CeedInt             *num_comp_r_data;
794     CeedQFunction        qf_restrict;
795     CeedQFunctionContext ctx_r;
796 
797     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
798     CeedCall(CeedCalloc(1, &num_comp_r_data));
799     num_comp_r_data[0] = num_comp;
800     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
801     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
802     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
803     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
804     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
805     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
806     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
807     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
808 
809     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
810     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
811     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
812     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
813 
814     // Set name
815     char *restriction_name;
816     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
817     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
818     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
819     CeedCall(CeedFree(&restriction_name));
820 
821     // Check
822     CeedCall(CeedOperatorCheckReady(*op_restrict));
823 
824     // Cleanup
825     CeedCall(CeedQFunctionDestroy(&qf_restrict));
826   }
827 
828   // Prolongation
829   if (op_prolong) {
830     CeedInt             *num_comp_p_data;
831     CeedQFunction        qf_prolong;
832     CeedQFunctionContext ctx_p;
833 
834     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
835     CeedCall(CeedCalloc(1, &num_comp_p_data));
836     num_comp_p_data[0] = num_comp;
837     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
838     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
839     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
840     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
841     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
842     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
843     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
844     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
845 
846     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
847     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
848     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
849     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
850 
851     // Set name
852     char *prolongation_name;
853     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
854     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
855     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
856     CeedCall(CeedFree(&prolongation_name));
857 
858     // Check
859     CeedCall(CeedOperatorCheckReady(*op_prolong));
860 
861     // Cleanup
862     CeedCall(CeedQFunctionDestroy(&qf_prolong));
863   }
864 
865   // Check
866   CeedCall(CeedOperatorCheckReady(*op_coarse));
867 
868   // Cleanup
869   CeedCall(CeedVectorDestroy(&mult_vec));
870   CeedCall(CeedBasisDestroy(&basis_c_to_f));
871 
872   return CEED_ERROR_SUCCESS;
873 }
874 
875 /**
876   @brief Build 1D mass matrix and Laplacian with perturbation
877 
878   @param[in]  interp_1d   Interpolation matrix in one dimension
879   @param[in]  grad_1d     Gradient matrix in one dimension
880   @param[in]  q_weight_1d Quadrature weights in one dimension
881   @param[in]  P_1d        Number of basis nodes in one dimension
882   @param[in]  Q_1d        Number of quadrature points in one dimension
883   @param[in]  dim         Dimension of basis
884   @param[out] mass        Assembled mass matrix in one dimension
885   @param[out] laplace     Assembled perturbed Laplacian in one dimension
886 
887   @return An error code: 0 - success, otherwise - failure
888 
889   @ref Developer
890 **/
891 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d,
892                                                       CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
893   for (CeedInt i = 0; i < P_1d; i++) {
894     for (CeedInt j = 0; j < P_1d; j++) {
895       CeedScalar sum = 0.0;
896       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
897       mass[i + j * P_1d] = sum;
898     }
899   }
900   // -- Laplacian
901   for (CeedInt i = 0; i < P_1d; i++) {
902     for (CeedInt j = 0; j < P_1d; j++) {
903       CeedScalar sum = 0.0;
904       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
905       laplace[i + j * P_1d] = sum;
906     }
907   }
908   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
909   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
910   return CEED_ERROR_SUCCESS;
911 }
912 CeedPragmaOptimizeOn;
913 
914 /// @}
915 
916 /// ----------------------------------------------------------------------------
917 /// CeedOperator Backend API
918 /// ----------------------------------------------------------------------------
919 /// @addtogroup CeedOperatorBackend
920 /// @{
921 
922 /**
923   @brief Create object holding CeedQFunction assembly data for CeedOperator
924 
925   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
926   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
927 
928   @return An error code: 0 - success, otherwise - failure
929 
930   @ref Backend
931 **/
932 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
933   CeedCall(CeedCalloc(1, data));
934   (*data)->ref_count = 1;
935   (*data)->ceed      = ceed;
936   CeedCall(CeedReference(ceed));
937 
938   return CEED_ERROR_SUCCESS;
939 }
940 
941 /**
942   @brief Increment the reference counter for a CeedQFunctionAssemblyData
943 
944   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
945 
946   @return An error code: 0 - success, otherwise - failure
947 
948   @ref Backend
949 **/
950 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
951   data->ref_count++;
952   return CEED_ERROR_SUCCESS;
953 }
954 
955 /**
956   @brief Set re-use of CeedQFunctionAssemblyData
957 
958   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
959   @param[in]     reuse_data Boolean flag indicating data re-use
960 
961   @return An error code: 0 - success, otherwise - failure
962 
963   @ref Backend
964 **/
965 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
966   data->reuse_data        = reuse_data;
967   data->needs_data_update = true;
968   return CEED_ERROR_SUCCESS;
969 }
970 
971 /**
972   @brief Mark QFunctionAssemblyData as stale
973 
974   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
975   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
976 
977   @return An error code: 0 - success, otherwise - failure
978 
979   @ref Backend
980 **/
981 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
982   data->needs_data_update = needs_data_update;
983   return CEED_ERROR_SUCCESS;
984 }
985 
986 /**
987   @brief Determine if QFunctionAssemblyData needs update
988 
989   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
990   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
991 
992   @return An error code: 0 - success, otherwise - failure
993 
994   @ref Backend
995 **/
996 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
997   *is_update_needed = !data->reuse_data || data->needs_data_update;
998   return CEED_ERROR_SUCCESS;
999 }
1000 
1001 /**
1002   @brief Copy the pointer to a CeedQFunctionAssemblyData.
1003            Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
1004            Note: If `*data_copy` is non-NULL, then it is assumed that `*data_copy` is a pointer to a CeedQFunctionAssemblyData.
1005              This CeedQFunctionAssemblyData will be destroyed if `*data_copy` is the only reference to this CeedQFunctionAssemblyData.
1006 
1007   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
1008   @param[in,out] data_copy Variable to store copied reference
1009 
1010   @return An error code: 0 - success, otherwise - failure
1011 
1012   @ref Backend
1013 **/
1014 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1015   CeedCall(CeedQFunctionAssemblyDataReference(data));
1016   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1017   *data_copy = data;
1018   return CEED_ERROR_SUCCESS;
1019 }
1020 
1021 /**
1022   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1023 
1024   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1025   @param[out] is_setup Boolean flag for setup status
1026 
1027   @return An error code: 0 - success, otherwise - failure
1028 
1029   @ref Backend
1030 **/
1031 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1032   *is_setup = data->is_setup;
1033   return CEED_ERROR_SUCCESS;
1034 }
1035 
1036 /**
1037   @brief Set internal objects for CeedQFunctionAssemblyData
1038 
1039   @param[in,out] data CeedQFunctionAssemblyData to set objects
1040   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1041   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1042 
1043   @return An error code: 0 - success, otherwise - failure
1044 
1045   @ref Backend
1046 **/
1047 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1048   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1049   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1050 
1051   data->is_setup = true;
1052   return CEED_ERROR_SUCCESS;
1053 }
1054 
1055 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1056   if (!data->is_setup) {
1057     // LCOV_EXCL_START
1058     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1059     // LCOV_EXCL_STOP
1060   }
1061 
1062   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1063   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1064 
1065   return CEED_ERROR_SUCCESS;
1066 }
1067 
1068 /**
1069   @brief Destroy CeedQFunctionAssemblyData
1070 
1071   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1072 
1073   @return An error code: 0 - success, otherwise - failure
1074 
1075   @ref Backend
1076 **/
1077 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1078   if (!*data || --(*data)->ref_count > 0) {
1079     *data = NULL;
1080     return CEED_ERROR_SUCCESS;
1081   }
1082   CeedCall(CeedDestroy(&(*data)->ceed));
1083   CeedCall(CeedVectorDestroy(&(*data)->vec));
1084   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1085 
1086   CeedCall(CeedFree(data));
1087   return CEED_ERROR_SUCCESS;
1088 }
1089 
1090 /**
1091   @brief Get CeedOperatorAssemblyData
1092 
1093   @param[in]  op   CeedOperator to assemble
1094   @param[out] data CeedQFunctionAssemblyData
1095 
1096   @return An error code: 0 - success, otherwise - failure
1097 
1098   @ref Backend
1099 **/
1100 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1101   if (!op->op_assembled) {
1102     CeedOperatorAssemblyData data;
1103 
1104     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1105     op->op_assembled = data;
1106   }
1107   *data = op->op_assembled;
1108 
1109   return CEED_ERROR_SUCCESS;
1110 }
1111 
1112 /**
1113   @brief Create object holding CeedOperator assembly data.
1114 
1115     The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1116     An array with references to the corresponding active CeedElemRestrictions is also stored.
1117     For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1118     The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1119       CeedEvalMode.
1120     The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1121     Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1122 
1123   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1124   @param[in]  op   CeedOperator to be assembled
1125   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1126 
1127   @return An error code: 0 - success, otherwise - failure
1128 
1129   @ref Backend
1130 **/
1131 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1132   CeedInt num_active_bases = 0;
1133 
1134   // Allocate
1135   CeedCall(CeedCalloc(1, data));
1136   (*data)->ceed = ceed;
1137   CeedCall(CeedReference(ceed));
1138 
1139   // Build OperatorAssembly data
1140   CeedQFunction       qf;
1141   CeedQFunctionField *qf_fields;
1142   CeedOperatorField  *op_fields;
1143   CeedInt             num_input_fields;
1144   CeedCall(CeedOperatorGetQFunction(op, &qf));
1145   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1146   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1147 
1148   // Determine active input basis
1149   CeedInt       *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0;
1150   CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL;
1151   CeedSize     **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1152 
1153   for (CeedInt i = 0; i < num_input_fields; i++) {
1154     CeedVector vec;
1155 
1156     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1157     if (vec == CEED_VECTOR_ACTIVE) {
1158       CeedInt      index = -1, dim = 1, num_components;
1159       CeedBasis    basis_in = NULL;
1160       CeedEvalMode eval_mode;
1161 
1162       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1163       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1164       CeedCall(CeedBasisGetNumComponents(basis_in, &num_components));
1165       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1166       for (CeedInt i = 0; i < num_active_bases; i++) {
1167         if ((*data)->active_bases[i] == basis_in) index = i;
1168       }
1169       if (index == -1) {
1170         CeedElemRestriction elem_rstr_in;
1171 
1172         index = num_active_bases;
1173         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1174         (*data)->active_bases[num_active_bases] = NULL;
1175         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases]));
1176         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1177         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1178         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1179         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases]));
1180         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1181         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1182         num_eval_modes_in[index]  = 0;
1183         num_eval_modes_out[index] = 0;
1184         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1185         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1186         eval_modes_in[index]  = NULL;
1187         eval_modes_out[index] = NULL;
1188         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1189         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1190         eval_mode_offsets_in[index]  = NULL;
1191         eval_mode_offsets_out[index] = NULL;
1192         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1193         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1194         (*data)->assembled_bases_in[index]  = NULL;
1195         (*data)->assembled_bases_out[index] = NULL;
1196         num_active_bases++;
1197       }
1198       switch (eval_mode) {
1199         case CEED_EVAL_NONE:
1200         case CEED_EVAL_INTERP:
1201           CeedCall(CeedRealloc(num_eval_modes_in[index] + 1, &eval_modes_in[index]));
1202           CeedCall(CeedRealloc(num_eval_modes_in[index] + 1, &eval_mode_offsets_in[index]));
1203           eval_modes_in[index][num_eval_modes_in[index]]        = eval_mode;
1204           eval_mode_offsets_in[index][num_eval_modes_in[index]] = offset;
1205           offset += num_components;
1206           num_eval_modes_in[index] += 1;
1207           break;
1208         case CEED_EVAL_GRAD:
1209           CeedCall(CeedRealloc(num_eval_modes_in[index] + dim, &eval_modes_in[index]));
1210           CeedCall(CeedRealloc(num_eval_modes_in[index] + dim, &eval_mode_offsets_in[index]));
1211           for (CeedInt d = 0; d < dim; d++) {
1212             eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1213             eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1214             offset += num_components;
1215           }
1216           num_eval_modes_in[index] += dim;
1217           break;
1218         case CEED_EVAL_WEIGHT:
1219         case CEED_EVAL_DIV:
1220         case CEED_EVAL_CURL:
1221           break;  // Caught by QF Assembly
1222       }
1223     }
1224   }
1225   (*data)->num_eval_modes_in    = num_eval_modes_in;
1226   (*data)->eval_modes_in        = eval_modes_in;
1227   (*data)->eval_mode_offsets_in = eval_mode_offsets_in;
1228 
1229   // Determine active output basis
1230   CeedInt num_output_fields;
1231 
1232   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1233   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1234   offset = 0;
1235   for (CeedInt i = 0; i < num_output_fields; i++) {
1236     CeedVector vec;
1237 
1238     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1239     if (vec == CEED_VECTOR_ACTIVE) {
1240       CeedInt      index = -1, dim = 1, num_components;
1241       CeedBasis    basis_out = NULL;
1242       CeedEvalMode eval_mode;
1243 
1244       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1245       CeedCall(CeedBasisGetDimension(basis_out, &dim));
1246       CeedCall(CeedBasisGetNumComponents(basis_out, &num_components));
1247       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1248       for (CeedInt i = 0; i < num_active_bases; i++) {
1249         if ((*data)->active_bases[i] == basis_out) index = i;
1250       }
1251       if (index == -1) {
1252         CeedElemRestriction elem_rstr_out;
1253 
1254         index = num_active_bases;
1255         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1256         (*data)->active_bases[num_active_bases] = NULL;
1257         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases]));
1258         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1259         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1260         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1261         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases]));
1262         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1263         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1264         num_eval_modes_in[index]  = 0;
1265         num_eval_modes_out[index] = 0;
1266         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1267         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1268         eval_modes_in[index]  = NULL;
1269         eval_modes_out[index] = NULL;
1270         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1271         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1272         eval_mode_offsets_in[index]  = NULL;
1273         eval_mode_offsets_out[index] = NULL;
1274         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1275         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1276         (*data)->assembled_bases_in[index]  = NULL;
1277         (*data)->assembled_bases_out[index] = NULL;
1278         num_active_bases++;
1279       }
1280       switch (eval_mode) {
1281         case CEED_EVAL_NONE:
1282         case CEED_EVAL_INTERP:
1283           CeedCall(CeedRealloc(num_eval_modes_out[index] + 1, &eval_modes_out[index]));
1284           CeedCall(CeedRealloc(num_eval_modes_out[index] + 1, &eval_mode_offsets_out[index]));
1285           eval_modes_out[index][num_eval_modes_out[index]]        = eval_mode;
1286           eval_mode_offsets_out[index][num_eval_modes_out[index]] = offset;
1287           offset += num_components;
1288           num_eval_modes_out[index] += 1;
1289           break;
1290         case CEED_EVAL_GRAD:
1291           CeedCall(CeedRealloc(num_eval_modes_out[index] + dim, &eval_modes_out[index]));
1292           CeedCall(CeedRealloc(num_eval_modes_out[index] + dim, &eval_mode_offsets_out[index]));
1293           for (CeedInt d = 0; d < dim; d++) {
1294             eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1295             eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1296             offset += num_components;
1297           }
1298           num_eval_modes_out[index] += dim;
1299           break;
1300         case CEED_EVAL_WEIGHT:
1301         case CEED_EVAL_DIV:
1302         case CEED_EVAL_CURL:
1303           break;  // Caught by QF Assembly
1304       }
1305     }
1306   }
1307   (*data)->num_output_components = offset;
1308   (*data)->num_eval_modes_out    = num_eval_modes_out;
1309   (*data)->eval_modes_out        = eval_modes_out;
1310   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1311   (*data)->num_active_bases      = num_active_bases;
1312 
1313   return CEED_ERROR_SUCCESS;
1314 }
1315 
1316 /**
1317   @brief Get CeedOperator CeedEvalModes for assembly.
1318 
1319     Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1320 
1321   @param[in]  data                  CeedOperatorAssemblyData
1322   @param[out] num_active_bases      Total number of active bases
1323   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1324                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1325   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1326   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1327   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1328   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1329   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1330   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1331                                       including contributions of all active bases
1332 
1333   @return An error code: 0 - success, otherwise - failure
1334 
1335 
1336   @ref Backend
1337 **/
1338 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in,
1339                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out,
1340                                          const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) {
1341   if (num_active_bases) *num_active_bases = data->num_active_bases;
1342   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1343   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1344   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1345   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1346   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1347   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1348   if (num_output_components) *num_output_components = data->num_output_components;
1349 
1350   return CEED_ERROR_SUCCESS;
1351 }
1352 
1353 /**
1354   @brief Get CeedOperator CeedBasis data for assembly.
1355 
1356     Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1357 
1358   @param[in]  data                CeedOperatorAssemblyData
1359   @param[out] num_active_bases    Number of active bases, or NULL
1360   @param[out] active_bases        Pointer to hold active CeedBasis, or NULL
1361   @param[out] assembled_bases_in  Pointer to hold assembled active input B, or NULL
1362   @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL
1363 
1364   @return An error code: 0 - success, otherwise - failure
1365 
1366   @ref Backend
1367 **/
1368 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases,
1369                                      const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) {
1370   // Assemble B_in, B_out if needed
1371   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1372     CeedInt num_qpts;
1373 
1374     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1375     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1376       CeedInt           elem_size;
1377       CeedScalar       *B_in = NULL, *identity = NULL;
1378       const CeedScalar *interp_in, *grad_in;
1379       bool              has_eval_none = false;
1380 
1381       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &elem_size));
1382       CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_modes_in[b], &B_in));
1383 
1384       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1385         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1386       }
1387       if (has_eval_none) {
1388         CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1389         for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1390           identity[i * elem_size + i] = 1.0;
1391         }
1392       }
1393       CeedCall(CeedBasisGetInterp(data->active_bases[b], &interp_in));
1394       CeedCall(CeedBasisGetGrad(data->active_bases[b], &grad_in));
1395 
1396       for (CeedInt q = 0; q < num_qpts; q++) {
1397         for (CeedInt n = 0; n < elem_size; n++) {
1398           CeedInt d_in = -1;
1399           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1400             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1401             const CeedScalar *B  = NULL;
1402 
1403             if (data->eval_modes_in[b][e_in] == CEED_EVAL_GRAD) d_in++;
1404             CeedOperatorGetBasisPointer(data->eval_modes_in[b][e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &B);
1405             B_in[(qq + e_in) * elem_size + n] = B[q * elem_size + n];
1406           }
1407         }
1408       }
1409       if (identity) CeedCall(CeedFree(identity));
1410       data->assembled_bases_in[b] = B_in;
1411     }
1412   }
1413 
1414   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1415     CeedInt num_qpts;
1416 
1417     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1418     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1419       CeedInt           elem_size;
1420       const CeedScalar *interp_out, *grad_out;
1421       bool              has_eval_none = false;
1422       CeedScalar       *B_out = NULL, *identity = NULL;
1423 
1424       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &elem_size));
1425       CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_modes_out[b], &B_out));
1426 
1427       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1428         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1429       }
1430       if (has_eval_none) {
1431         CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1432         for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1433           identity[i * elem_size + i] = 1.0;
1434         }
1435       }
1436       CeedCall(CeedBasisGetInterp(data->active_bases[b], &interp_out));
1437       CeedCall(CeedBasisGetGrad(data->active_bases[b], &grad_out));
1438 
1439       for (CeedInt q = 0; q < num_qpts; q++) {
1440         for (CeedInt n = 0; n < elem_size; n++) {
1441           CeedInt d_out = -1;
1442           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1443             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1444             const CeedScalar *B  = NULL;
1445 
1446             if (data->eval_modes_out[b][e_out] == CEED_EVAL_GRAD) d_out++;
1447             CeedOperatorGetBasisPointer(data->eval_modes_out[b][e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &B);
1448             B_out[(qq + e_out) * elem_size + n] = B[q * elem_size + n];
1449           }
1450         }
1451       }
1452       if (identity) CeedCall(CeedFree(identity));
1453       data->assembled_bases_out[b] = B_out;
1454     }
1455   }
1456 
1457   // Pass out assembled data
1458   if (active_bases) *active_bases = data->active_bases;
1459   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1460   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1461 
1462   return CEED_ERROR_SUCCESS;
1463 }
1464 
1465 /**
1466   @brief Get CeedOperator CeedBasis data for assembly.
1467 
1468   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1469 
1470   @param[in]  data                  CeedOperatorAssemblyData
1471   @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL
1472   @param[out] active_elem_rstrs     Pointer to hold active CeedElemRestrictions, or NULL
1473 
1474   @return An error code: 0 - success, otherwise - failure
1475 
1476   @ref Backend
1477 **/
1478 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs,
1479                                                 CeedElemRestriction **active_elem_rstrs) {
1480   if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases;
1481   if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs;
1482 
1483   return CEED_ERROR_SUCCESS;
1484 }
1485 
1486 /**
1487   @brief Destroy CeedOperatorAssemblyData
1488 
1489   @param[in,out] data CeedOperatorAssemblyData to destroy
1490 
1491   @return An error code: 0 - success, otherwise - failure
1492 
1493   @ref Backend
1494 **/
1495 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1496   if (!*data) {
1497     *data = NULL;
1498     return CEED_ERROR_SUCCESS;
1499   }
1500   CeedCall(CeedDestroy(&(*data)->ceed));
1501   for (CeedInt b = 0; b < (*data)->num_active_bases; b++) {
1502     CeedCall(CeedBasisDestroy(&(*data)->active_bases[b]));
1503     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b]));
1504     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1505     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1506     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1507     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1508     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1509     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1510   }
1511   CeedCall(CeedFree(&(*data)->active_bases));
1512   CeedCall(CeedFree(&(*data)->active_elem_rstrs));
1513   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1514   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1515   CeedCall(CeedFree(&(*data)->eval_modes_in));
1516   CeedCall(CeedFree(&(*data)->eval_modes_out));
1517   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1518   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1519   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1520   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1521 
1522   CeedCall(CeedFree(data));
1523   return CEED_ERROR_SUCCESS;
1524 }
1525 
1526 /// @}
1527 
1528 /// ----------------------------------------------------------------------------
1529 /// CeedOperator Public API
1530 /// ----------------------------------------------------------------------------
1531 /// @addtogroup CeedOperatorUser
1532 /// @{
1533 
1534 /**
1535   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1536 
1537   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1538     The vector 'assembled' is of shape [num_elements, num_input_fields, num_output_fields, num_quad_points] and contains column-major matrices
1539 representing the action of the CeedQFunction for a corresponding quadrature point on an element. Inputs and outputs are in the order provided by the
1540 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order,
1541 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1]
1542 and producing the output [dv_0, dv_1, v].
1543 
1544   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1545 
1546   @param[in]  op        CeedOperator to assemble CeedQFunction
1547   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1548   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1549   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1550 
1551   @return An error code: 0 - success, otherwise - failure
1552 
1553   @ref User
1554 **/
1555 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1556   CeedCall(CeedOperatorCheckReady(op));
1557 
1558   if (op->LinearAssembleQFunction) {
1559     // Backend version
1560     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1561   } else {
1562     // Operator fallback
1563     CeedOperator op_fallback;
1564 
1565     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1566     if (op_fallback) {
1567       CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1568     } else {
1569       // LCOV_EXCL_START
1570       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1571       // LCOV_EXCL_STOP
1572     }
1573   }
1574   return CEED_ERROR_SUCCESS;
1575 }
1576 
1577 /**
1578   @brief Assemble CeedQFunction and store result internally.
1579            Return copied references of stored data to the caller.
1580            Caller is responsible for ownership and destruction of the copied references.
1581            See also @ref CeedOperatorLinearAssembleQFunction
1582 
1583   @param[in]  op        CeedOperator to assemble CeedQFunction
1584   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1585   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1586   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1587 
1588   @return An error code: 0 - success, otherwise - failure
1589 
1590   @ref User
1591 **/
1592 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1593   CeedCall(CeedOperatorCheckReady(op));
1594 
1595   if (op->LinearAssembleQFunctionUpdate) {
1596     // Backend version
1597     bool                qf_assembled_is_setup;
1598     CeedVector          assembled_vec  = NULL;
1599     CeedElemRestriction assembled_rstr = NULL;
1600 
1601     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1602     if (qf_assembled_is_setup) {
1603       bool update_needed;
1604 
1605       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1606       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1607       if (update_needed) {
1608         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1609       }
1610     } else {
1611       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1612       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1613     }
1614     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1615 
1616     // Copy reference from internally held copy
1617     *assembled = NULL;
1618     *rstr      = NULL;
1619     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1620     CeedCall(CeedVectorDestroy(&assembled_vec));
1621     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1622     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1623   } else {
1624     // Operator fallback
1625     CeedOperator op_fallback;
1626 
1627     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1628     if (op_fallback) {
1629       CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1630     } else {
1631       // LCOV_EXCL_START
1632       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1633       // LCOV_EXCL_STOP
1634     }
1635   }
1636 
1637   return CEED_ERROR_SUCCESS;
1638 }
1639 
1640 /**
1641   @brief Assemble the diagonal of a square linear CeedOperator
1642 
1643   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1644 
1645   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1646 
1647   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1648 
1649   @param[in]  op        CeedOperator to assemble CeedQFunction
1650   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1651   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1652 
1653   @return An error code: 0 - success, otherwise - failure
1654 
1655   @ref User
1656 **/
1657 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1658   bool is_composite;
1659   CeedCall(CeedOperatorCheckReady(op));
1660   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1661 
1662   CeedSize input_size = 0, output_size = 0;
1663   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1664   if (input_size != output_size) {
1665     // LCOV_EXCL_START
1666     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1667     // LCOV_EXCL_STOP
1668   }
1669 
1670   // Early exit for empty operator
1671   if (!is_composite) {
1672     CeedInt num_elem = 0;
1673 
1674     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1675     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1676   }
1677 
1678   if (op->LinearAssembleDiagonal) {
1679     // Backend version
1680     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1681     return CEED_ERROR_SUCCESS;
1682   } else if (op->LinearAssembleAddDiagonal) {
1683     // Backend version with zeroing first
1684     CeedCall(CeedVectorSetValue(assembled, 0.0));
1685     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1686     return CEED_ERROR_SUCCESS;
1687   } else {
1688     // Operator fallback
1689     CeedOperator op_fallback;
1690 
1691     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1692     if (op_fallback) {
1693       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1694       return CEED_ERROR_SUCCESS;
1695     }
1696   }
1697   // Default interface implementation
1698   CeedCall(CeedVectorSetValue(assembled, 0.0));
1699   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1700 
1701   return CEED_ERROR_SUCCESS;
1702 }
1703 
1704 /**
1705   @brief Assemble the diagonal of a square linear CeedOperator
1706 
1707   This sums into a CeedVector the diagonal of a linear CeedOperator.
1708 
1709   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1710 
1711   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1712 
1713   @param[in]  op        CeedOperator to assemble CeedQFunction
1714   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1715   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1716 
1717   @return An error code: 0 - success, otherwise - failure
1718 
1719   @ref User
1720 **/
1721 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1722   bool is_composite;
1723   CeedCall(CeedOperatorCheckReady(op));
1724   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1725 
1726   CeedSize input_size = 0, output_size = 0;
1727   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1728   if (input_size != output_size) {
1729     // LCOV_EXCL_START
1730     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1731     // LCOV_EXCL_STOP
1732   }
1733 
1734   // Early exit for empty operator
1735   if (!is_composite) {
1736     CeedInt num_elem = 0;
1737 
1738     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1739     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1740   }
1741 
1742   if (op->LinearAssembleAddDiagonal) {
1743     // Backend version
1744     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1745     return CEED_ERROR_SUCCESS;
1746   } else {
1747     // Operator fallback
1748     CeedOperator op_fallback;
1749 
1750     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1751     if (op_fallback) {
1752       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1753       return CEED_ERROR_SUCCESS;
1754     }
1755   }
1756   // Default interface implementation
1757   if (is_composite) {
1758     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1759   } else {
1760     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1761   }
1762 
1763   return CEED_ERROR_SUCCESS;
1764 }
1765 
1766 /**
1767   @brief Assemble the point block diagonal of a square linear CeedOperator
1768 
1769   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1770 
1771   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1772 
1773   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1774 
1775   @param[in]  op        CeedOperator to assemble CeedQFunction
1776   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1777 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1778 component in].
1779   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1780 
1781   @return An error code: 0 - success, otherwise - failure
1782 
1783   @ref User
1784 **/
1785 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1786   bool is_composite;
1787   CeedCall(CeedOperatorCheckReady(op));
1788   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1789 
1790   CeedSize input_size = 0, output_size = 0;
1791   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1792   if (input_size != output_size) {
1793     // LCOV_EXCL_START
1794     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1795     // LCOV_EXCL_STOP
1796   }
1797 
1798   // Early exit for empty operator
1799   if (!is_composite) {
1800     CeedInt num_elem = 0;
1801 
1802     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1803     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1804   }
1805 
1806   if (op->LinearAssemblePointBlockDiagonal) {
1807     // Backend version
1808     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1809     return CEED_ERROR_SUCCESS;
1810   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1811     // Backend version with zeroing first
1812     CeedCall(CeedVectorSetValue(assembled, 0.0));
1813     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1814     return CEED_ERROR_SUCCESS;
1815   } else {
1816     // Operator fallback
1817     CeedOperator op_fallback;
1818 
1819     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1820     if (op_fallback) {
1821       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1822       return CEED_ERROR_SUCCESS;
1823     }
1824   }
1825   // Default interface implementation
1826   CeedCall(CeedVectorSetValue(assembled, 0.0));
1827   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1828 
1829   return CEED_ERROR_SUCCESS;
1830 }
1831 
1832 /**
1833   @brief Assemble the point block diagonal of a square linear CeedOperator
1834 
1835   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1836 
1837   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1838 
1839   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1840 
1841   @param[in]  op        CeedOperator to assemble CeedQFunction
1842   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1843 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1844 component in].
1845   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1846 
1847   @return An error code: 0 - success, otherwise - failure
1848 
1849   @ref User
1850 **/
1851 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1852   bool is_composite;
1853   CeedCall(CeedOperatorCheckReady(op));
1854   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1855 
1856   CeedSize input_size = 0, output_size = 0;
1857   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1858   if (input_size != output_size) {
1859     // LCOV_EXCL_START
1860     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1861     // LCOV_EXCL_STOP
1862   }
1863 
1864   // Early exit for empty operator
1865   if (!is_composite) {
1866     CeedInt num_elem = 0;
1867 
1868     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1869     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1870   }
1871 
1872   if (op->LinearAssembleAddPointBlockDiagonal) {
1873     // Backend version
1874     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1875     return CEED_ERROR_SUCCESS;
1876   } else {
1877     // Operator fallback
1878     CeedOperator op_fallback;
1879 
1880     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1881     if (op_fallback) {
1882       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1883       return CEED_ERROR_SUCCESS;
1884     }
1885   }
1886   // Default interface implementation
1887   if (is_composite) {
1888     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1889   } else {
1890     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1891   }
1892 
1893   return CEED_ERROR_SUCCESS;
1894 }
1895 
1896 /**
1897    @brief Fully assemble the nonzero pattern of a linear operator.
1898 
1899    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1900 
1901    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1902 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j)
1903 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1904 
1905    This will generally be slow unless your operator is low-order.
1906 
1907    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1908 
1909    @param[in]  op          CeedOperator to assemble
1910    @param[out] num_entries Number of entries in coordinate nonzero pattern
1911    @param[out] rows        Row number for each entry
1912    @param[out] cols        Column number for each entry
1913 
1914    @ref User
1915 **/
1916 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1917   CeedInt       num_suboperators, single_entries;
1918   CeedOperator *sub_operators;
1919   bool          is_composite;
1920   CeedCall(CeedOperatorCheckReady(op));
1921   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1922 
1923   if (op->LinearAssembleSymbolic) {
1924     // Backend version
1925     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1926     return CEED_ERROR_SUCCESS;
1927   } else {
1928     // Operator fallback
1929     CeedOperator op_fallback;
1930 
1931     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1932     if (op_fallback) {
1933       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1934       return CEED_ERROR_SUCCESS;
1935     }
1936   }
1937 
1938   // Default interface implementation
1939 
1940   // count entries and allocate rows, cols arrays
1941   *num_entries = 0;
1942   if (is_composite) {
1943     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1944     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1945     for (CeedInt k = 0; k < num_suboperators; ++k) {
1946       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1947       *num_entries += single_entries;
1948     }
1949   } else {
1950     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1951     *num_entries += single_entries;
1952   }
1953   CeedCall(CeedCalloc(*num_entries, rows));
1954   CeedCall(CeedCalloc(*num_entries, cols));
1955 
1956   // assemble nonzero locations
1957   CeedInt offset = 0;
1958   if (is_composite) {
1959     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1960     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1961     for (CeedInt k = 0; k < num_suboperators; ++k) {
1962       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1963       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1964       offset += single_entries;
1965     }
1966   } else {
1967     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1968   }
1969 
1970   return CEED_ERROR_SUCCESS;
1971 }
1972 
1973 /**
1974    @brief Fully assemble the nonzero entries of a linear operator.
1975 
1976    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1977 
1978    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1979 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added,
1980 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1981 
1982    This will generally be slow unless your operator is low-order.
1983 
1984    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1985 
1986    @param[in]  op     CeedOperator to assemble
1987    @param[out] values Values to assemble into matrix
1988 
1989    @ref User
1990 **/
1991 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1992   CeedInt       num_suboperators, single_entries = 0;
1993   CeedOperator *sub_operators;
1994   bool          is_composite;
1995   CeedCall(CeedOperatorCheckReady(op));
1996   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1997 
1998   // Early exit for empty operator
1999   if (!is_composite) {
2000     CeedInt num_elem = 0;
2001 
2002     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2003     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2004   }
2005 
2006   if (op->LinearAssemble) {
2007     // Backend version
2008     CeedCall(op->LinearAssemble(op, values));
2009     return CEED_ERROR_SUCCESS;
2010   } else {
2011     // Operator fallback
2012     CeedOperator op_fallback;
2013 
2014     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2015     if (op_fallback) {
2016       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2017       return CEED_ERROR_SUCCESS;
2018     }
2019   }
2020 
2021   // Default interface implementation
2022   CeedInt offset = 0;
2023   CeedCall(CeedVectorSetValue(values, 0.0));
2024   if (is_composite) {
2025     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2026     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2027     for (CeedInt k = 0; k < num_suboperators; k++) {
2028       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2029       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2030       offset += single_entries;
2031     }
2032   } else {
2033     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2034   }
2035 
2036   return CEED_ERROR_SUCCESS;
2037 }
2038 
2039 /**
2040   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
2041 
2042   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2043 
2044   @param[in]  op               Composite CeedOperator
2045   @param[in]  num_skip_indices Number of suboperators to skip
2046   @param[in]  skip_indices     Array of indices of suboperators to skip
2047   @param[out] mult             Vector to store multiplicity (of size l_size)
2048 
2049   @return An error code: 0 - success, otherwise - failure
2050 
2051   @ref User
2052 **/
2053 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2054   CeedCall(CeedOperatorCheckReady(op));
2055 
2056   Ceed                ceed;
2057   CeedInt             num_suboperators;
2058   CeedSize            l_vec_len;
2059   CeedScalar         *mult_array;
2060   CeedVector          ones_l_vec;
2061   CeedElemRestriction elem_rstr;
2062   CeedOperator       *sub_operators;
2063 
2064   CeedCall(CeedOperatorGetCeed(op, &ceed));
2065 
2066   // Zero mult vector
2067   CeedCall(CeedVectorSetValue(mult, 0.0));
2068 
2069   // Get suboperators
2070   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2071   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2072   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2073 
2074   // Work vector
2075   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2076   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2077   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2078   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2079 
2080   // Compute multiplicity across suboperators
2081   for (CeedInt i = 0; i < num_suboperators; i++) {
2082     const CeedScalar *sub_mult_array;
2083     CeedVector        sub_mult_l_vec, ones_e_vec;
2084 
2085     // -- Check for suboperator to skip
2086     for (CeedInt j = 0; j < num_skip_indices; j++) {
2087       if (skip_indices[j] == i) continue;
2088     }
2089 
2090     // -- Sub operator multiplicity
2091     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2092     CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2093     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2094     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2095     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2096     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2097     // ---- Flag every node present in the current suboperator
2098     for (CeedInt j = 0; j < l_vec_len; j++) {
2099       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2100     }
2101     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2102     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2103     CeedCall(CeedVectorDestroy(&ones_e_vec));
2104   }
2105   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2106   CeedCall(CeedVectorDestroy(&ones_l_vec));
2107 
2108   return CEED_ERROR_SUCCESS;
2109 }
2110 
2111 /**
2112   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2113 grid interpolation
2114 
2115   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2116 
2117   @param[in]  op_fine      Fine grid operator
2118   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2119   @param[in]  rstr_coarse  Coarse grid restriction
2120   @param[in]  basis_coarse Coarse grid active vector basis
2121   @param[out] op_coarse    Coarse grid operator
2122   @param[out] op_prolong   Coarse to fine operator, or NULL
2123   @param[out] op_restrict  Fine to coarse operator, or NULL
2124 
2125   @return An error code: 0 - success, otherwise - failure
2126 
2127   @ref User
2128 **/
2129 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2130                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2131   CeedCall(CeedOperatorCheckReady(op_fine));
2132 
2133   // Build prolongation matrix, if required
2134   CeedBasis basis_c_to_f = NULL;
2135   if (op_prolong || op_restrict) {
2136     CeedBasis basis_fine;
2137     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2138     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2139   }
2140 
2141   // Core code
2142   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2143 
2144   return CEED_ERROR_SUCCESS;
2145 }
2146 
2147 /**
2148   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2149 
2150   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2151 
2152   @param[in]  op_fine       Fine grid operator
2153   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2154   @param[in]  rstr_coarse   Coarse grid restriction
2155   @param[in]  basis_coarse  Coarse grid active vector basis
2156   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2157   @param[out] op_coarse     Coarse grid operator
2158   @param[out] op_prolong    Coarse to fine operator, or NULL
2159   @param[out] op_restrict   Fine to coarse operator, or NULL
2160 
2161   @return An error code: 0 - success, otherwise - failure
2162 
2163   @ref User
2164 **/
2165 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2166                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2167                                              CeedOperator *op_restrict) {
2168   CeedCall(CeedOperatorCheckReady(op_fine));
2169   Ceed ceed;
2170   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2171 
2172   // Check for compatible quadrature spaces
2173   CeedBasis basis_fine;
2174   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2175   CeedInt Q_f, Q_c;
2176   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2177   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2178   if (Q_f != Q_c) {
2179     // LCOV_EXCL_START
2180     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2181     // LCOV_EXCL_STOP
2182   }
2183 
2184   // Create coarse to fine basis, if required
2185   CeedBasis basis_c_to_f = NULL;
2186   if (op_prolong || op_restrict) {
2187     // Check if interpolation matrix is provided
2188     if (!interp_c_to_f) {
2189       // LCOV_EXCL_START
2190       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2191       // LCOV_EXCL_STOP
2192     }
2193     CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2194     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2195     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2196     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2197     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2198     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2199     CeedScalar *q_ref, *q_weight, *grad;
2200     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2201     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2202     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2203     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2204     CeedCall(CeedFree(&q_ref));
2205     CeedCall(CeedFree(&q_weight));
2206     CeedCall(CeedFree(&grad));
2207   }
2208 
2209   // Core code
2210   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2211   return CEED_ERROR_SUCCESS;
2212 }
2213 
2214 /**
2215   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2216 
2217   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2218 
2219   @param[in]  op_fine       Fine grid operator
2220   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2221   @param[in]  rstr_coarse   Coarse grid restriction
2222   @param[in]  basis_coarse  Coarse grid active vector basis
2223   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2224   @param[out] op_coarse     Coarse grid operator
2225   @param[out] op_prolong    Coarse to fine operator, or NULL
2226   @param[out] op_restrict   Fine to coarse operator, or NULL
2227 
2228   @return An error code: 0 - success, otherwise - failure
2229 
2230   @ref User
2231 **/
2232 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2233                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2234                                        CeedOperator *op_restrict) {
2235   CeedCall(CeedOperatorCheckReady(op_fine));
2236   Ceed ceed;
2237   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2238 
2239   // Check for compatible quadrature spaces
2240   CeedBasis basis_fine;
2241   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2242   CeedInt Q_f, Q_c;
2243   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2244   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2245   if (Q_f != Q_c) {
2246     // LCOV_EXCL_START
2247     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2248     // LCOV_EXCL_STOP
2249   }
2250 
2251   // Coarse to fine basis
2252   CeedBasis basis_c_to_f = NULL;
2253   if (op_prolong || op_restrict) {
2254     // Check if interpolation matrix is provided
2255     if (!interp_c_to_f) {
2256       // LCOV_EXCL_START
2257       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2258       // LCOV_EXCL_STOP
2259     }
2260     CeedElemTopology topo;
2261     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2262     CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2263     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2264     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2265     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2266     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2267     CeedScalar *q_ref, *q_weight, *grad;
2268     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2269     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2270     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2271     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2272     CeedCall(CeedFree(&q_ref));
2273     CeedCall(CeedFree(&q_weight));
2274     CeedCall(CeedFree(&grad));
2275   }
2276 
2277   // Core code
2278   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2279   return CEED_ERROR_SUCCESS;
2280 }
2281 
2282 /**
2283   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2284 
2285   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2286     This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, M = V^T V, K = V^T S V.
2287     The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form V^T
2288 S^hat V. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear.
2289 
2290   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2291 
2292   @param[in]  op      CeedOperator to create element inverses
2293   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2294   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2295 
2296   @return An error code: 0 - success, otherwise - failure
2297 
2298   @ref User
2299 **/
2300 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2301   CeedCall(CeedOperatorCheckReady(op));
2302 
2303   if (op->CreateFDMElementInverse) {
2304     // Backend version
2305     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2306     return CEED_ERROR_SUCCESS;
2307   } else {
2308     // Operator fallback
2309     CeedOperator op_fallback;
2310 
2311     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2312     if (op_fallback) {
2313       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2314       return CEED_ERROR_SUCCESS;
2315     }
2316   }
2317 
2318   // Default interface implementation
2319   Ceed ceed, ceed_parent;
2320   CeedCall(CeedOperatorGetCeed(op, &ceed));
2321   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2322   ceed_parent = ceed_parent ? ceed_parent : ceed;
2323   CeedQFunction qf;
2324   CeedCall(CeedOperatorGetQFunction(op, &qf));
2325 
2326   // Determine active input basis
2327   bool                interp = false, grad = false;
2328   CeedBasis           basis = NULL;
2329   CeedElemRestriction rstr  = NULL;
2330   CeedOperatorField  *op_fields;
2331   CeedQFunctionField *qf_fields;
2332   CeedInt             num_input_fields;
2333   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2334   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2335   for (CeedInt i = 0; i < num_input_fields; i++) {
2336     CeedVector vec;
2337     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2338     if (vec == CEED_VECTOR_ACTIVE) {
2339       CeedEvalMode eval_mode;
2340       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2341       interp = interp || eval_mode == CEED_EVAL_INTERP;
2342       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2343       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2344       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2345     }
2346   }
2347   if (!basis) {
2348     // LCOV_EXCL_START
2349     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2350     // LCOV_EXCL_STOP
2351   }
2352   CeedSize l_size = 1;
2353   CeedInt  P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
2354   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2355   CeedCall(CeedBasisGetNumNodes(basis, &elem_size));
2356   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2357   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2358   CeedCall(CeedBasisGetDimension(basis, &dim));
2359   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2360   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2361   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2362 
2363   // Build and diagonalize 1D Mass and Laplacian
2364   bool tensor_basis;
2365   CeedCall(CeedBasisIsTensor(basis, &tensor_basis));
2366   if (!tensor_basis) {
2367     // LCOV_EXCL_START
2368     return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2369     // LCOV_EXCL_STOP
2370   }
2371   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2372   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2373   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2374   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2375   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2376   CeedCall(CeedCalloc(P_1d, &lambda));
2377   // -- Build matrices
2378   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2379   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2380   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2381   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2382   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2383 
2384   // -- Diagonalize
2385   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2386   CeedCall(CeedFree(&mass));
2387   CeedCall(CeedFree(&laplace));
2388   for (CeedInt i = 0; i < P_1d; i++) {
2389     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2390   }
2391   CeedCall(CeedFree(&x));
2392 
2393   // Assemble QFunction
2394   CeedVector          assembled;
2395   CeedElemRestriction rstr_qf;
2396   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2397   CeedInt layout[3];
2398   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2399   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2400   CeedScalar max_norm = 0;
2401   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2402 
2403   // Calculate element averages
2404   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2405   CeedScalar       *elem_avg;
2406   const CeedScalar *assembled_array, *q_weight_array;
2407   CeedVector        q_weight;
2408   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2409   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2410   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2411   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2412   CeedCall(CeedCalloc(num_elem, &elem_avg));
2413   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2414   for (CeedInt e = 0; e < num_elem; e++) {
2415     CeedInt count = 0;
2416     for (CeedInt q = 0; q < num_qpts; q++) {
2417       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2418         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2419           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2420           count++;
2421         }
2422       }
2423     }
2424     if (count) {
2425       elem_avg[e] /= count;
2426     } else {
2427       elem_avg[e] = 1.0;
2428     }
2429   }
2430   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2431   CeedCall(CeedVectorDestroy(&assembled));
2432   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2433   CeedCall(CeedVectorDestroy(&q_weight));
2434 
2435   // Build FDM diagonal
2436   CeedVector  q_data;
2437   CeedScalar *q_data_array, *fdm_diagonal;
2438   CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal));
2439   const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON;
2440   for (CeedInt c = 0; c < num_comp; c++) {
2441     for (CeedInt n = 0; n < elem_size; n++) {
2442       if (interp) fdm_diagonal[c * elem_size + n] = 1.0;
2443       if (grad) {
2444         for (CeedInt d = 0; d < dim; d++) {
2445           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2446           fdm_diagonal[c * elem_size + n] += lambda[i];
2447         }
2448       }
2449       if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound;
2450     }
2451   }
2452   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data));
2453   CeedCall(CeedVectorSetValue(q_data, 0.0));
2454   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2455   for (CeedInt e = 0; e < num_elem; e++) {
2456     for (CeedInt c = 0; c < num_comp; c++) {
2457       for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]);
2458     }
2459   }
2460   CeedCall(CeedFree(&elem_avg));
2461   CeedCall(CeedFree(&fdm_diagonal));
2462   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2463 
2464   // Setup FDM operator
2465   // -- Basis
2466   CeedBasis   fdm_basis;
2467   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2468   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2469   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2470   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2471   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2472   CeedCall(CeedFree(&fdm_interp));
2473   CeedCall(CeedFree(&grad_dummy));
2474   CeedCall(CeedFree(&q_ref_dummy));
2475   CeedCall(CeedFree(&q_weight_dummy));
2476   CeedCall(CeedFree(&lambda));
2477 
2478   // -- Restriction
2479   CeedElemRestriction rstr_qd_i;
2480   CeedInt             strides[3] = {1, elem_size, elem_size * num_comp};
2481   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i));
2482   // -- QFunction
2483   CeedQFunction qf_fdm;
2484   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2485   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2486   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2487   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2488   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2489   // -- QFunction context
2490   CeedInt *num_comp_data;
2491   CeedCall(CeedCalloc(1, &num_comp_data));
2492   num_comp_data[0] = num_comp;
2493   CeedQFunctionContext ctx_fdm;
2494   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2495   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2496   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2497   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2498   // -- Operator
2499   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2500   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2501   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2502   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2503 
2504   // Cleanup
2505   CeedCall(CeedVectorDestroy(&q_data));
2506   CeedCall(CeedBasisDestroy(&fdm_basis));
2507   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2508   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2509 
2510   return CEED_ERROR_SUCCESS;
2511 }
2512 
2513 /// @}
2514