xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision 2c2ea1dbee80fceecd2c97f30b09f8c87820a53e)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <assert.h>
9 #include <ceed-impl.h>
10 #include <ceed.h>
11 #include <ceed/backend.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) {
128       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     }
130     // Cleanup
131     CeedCall(CeedQFunctionDestroy(&qf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
133     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
134   }
135   CeedCall(CeedOperatorSetName(op_fallback, op->name));
136   CeedCall(CeedOperatorCheckReady(op_fallback));
137   op->op_fallback = op_fallback;
138 
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
144 
145   @param[in]  op          CeedOperator to retrieve fallback for
146   @param[out] op_fallback Fallback CeedOperator
147 
148   @return An error code: 0 - success, otherwise - failure
149 
150   @ref Developer
151 **/
152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
153   // Create if needed
154   if (!op->op_fallback) {
155     CeedCall(CeedOperatorCreateFallback(op));
156   }
157   if (op->op_fallback) {
158     bool is_debug;
159 
160     CeedCall(CeedIsDebug(op->ceed, &is_debug));
161     if (is_debug) {
162       Ceed        ceed, ceed_fallback;
163       const char *resource, *resource_fallback;
164 
165       CeedCall(CeedOperatorGetCeed(op, &ceed));
166       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
167       CeedCall(CeedGetResource(ceed, &resource));
168       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
169 
170       CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n");
171       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
172                 op->op_fallback);
173     }
174   }
175   *op_fallback = op->op_fallback;
176 
177   return CEED_ERROR_SUCCESS;
178 }
179 
180 /**
181   @brief Select correct basis matrix pointer based on CeedEvalMode
182 
183   @param[in]  basis     CeedBasis from which to get the basis matrix
184   @param[in]  eval_mode Current basis evaluation mode
185   @param[in]  identity  Pointer to identity matrix
186   @param[out] basis_ptr Basis pointer to set
187 
188   @ref Developer
189 **/
190 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
191   switch (eval_mode) {
192     case CEED_EVAL_NONE:
193       *basis_ptr = identity;
194       break;
195     case CEED_EVAL_INTERP:
196       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
197       break;
198     case CEED_EVAL_GRAD:
199       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
200       break;
201     case CEED_EVAL_DIV:
202       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
203       break;
204     case CEED_EVAL_CURL:
205       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
206       break;
207     case CEED_EVAL_WEIGHT:
208       break;  // Caught by QF Assembly
209   }
210   assert(*basis_ptr != NULL);
211 
212   return CEED_ERROR_SUCCESS;
213 }
214 
215 /**
216   @brief Create point block restriction for active operator field
217 
218   @param[in]  rstr            Original CeedElemRestriction for active field
219   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
220 
221   @return An error code: 0 - success, otherwise - failure
222 
223   @ref Developer
224 **/
225 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
226   Ceed ceed;
227   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
228   const CeedInt *offsets;
229   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
230 
231   // Expand offsets
232   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
233   CeedSize l_size;
234   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
235   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
236   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
237   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
238   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
239   CeedInt shift = num_comp;
240   if (comp_stride != 1) shift *= num_comp;
241   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
242   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
243     pointblock_offsets[i] = offsets[i] * shift;
244   }
245 
246   // Create new restriction
247   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
248                                      pointblock_offsets, pointblock_rstr));
249 
250   // Cleanup
251   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
252 
253   return CEED_ERROR_SUCCESS;
254 }
255 
256 /**
257   @brief Core logic for assembling operator diagonal or point block diagonal
258 
259   @param[in]  op            CeedOperator to assemble point block diagonal
260   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
261   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
262   @param[out] assembled     CeedVector to store assembled diagonal
263 
264   @return An error code: 0 - success, otherwise - failure
265 
266   @ref Developer
267 **/
268 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
269   Ceed ceed;
270   CeedCall(CeedOperatorGetCeed(op, &ceed));
271 
272   // Assemble QFunction
273   CeedQFunction       qf;
274   const CeedScalar   *assembled_qf_array;
275   CeedVector          assembled_qf;
276   CeedElemRestriction assembled_elem_rstr;
277   CeedInt             num_input_fields, num_output_fields;
278   CeedInt             layout[3];
279 
280   CeedCall(CeedOperatorGetQFunction(op, &qf));
281   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
282   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
283   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout));
284   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
285   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
286 
287   // Get assembly data
288   CeedOperatorAssemblyData data;
289   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
290   CeedInt                 *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
291   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
292   CeedBasis               *active_bases;
293   CeedElemRestriction     *active_elem_rstrs;
294   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
295   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
296                                                 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components));
297   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL));
298   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs));
299 
300   // Loop over all active bases
301   for (CeedInt b = 0; b < num_active_bases; b++) {
302     // Assemble point block diagonal restriction, if needed
303     CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b];
304 
305     if (is_pointblock) {
306       CeedElemRestriction point_block_elem_rstr;
307 
308       CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr));
309       diag_elem_rstr = point_block_elem_rstr;
310     }
311 
312     // Create diagonal vector
313     CeedVector elem_diag;
314     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
315 
316     // Assemble element operator diagonals
317     CeedScalar *elem_diag_array;
318     CeedInt     num_elem, num_nodes, num_qpts, num_components;
319 
320     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
321     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
322     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
323     CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes));
324     CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components));
325     CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts));
326 
327     // Construct identity matrix for basis if required
328     bool        has_eval_none = false;
329     CeedScalar *identity      = NULL;
330     for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) {
331       has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE);
332     }
333     for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) {
334       has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE);
335     }
336     if (has_eval_none) {
337       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
338       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
339     }
340 
341     // Compute the diagonal of B^T D B
342     // Each element
343     for (CeedInt e = 0; e < num_elem; e++) {
344       // Each basis eval mode pair
345       CeedInt      d_out              = 0, q_comp_out;
346       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
347       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) {
348         const CeedScalar *B_t = NULL;
349         CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t);
350         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out));
351         if (q_comp_out > 1) {
352           if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
353           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
354         }
355         eval_mode_out_prev = eval_modes_out[b][e_out];
356 
357         CeedInt      d_in              = 0, q_comp_in;
358         CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
359         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) {
360           const CeedScalar *B = NULL;
361           CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B);
362           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in));
363           if (q_comp_in > 1) {
364             if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
365             else B = &B[(++d_in) * num_qpts * num_nodes];
366           }
367           eval_mode_in_prev = eval_modes_in[b][e_in];
368 
369           // Each component
370           for (CeedInt c_out = 0; c_out < num_components; c_out++) {
371             // Each qpt/node pair
372             for (CeedInt q = 0; q < num_qpts; q++) {
373               if (is_pointblock) {
374                 // Point Block Diagonal
375                 for (CeedInt c_in = 0; c_in < num_components; c_in++) {
376                   const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
377                   const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
378                   for (CeedInt n = 0; n < num_nodes; n++) {
379                     elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] +=
380                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
381                   }
382                 }
383               } else {
384                 // Diagonal Only
385                 const CeedInt    c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
386                 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
387                 for (CeedInt n = 0; n < num_nodes; n++) {
388                   elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
389                 }
390               }
391             }
392           }
393         }
394       }
395     }
396     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
397 
398     // Assemble local operator diagonal
399     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
400 
401     // Cleanup
402     if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
403     CeedCall(CeedVectorDestroy(&elem_diag));
404     CeedCall(CeedFree(&identity));
405   }
406   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
407   CeedCall(CeedVectorDestroy(&assembled_qf));
408 
409   return CEED_ERROR_SUCCESS;
410 }
411 
412 /**
413   @brief Core logic for assembling composite operator diagonal
414 
415   @param[in]  op            CeedOperator to assemble point block diagonal
416   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
417   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
418   @param[out] assembled     CeedVector to store assembled diagonal
419 
420   @return An error code: 0 - success, otherwise - failure
421 
422   @ref Developer
423 **/
424 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
425                                                                  CeedVector assembled) {
426   CeedInt       num_sub;
427   CeedOperator *suboperators;
428   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
429   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
430   for (CeedInt i = 0; i < num_sub; i++) {
431     if (is_pointblock) {
432       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
433     } else {
434       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
435     }
436   }
437   return CEED_ERROR_SUCCESS;
438 }
439 
440 /**
441   @brief Build nonzero pattern for non-composite operator
442 
443   Users should generally use CeedOperatorLinearAssembleSymbolic()
444 
445   @param[in]  op     CeedOperator to assemble nonzero pattern
446   @param[in]  offset Offset for number of entries
447   @param[out] rows   Row number for each entry
448   @param[out] cols   Column number for each entry
449 
450   @return An error code: 0 - success, otherwise - failure
451 
452   @ref Developer
453 **/
454 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
455   Ceed ceed;
456   bool is_composite;
457   CeedCall(CeedOperatorGetCeed(op, &ceed));
458   CeedCall(CeedOperatorIsComposite(op, &is_composite));
459 
460   if (is_composite) {
461     // LCOV_EXCL_START
462     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
463     // LCOV_EXCL_STOP
464   }
465 
466   CeedSize num_nodes;
467   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
468   CeedElemRestriction rstr_in;
469   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
470   CeedInt num_elem, elem_size, num_comp;
471   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
472   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
473   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
474   CeedInt layout_er[3];
475   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
476 
477   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
478 
479   // Determine elem_dof relation
480   CeedVector index_vec;
481   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
482   CeedScalar *array;
483   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
484   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
485   CeedCall(CeedVectorRestoreArray(index_vec, &array));
486   CeedVector elem_dof;
487   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
488   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
489   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
490   const CeedScalar *elem_dof_a;
491   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
492   CeedCall(CeedVectorDestroy(&index_vec));
493 
494   // Determine i, j locations for element matrices
495   CeedInt count = 0;
496   for (CeedInt e = 0; e < num_elem; e++) {
497     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
498       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
499         for (CeedInt i = 0; i < elem_size; i++) {
500           for (CeedInt j = 0; j < elem_size; j++) {
501             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
502             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
503 
504             const CeedInt row = elem_dof_a[elem_dof_index_row];
505             const CeedInt col = elem_dof_a[elem_dof_index_col];
506 
507             rows[offset + count] = row;
508             cols[offset + count] = col;
509             count++;
510           }
511         }
512       }
513     }
514   }
515   if (count != local_num_entries) {
516     // LCOV_EXCL_START
517     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
518     // LCOV_EXCL_STOP
519   }
520   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
521   CeedCall(CeedVectorDestroy(&elem_dof));
522 
523   return CEED_ERROR_SUCCESS;
524 }
525 
526 /**
527   @brief Assemble nonzero entries for non-composite operator
528 
529   Users should generally use CeedOperatorLinearAssemble()
530 
531   @param[in]  op     CeedOperator to assemble
532   @param[in]  offset Offset for number of entries
533   @param[out] values Values to assemble into matrix
534 
535   @return An error code: 0 - success, otherwise - failure
536 
537   @ref Developer
538 **/
539 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
540   Ceed ceed;
541   bool is_composite;
542   CeedCall(CeedOperatorGetCeed(op, &ceed));
543   CeedCall(CeedOperatorIsComposite(op, &is_composite));
544 
545   if (is_composite) {
546     // LCOV_EXCL_START
547     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
548     // LCOV_EXCL_STOP
549   }
550 
551   // Early exit for empty operator
552   {
553     CeedInt num_elem = 0;
554 
555     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
556     if (num_elem == 0) return CEED_ERROR_SUCCESS;
557   }
558 
559   if (op->LinearAssembleSingle) {
560     // Backend version
561     CeedCall(op->LinearAssembleSingle(op, offset, values));
562     return CEED_ERROR_SUCCESS;
563   } else {
564     // Operator fallback
565     CeedOperator op_fallback;
566 
567     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
568     if (op_fallback) {
569       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
570       return CEED_ERROR_SUCCESS;
571     }
572   }
573 
574   // Assemble QFunction
575   CeedQFunction qf;
576   CeedCall(CeedOperatorGetQFunction(op, &qf));
577   CeedVector          assembled_qf;
578   CeedElemRestriction rstr_q;
579   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
580   CeedSize qf_length;
581   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
582 
583   CeedInt            num_input_fields, num_output_fields;
584   CeedOperatorField *input_fields;
585   CeedOperatorField *output_fields;
586   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
587 
588   // Get assembly data
589   CeedOperatorAssemblyData data;
590   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
591   const CeedEvalMode **eval_modes_in, **eval_modes_out;
592   CeedInt             *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
593   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out,
594                                                 &eval_modes_out, NULL, NULL));
595   CeedBasis *bases;
596   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL));
597   CeedBasis basis_in = bases[0];
598 
599   if (num_active_bases > 1) {
600     // LCOV_EXCL_START
601     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases");
602     // LCOV_EXCL_STOP
603   }
604   if (num_eval_modes_in[0] == 0 || num_eval_modes_out[0] == 0) {
605     // LCOV_EXCL_START
606     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
607     // LCOV_EXCL_STOP
608   }
609 
610   CeedElemRestriction active_rstr;
611   CeedInt             num_elem, elem_size, num_qpts, num_comp;
612   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
613   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
614   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
615   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
616   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
617 
618   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
619 
620   // loop over elements and put in data structure
621   const CeedScalar *assembled_qf_array;
622   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
623 
624   CeedInt layout_qf[3];
625   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
626   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
627 
628   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
629   const CeedScalar **B_mats_in, **B_mats_out;
630   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out));
631   const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0];
632   CeedScalar        BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]];
633   CeedScalar        elem_mat[elem_size * elem_size];
634   CeedInt           count = 0;
635   CeedScalar       *vals;
636   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
637   for (CeedInt e = 0; e < num_elem; e++) {
638     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
639       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
640         // Compute B^T*D
641         for (CeedInt n = 0; n < elem_size; n++) {
642           for (CeedInt q = 0; q < num_qpts; q++) {
643             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
644               const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in);
645               CeedScalar    sum       = 0.0;
646               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
647                 const CeedInt b_out_index     = (num_eval_modes_out[0] * q + e_out) * elem_size + n;
648                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out;
649                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
650                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
651               }
652               BTD_mat[btd_index] = sum;
653             }
654           }
655         }
656         // form element matrix itself (for each block component)
657         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0]));
658 
659         // put element matrix in coordinate data structure
660         for (CeedInt i = 0; i < elem_size; i++) {
661           for (CeedInt j = 0; j < elem_size; j++) {
662             vals[offset + count] = elem_mat[i * elem_size + j];
663             count++;
664           }
665         }
666       }
667     }
668   }
669   if (count != local_num_entries) {
670     // LCOV_EXCL_START
671     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
672     // LCOV_EXCL_STOP
673   }
674   CeedCall(CeedVectorRestoreArray(values, &vals));
675 
676   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
677   CeedCall(CeedVectorDestroy(&assembled_qf));
678 
679   return CEED_ERROR_SUCCESS;
680 }
681 
682 /**
683   @brief Count number of entries for assembled CeedOperator
684 
685   @param[in]  op          CeedOperator to assemble
686   @param[out] num_entries Number of entries in assembled representation
687 
688   @return An error code: 0 - success, otherwise - failure
689 
690   @ref Utility
691 **/
692 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
693   bool                is_composite;
694   CeedElemRestriction rstr;
695   CeedInt             num_elem, elem_size, num_comp;
696 
697   CeedCall(CeedOperatorIsComposite(op, &is_composite));
698   if (is_composite) {
699     // LCOV_EXCL_START
700     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
701     // LCOV_EXCL_STOP
702   }
703   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
704   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
705   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
706   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
707   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
708 
709   return CEED_ERROR_SUCCESS;
710 }
711 
712 /**
713   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
714 
715   @param[in]  op_fine      Fine grid operator
716   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
717   @param[in]  rstr_coarse  Coarse grid restriction
718   @param[in]  basis_coarse Coarse grid active vector basis
719   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
720   @param[out] op_coarse    Coarse grid operator
721   @param[out] op_prolong   Coarse to fine operator, or NULL
722   @param[out] op_restrict  Fine to coarse operator, or NULL
723 
724   @return An error code: 0 - success, otherwise - failure
725 
726   @ref Developer
727 **/
728 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
729                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
730   Ceed       ceed;
731   CeedVector mult_vec = NULL;
732   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
733 
734   // Check for composite operator
735   bool is_composite;
736   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
737   if (is_composite) {
738     // LCOV_EXCL_START
739     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
740     // LCOV_EXCL_STOP
741   }
742 
743   // Coarse Grid
744   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
745   CeedElemRestriction rstr_fine = NULL;
746   // -- Clone input fields
747   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
748     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
749       rstr_fine = op_fine->input_fields[i]->elem_rstr;
750       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
751     } else {
752       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
753                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
754     }
755   }
756   // -- Clone output fields
757   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
758     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
759       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
760     } else {
761       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
762                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
763     }
764   }
765   // -- Clone QFunctionAssemblyData
766   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
767 
768   // Multiplicity vector
769   if (op_restrict || op_prolong) {
770     CeedVector mult_e_vec;
771 
772     if (!p_mult_fine) {
773       // LCOV_EXCL_START
774       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
775       // LCOV_EXCL_STOP
776     }
777     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
778     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
779     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
780     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
781     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
782     CeedCall(CeedVectorDestroy(&mult_e_vec));
783     CeedCall(CeedVectorReciprocal(mult_vec));
784   }
785 
786   // Clone name
787   bool   has_name = op_fine->name;
788   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
789   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
790 
791   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
792   if ((op_restrict || op_prolong) && !basis_c_to_f) {
793     // LCOV_EXCL_START
794     return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine basis");
795     // LCOV_EXCL_STOP
796   }
797 
798   // Restriction/Prolongation Operators
799   CeedInt num_comp;
800   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
801 
802   // Restriction
803   if (op_restrict) {
804     CeedInt             *num_comp_r_data;
805     CeedQFunction        qf_restrict;
806     CeedQFunctionContext ctx_r;
807 
808     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
809     CeedCall(CeedCalloc(1, &num_comp_r_data));
810     num_comp_r_data[0] = num_comp;
811     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
812     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
813     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
814     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
815     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
816     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
817     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
818     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
819 
820     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
821     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
822     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
823     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
824 
825     // Set name
826     char *restriction_name;
827     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
828     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
829     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
830     CeedCall(CeedFree(&restriction_name));
831 
832     // Check
833     CeedCall(CeedOperatorCheckReady(*op_restrict));
834 
835     // Cleanup
836     CeedCall(CeedQFunctionDestroy(&qf_restrict));
837   }
838 
839   // Prolongation
840   if (op_prolong) {
841     CeedInt             *num_comp_p_data;
842     CeedQFunction        qf_prolong;
843     CeedQFunctionContext ctx_p;
844 
845     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
846     CeedCall(CeedCalloc(1, &num_comp_p_data));
847     num_comp_p_data[0] = num_comp;
848     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
849     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
850     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
851     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
852     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
853     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
854     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
855     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
856 
857     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
858     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
859     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
860     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
861 
862     // Set name
863     char *prolongation_name;
864     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
865     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
866     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
867     CeedCall(CeedFree(&prolongation_name));
868 
869     // Check
870     CeedCall(CeedOperatorCheckReady(*op_prolong));
871 
872     // Cleanup
873     CeedCall(CeedQFunctionDestroy(&qf_prolong));
874   }
875 
876   // Check
877   CeedCall(CeedOperatorCheckReady(*op_coarse));
878 
879   // Cleanup
880   CeedCall(CeedVectorDestroy(&mult_vec));
881   CeedCall(CeedBasisDestroy(&basis_c_to_f));
882 
883   return CEED_ERROR_SUCCESS;
884 }
885 
886 /**
887   @brief Build 1D mass matrix and Laplacian with perturbation
888 
889   @param[in]  interp_1d   Interpolation matrix in one dimension
890   @param[in]  grad_1d     Gradient matrix in one dimension
891   @param[in]  q_weight_1d Quadrature weights in one dimension
892   @param[in]  P_1d        Number of basis nodes in one dimension
893   @param[in]  Q_1d        Number of quadrature points in one dimension
894   @param[in]  dim         Dimension of basis
895   @param[out] mass        Assembled mass matrix in one dimension
896   @param[out] laplace     Assembled perturbed Laplacian in one dimension
897 
898   @return An error code: 0 - success, otherwise - failure
899 
900   @ref Developer
901 **/
902 CeedPragmaOptimizeOff
903 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
904                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
905   for (CeedInt i = 0; i < P_1d; i++) {
906     for (CeedInt j = 0; j < P_1d; j++) {
907       CeedScalar sum = 0.0;
908       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
909       mass[i + j * P_1d] = sum;
910     }
911   }
912   // -- Laplacian
913   for (CeedInt i = 0; i < P_1d; i++) {
914     for (CeedInt j = 0; j < P_1d; j++) {
915       CeedScalar sum = 0.0;
916       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
917       laplace[i + j * P_1d] = sum;
918     }
919   }
920   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
921   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
922   return CEED_ERROR_SUCCESS;
923 }
924 CeedPragmaOptimizeOn
925 
926 /// @}
927 
928 /// ----------------------------------------------------------------------------
929 /// CeedOperator Backend API
930 /// ----------------------------------------------------------------------------
931 /// @addtogroup CeedOperatorBackend
932 /// @{
933 
934 /**
935   @brief Create object holding CeedQFunction assembly data for CeedOperator
936 
937   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
938   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
939 
940   @return An error code: 0 - success, otherwise - failure
941 
942   @ref Backend
943 **/
944 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
945   CeedCall(CeedCalloc(1, data));
946   (*data)->ref_count = 1;
947   (*data)->ceed      = ceed;
948   CeedCall(CeedReference(ceed));
949 
950   return CEED_ERROR_SUCCESS;
951 }
952 
953 /**
954   @brief Increment the reference counter for a CeedQFunctionAssemblyData
955 
956   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
957 
958   @return An error code: 0 - success, otherwise - failure
959 
960   @ref Backend
961 **/
962 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
963   data->ref_count++;
964   return CEED_ERROR_SUCCESS;
965 }
966 
967 /**
968   @brief Set re-use of CeedQFunctionAssemblyData
969 
970   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
971   @param[in]     reuse_data Boolean flag indicating data re-use
972 
973   @return An error code: 0 - success, otherwise - failure
974 
975   @ref Backend
976 **/
977 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
978   data->reuse_data        = reuse_data;
979   data->needs_data_update = true;
980   return CEED_ERROR_SUCCESS;
981 }
982 
983 /**
984   @brief Mark QFunctionAssemblyData as stale
985 
986   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
987   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
988 
989   @return An error code: 0 - success, otherwise - failure
990 
991   @ref Backend
992 **/
993 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
994   data->needs_data_update = needs_data_update;
995   return CEED_ERROR_SUCCESS;
996 }
997 
998 /**
999   @brief Determine if QFunctionAssemblyData needs update
1000 
1001   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
1002   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1003 
1004   @return An error code: 0 - success, otherwise - failure
1005 
1006   @ref Backend
1007 **/
1008 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
1009   *is_update_needed = !data->reuse_data || data->needs_data_update;
1010   return CEED_ERROR_SUCCESS;
1011 }
1012 
1013 /**
1014   @brief Copy the pointer to a CeedQFunctionAssemblyData.
1015            Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
1016 
1017            Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
1018              CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
1019              CeedQFunctionAssemblyData.
1020 
1021   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
1022   @param[in,out] data_copy Variable to store copied reference
1023 
1024   @return An error code: 0 - success, otherwise - failure
1025 
1026   @ref Backend
1027 **/
1028 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1029   CeedCall(CeedQFunctionAssemblyDataReference(data));
1030   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1031   *data_copy = data;
1032   return CEED_ERROR_SUCCESS;
1033 }
1034 
1035 /**
1036   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1037 
1038   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1039   @param[out] is_setup Boolean flag for setup status
1040 
1041   @return An error code: 0 - success, otherwise - failure
1042 
1043   @ref Backend
1044 **/
1045 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1046   *is_setup = data->is_setup;
1047   return CEED_ERROR_SUCCESS;
1048 }
1049 
1050 /**
1051   @brief Set internal objects for CeedQFunctionAssemblyData
1052 
1053   @param[in,out] data CeedQFunctionAssemblyData to set objects
1054   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1055   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1056 
1057   @return An error code: 0 - success, otherwise - failure
1058 
1059   @ref Backend
1060 **/
1061 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1062   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1063   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1064 
1065   data->is_setup = true;
1066   return CEED_ERROR_SUCCESS;
1067 }
1068 
1069 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1070   if (!data->is_setup) {
1071     // LCOV_EXCL_START
1072     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1073     // LCOV_EXCL_STOP
1074   }
1075 
1076   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1077   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1078 
1079   return CEED_ERROR_SUCCESS;
1080 }
1081 
1082 /**
1083   @brief Destroy CeedQFunctionAssemblyData
1084 
1085   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1086 
1087   @return An error code: 0 - success, otherwise - failure
1088 
1089   @ref Backend
1090 **/
1091 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1092   if (!*data || --(*data)->ref_count > 0) {
1093     *data = NULL;
1094     return CEED_ERROR_SUCCESS;
1095   }
1096   CeedCall(CeedDestroy(&(*data)->ceed));
1097   CeedCall(CeedVectorDestroy(&(*data)->vec));
1098   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1099 
1100   CeedCall(CeedFree(data));
1101   return CEED_ERROR_SUCCESS;
1102 }
1103 
1104 /**
1105   @brief Get CeedOperatorAssemblyData
1106 
1107   @param[in]  op   CeedOperator to assemble
1108   @param[out] data CeedQFunctionAssemblyData
1109 
1110   @return An error code: 0 - success, otherwise - failure
1111 
1112   @ref Backend
1113 **/
1114 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1115   if (!op->op_assembled) {
1116     CeedOperatorAssemblyData data;
1117 
1118     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1119     op->op_assembled = data;
1120   }
1121   *data = op->op_assembled;
1122 
1123   return CEED_ERROR_SUCCESS;
1124 }
1125 
1126 /**
1127   @brief Create object holding CeedOperator assembly data.
1128 
1129     The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1130     An array with references to the corresponding active CeedElemRestrictions is also stored.
1131     For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1132     The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1133       CeedEvalMode.
1134     The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1135     Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1136 
1137   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1138   @param[in]  op   CeedOperator to be assembled
1139   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1140 
1141   @return An error code: 0 - success, otherwise - failure
1142 
1143   @ref Backend
1144 **/
1145 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1146   CeedInt num_active_bases = 0;
1147 
1148   // Allocate
1149   CeedCall(CeedCalloc(1, data));
1150   (*data)->ceed = ceed;
1151   CeedCall(CeedReference(ceed));
1152 
1153   // Build OperatorAssembly data
1154   CeedQFunction       qf;
1155   CeedQFunctionField *qf_fields;
1156   CeedOperatorField  *op_fields;
1157   CeedInt             num_input_fields;
1158   CeedCall(CeedOperatorGetQFunction(op, &qf));
1159   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1160   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1161 
1162   // Determine active input basis
1163   CeedInt       *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0;
1164   CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL;
1165   CeedSize     **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1166   for (CeedInt i = 0; i < num_input_fields; i++) {
1167     CeedVector vec;
1168     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1169     if (vec == CEED_VECTOR_ACTIVE) {
1170       CeedBasis    basis_in = NULL;
1171       CeedEvalMode eval_mode;
1172       CeedInt      index = -1, dim, num_comp, q_comp;
1173       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1174       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1175       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1176       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1177       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1178       for (CeedInt i = 0; i < num_active_bases; i++) {
1179         if ((*data)->active_bases[i] == basis_in) index = i;
1180       }
1181       if (index == -1) {
1182         CeedElemRestriction elem_rstr_in;
1183         index = num_active_bases;
1184         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1185         (*data)->active_bases[num_active_bases] = NULL;
1186         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases]));
1187         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1188         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1189         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1190         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases]));
1191         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1192         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1193         num_eval_modes_in[index]  = 0;
1194         num_eval_modes_out[index] = 0;
1195         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1196         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1197         eval_modes_in[index]  = NULL;
1198         eval_modes_out[index] = NULL;
1199         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1200         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1201         eval_mode_offsets_in[index]  = NULL;
1202         eval_mode_offsets_out[index] = NULL;
1203         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1204         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1205         (*data)->assembled_bases_in[index]  = NULL;
1206         (*data)->assembled_bases_out[index] = NULL;
1207         num_active_bases++;
1208       }
1209       if (eval_mode != CEED_EVAL_WEIGHT) {
1210         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1211         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1212         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1213         for (CeedInt d = 0; d < q_comp; d++) {
1214           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1215           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1216           offset += num_comp;
1217         }
1218         num_eval_modes_in[index] += q_comp;
1219       }
1220     }
1221   }
1222   (*data)->num_eval_modes_in    = num_eval_modes_in;
1223   (*data)->eval_modes_in        = eval_modes_in;
1224   (*data)->eval_mode_offsets_in = eval_mode_offsets_in;
1225 
1226   // Determine active output basis
1227   CeedInt num_output_fields;
1228   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1229   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1230   offset = 0;
1231   for (CeedInt i = 0; i < num_output_fields; i++) {
1232     CeedVector vec;
1233     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1234     if (vec == CEED_VECTOR_ACTIVE) {
1235       CeedBasis    basis_out = NULL;
1236       CeedEvalMode eval_mode;
1237       CeedInt      index = -1, dim, num_comp, q_comp;
1238       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1239       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1240       CeedCall(CeedBasisGetDimension(basis_out, &dim));
1241       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1242       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1243       for (CeedInt i = 0; i < num_active_bases; i++) {
1244         if ((*data)->active_bases[i] == basis_out) index = i;
1245       }
1246       if (index == -1) {
1247         CeedElemRestriction elem_rstr_out;
1248 
1249         index = num_active_bases;
1250         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1251         (*data)->active_bases[num_active_bases] = NULL;
1252         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases]));
1253         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1254         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1255         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1256         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases]));
1257         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1258         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1259         num_eval_modes_in[index]  = 0;
1260         num_eval_modes_out[index] = 0;
1261         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1262         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1263         eval_modes_in[index]  = NULL;
1264         eval_modes_out[index] = NULL;
1265         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1266         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1267         eval_mode_offsets_in[index]  = NULL;
1268         eval_mode_offsets_out[index] = NULL;
1269         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1270         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1271         (*data)->assembled_bases_in[index]  = NULL;
1272         (*data)->assembled_bases_out[index] = NULL;
1273         num_active_bases++;
1274       }
1275       if (eval_mode != CEED_EVAL_WEIGHT) {
1276         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1277         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1278         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1279         for (CeedInt d = 0; d < q_comp; d++) {
1280           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1281           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1282           offset += num_comp;
1283         }
1284         num_eval_modes_out[index] += q_comp;
1285       }
1286     }
1287   }
1288   (*data)->num_output_components = offset;
1289   (*data)->num_eval_modes_out    = num_eval_modes_out;
1290   (*data)->eval_modes_out        = eval_modes_out;
1291   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1292   (*data)->num_active_bases      = num_active_bases;
1293 
1294   return CEED_ERROR_SUCCESS;
1295 }
1296 
1297 /**
1298   @brief Get CeedOperator CeedEvalModes for assembly.
1299 
1300     Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1301 
1302   @param[in]  data                  CeedOperatorAssemblyData
1303   @param[out] num_active_bases      Total number of active bases
1304   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1305                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1306   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1307   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1308   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1309   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1310   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1311   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1312                                       including contributions of all active bases
1313 
1314   @return An error code: 0 - success, otherwise - failure
1315 
1316 
1317   @ref Backend
1318 **/
1319 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in,
1320                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out,
1321                                          const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) {
1322   if (num_active_bases) *num_active_bases = data->num_active_bases;
1323   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1324   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1325   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1326   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1327   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1328   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1329   if (num_output_components) *num_output_components = data->num_output_components;
1330 
1331   return CEED_ERROR_SUCCESS;
1332 }
1333 
1334 /**
1335   @brief Get CeedOperator CeedBasis data for assembly.
1336 
1337     Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1338 
1339   @param[in]  data                CeedOperatorAssemblyData
1340   @param[out] num_active_bases    Number of active bases, or NULL
1341   @param[out] active_bases        Pointer to hold active CeedBasis, or NULL
1342   @param[out] assembled_bases_in  Pointer to hold assembled active input B, or NULL
1343   @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL
1344 
1345   @return An error code: 0 - success, otherwise - failure
1346 
1347   @ref Backend
1348 **/
1349 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases,
1350                                      const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) {
1351   // Assemble B_in, B_out if needed
1352   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1353     CeedInt num_qpts;
1354 
1355     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1356     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1357       CeedInt     num_nodes;
1358       CeedScalar *B_in = NULL, *identity = NULL;
1359       bool        has_eval_none = false;
1360 
1361       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1362       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1363 
1364       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1365         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1366       }
1367       if (has_eval_none) {
1368         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1369         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1370           identity[i * num_nodes + i] = 1.0;
1371         }
1372       }
1373 
1374       for (CeedInt q = 0; q < num_qpts; q++) {
1375         for (CeedInt n = 0; n < num_nodes; n++) {
1376           CeedInt      d_in              = 0, q_comp_in;
1377           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1378           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1379             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1380             const CeedScalar *B  = NULL;
1381             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B);
1382             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in));
1383             if (q_comp_in > 1) {
1384               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1385               else B = &B[(++d_in) * num_qpts * num_nodes];
1386             }
1387             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1388             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1389           }
1390         }
1391       }
1392       if (identity) CeedCall(CeedFree(identity));
1393       data->assembled_bases_in[b] = B_in;
1394     }
1395   }
1396 
1397   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1398     CeedInt num_qpts;
1399 
1400     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1401     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1402       CeedInt     num_nodes;
1403       bool        has_eval_none = false;
1404       CeedScalar *B_out = NULL, *identity = NULL;
1405 
1406       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1407       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1408 
1409       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1410         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1411       }
1412       if (has_eval_none) {
1413         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1414         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1415           identity[i * num_nodes + i] = 1.0;
1416         }
1417       }
1418 
1419       for (CeedInt q = 0; q < num_qpts; q++) {
1420         for (CeedInt n = 0; n < num_nodes; n++) {
1421           CeedInt      d_out              = 0, q_comp_out;
1422           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1423           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1424             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1425             const CeedScalar *B  = NULL;
1426             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B);
1427             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out));
1428             if (q_comp_out > 1) {
1429               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1430               else B = &B[(++d_out) * num_qpts * num_nodes];
1431             }
1432             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1433             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1434           }
1435         }
1436       }
1437       if (identity) CeedCall(CeedFree(identity));
1438       data->assembled_bases_out[b] = B_out;
1439     }
1440   }
1441 
1442   // Pass out assembled data
1443   if (active_bases) *active_bases = data->active_bases;
1444   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1445   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1446 
1447   return CEED_ERROR_SUCCESS;
1448 }
1449 
1450 /**
1451   @brief Get CeedOperator CeedBasis data for assembly.
1452 
1453   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1454 
1455   @param[in]  data                  CeedOperatorAssemblyData
1456   @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL
1457   @param[out] active_elem_rstrs     Pointer to hold active CeedElemRestrictions, or NULL
1458 
1459   @return An error code: 0 - success, otherwise - failure
1460 
1461   @ref Backend
1462 **/
1463 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs,
1464                                                 CeedElemRestriction **active_elem_rstrs) {
1465   if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases;
1466   if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs;
1467 
1468   return CEED_ERROR_SUCCESS;
1469 }
1470 
1471 /**
1472   @brief Destroy CeedOperatorAssemblyData
1473 
1474   @param[in,out] data CeedOperatorAssemblyData to destroy
1475 
1476   @return An error code: 0 - success, otherwise - failure
1477 
1478   @ref Backend
1479 **/
1480 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1481   if (!*data) {
1482     *data = NULL;
1483     return CEED_ERROR_SUCCESS;
1484   }
1485   CeedCall(CeedDestroy(&(*data)->ceed));
1486   for (CeedInt b = 0; b < (*data)->num_active_bases; b++) {
1487     CeedCall(CeedBasisDestroy(&(*data)->active_bases[b]));
1488     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b]));
1489     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1490     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1491     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1492     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1493     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1494     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1495   }
1496   CeedCall(CeedFree(&(*data)->active_bases));
1497   CeedCall(CeedFree(&(*data)->active_elem_rstrs));
1498   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1499   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1500   CeedCall(CeedFree(&(*data)->eval_modes_in));
1501   CeedCall(CeedFree(&(*data)->eval_modes_out));
1502   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1503   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1504   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1505   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1506 
1507   CeedCall(CeedFree(data));
1508   return CEED_ERROR_SUCCESS;
1509 }
1510 
1511 /// @}
1512 
1513 /// ----------------------------------------------------------------------------
1514 /// CeedOperator Public API
1515 /// ----------------------------------------------------------------------------
1516 /// @addtogroup CeedOperatorUser
1517 /// @{
1518 
1519 /**
1520   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1521 
1522   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1523     The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices
1524 representing the action of the CeedQFunction for a corresponding quadrature point on an element.
1525 
1526   Inputs and outputs are in the order provided by the
1527 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order,
1528 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1]
1529 and producing the output [dv_0, dv_1, v].
1530 
1531   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1532 
1533   @param[in]  op        CeedOperator to assemble CeedQFunction
1534   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1535   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1536   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1537 
1538   @return An error code: 0 - success, otherwise - failure
1539 
1540   @ref User
1541 **/
1542 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1543   CeedCall(CeedOperatorCheckReady(op));
1544 
1545   if (op->LinearAssembleQFunction) {
1546     // Backend version
1547     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1548   } else {
1549     // Operator fallback
1550     CeedOperator op_fallback;
1551 
1552     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1553     if (op_fallback) {
1554       CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1555     } else {
1556       // LCOV_EXCL_START
1557       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1558       // LCOV_EXCL_STOP
1559     }
1560   }
1561   return CEED_ERROR_SUCCESS;
1562 }
1563 
1564 /**
1565   @brief Assemble CeedQFunction and store result internally.
1566            Return copied references of stored data to the caller.
1567            Caller is responsible for ownership and destruction of the copied references.
1568            See also @ref CeedOperatorLinearAssembleQFunction
1569 
1570   @param[in]  op        CeedOperator to assemble CeedQFunction
1571   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1572   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1573   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1574 
1575   @return An error code: 0 - success, otherwise - failure
1576 
1577   @ref User
1578 **/
1579 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1580   CeedCall(CeedOperatorCheckReady(op));
1581 
1582   if (op->LinearAssembleQFunctionUpdate) {
1583     // Backend version
1584     bool                qf_assembled_is_setup;
1585     CeedVector          assembled_vec  = NULL;
1586     CeedElemRestriction assembled_rstr = NULL;
1587 
1588     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1589     if (qf_assembled_is_setup) {
1590       bool update_needed;
1591 
1592       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1593       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1594       if (update_needed) {
1595         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1596       }
1597     } else {
1598       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1599       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1600     }
1601     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1602 
1603     // Copy reference from internally held copy
1604     *assembled = NULL;
1605     *rstr      = NULL;
1606     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1607     CeedCall(CeedVectorDestroy(&assembled_vec));
1608     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1609     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1610   } else {
1611     // Operator fallback
1612     CeedOperator op_fallback;
1613 
1614     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1615     if (op_fallback) {
1616       CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1617     } else {
1618       // LCOV_EXCL_START
1619       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1620       // LCOV_EXCL_STOP
1621     }
1622   }
1623 
1624   return CEED_ERROR_SUCCESS;
1625 }
1626 
1627 /**
1628   @brief Assemble the diagonal of a square linear CeedOperator
1629 
1630   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1631 
1632   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1633 
1634   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1635 
1636   @param[in]  op        CeedOperator to assemble CeedQFunction
1637   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1638   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1639 
1640   @return An error code: 0 - success, otherwise - failure
1641 
1642   @ref User
1643 **/
1644 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1645   bool is_composite;
1646   CeedCall(CeedOperatorCheckReady(op));
1647   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1648 
1649   CeedSize input_size = 0, output_size = 0;
1650   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1651   if (input_size != output_size) {
1652     // LCOV_EXCL_START
1653     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1654     // LCOV_EXCL_STOP
1655   }
1656 
1657   // Early exit for empty operator
1658   if (!is_composite) {
1659     CeedInt num_elem = 0;
1660 
1661     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1662     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1663   }
1664 
1665   if (op->LinearAssembleDiagonal) {
1666     // Backend version
1667     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1668     return CEED_ERROR_SUCCESS;
1669   } else if (op->LinearAssembleAddDiagonal) {
1670     // Backend version with zeroing first
1671     CeedCall(CeedVectorSetValue(assembled, 0.0));
1672     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1673     return CEED_ERROR_SUCCESS;
1674   } else {
1675     // Operator fallback
1676     CeedOperator op_fallback;
1677 
1678     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1679     if (op_fallback) {
1680       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1681       return CEED_ERROR_SUCCESS;
1682     }
1683   }
1684   // Default interface implementation
1685   CeedCall(CeedVectorSetValue(assembled, 0.0));
1686   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1687 
1688   return CEED_ERROR_SUCCESS;
1689 }
1690 
1691 /**
1692   @brief Assemble the diagonal of a square linear CeedOperator
1693 
1694   This sums into a CeedVector the diagonal of a linear CeedOperator.
1695 
1696   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1697 
1698   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1699 
1700   @param[in]  op        CeedOperator to assemble CeedQFunction
1701   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1702   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1703 
1704   @return An error code: 0 - success, otherwise - failure
1705 
1706   @ref User
1707 **/
1708 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1709   bool is_composite;
1710   CeedCall(CeedOperatorCheckReady(op));
1711   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1712 
1713   CeedSize input_size = 0, output_size = 0;
1714   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1715   if (input_size != output_size) {
1716     // LCOV_EXCL_START
1717     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1718     // LCOV_EXCL_STOP
1719   }
1720 
1721   // Early exit for empty operator
1722   if (!is_composite) {
1723     CeedInt num_elem = 0;
1724 
1725     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1726     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1727   }
1728 
1729   if (op->LinearAssembleAddDiagonal) {
1730     // Backend version
1731     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1732     return CEED_ERROR_SUCCESS;
1733   } else {
1734     // Operator fallback
1735     CeedOperator op_fallback;
1736 
1737     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1738     if (op_fallback) {
1739       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1740       return CEED_ERROR_SUCCESS;
1741     }
1742   }
1743   // Default interface implementation
1744   if (is_composite) {
1745     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1746   } else {
1747     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1748   }
1749 
1750   return CEED_ERROR_SUCCESS;
1751 }
1752 
1753 /**
1754   @brief Assemble the point block diagonal of a square linear CeedOperator
1755 
1756   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1757 
1758   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1759 
1760   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1761 
1762   @param[in]  op        CeedOperator to assemble CeedQFunction
1763   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1764 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1765 component in].
1766   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1767 
1768   @return An error code: 0 - success, otherwise - failure
1769 
1770   @ref User
1771 **/
1772 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1773   bool is_composite;
1774   CeedCall(CeedOperatorCheckReady(op));
1775   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1776 
1777   CeedSize input_size = 0, output_size = 0;
1778   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1779   if (input_size != output_size) {
1780     // LCOV_EXCL_START
1781     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1782     // LCOV_EXCL_STOP
1783   }
1784 
1785   // Early exit for empty operator
1786   if (!is_composite) {
1787     CeedInt num_elem = 0;
1788 
1789     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1790     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1791   }
1792 
1793   if (op->LinearAssemblePointBlockDiagonal) {
1794     // Backend version
1795     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1796     return CEED_ERROR_SUCCESS;
1797   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1798     // Backend version with zeroing first
1799     CeedCall(CeedVectorSetValue(assembled, 0.0));
1800     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1801     return CEED_ERROR_SUCCESS;
1802   } else {
1803     // Operator fallback
1804     CeedOperator op_fallback;
1805 
1806     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1807     if (op_fallback) {
1808       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1809       return CEED_ERROR_SUCCESS;
1810     }
1811   }
1812   // Default interface implementation
1813   CeedCall(CeedVectorSetValue(assembled, 0.0));
1814   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1815 
1816   return CEED_ERROR_SUCCESS;
1817 }
1818 
1819 /**
1820   @brief Assemble the point block diagonal of a square linear CeedOperator
1821 
1822   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1823 
1824   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1825 
1826   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1827 
1828   @param[in]  op        CeedOperator to assemble CeedQFunction
1829   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1830 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1831 component in].
1832   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1833 
1834   @return An error code: 0 - success, otherwise - failure
1835 
1836   @ref User
1837 **/
1838 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1839   bool is_composite;
1840   CeedCall(CeedOperatorCheckReady(op));
1841   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1842 
1843   CeedSize input_size = 0, output_size = 0;
1844   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1845   if (input_size != output_size) {
1846     // LCOV_EXCL_START
1847     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1848     // LCOV_EXCL_STOP
1849   }
1850 
1851   // Early exit for empty operator
1852   if (!is_composite) {
1853     CeedInt num_elem = 0;
1854 
1855     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1856     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1857   }
1858 
1859   if (op->LinearAssembleAddPointBlockDiagonal) {
1860     // Backend version
1861     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1862     return CEED_ERROR_SUCCESS;
1863   } else {
1864     // Operator fallback
1865     CeedOperator op_fallback;
1866 
1867     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1868     if (op_fallback) {
1869       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1870       return CEED_ERROR_SUCCESS;
1871     }
1872   }
1873   // Default interface implementation
1874   if (is_composite) {
1875     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1876   } else {
1877     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1878   }
1879 
1880   return CEED_ERROR_SUCCESS;
1881 }
1882 
1883 /**
1884    @brief Fully assemble the nonzero pattern of a linear operator.
1885 
1886    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1887 
1888    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1889 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j)
1890 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1891 
1892    This will generally be slow unless your operator is low-order.
1893 
1894    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1895 
1896    @param[in]  op          CeedOperator to assemble
1897    @param[out] num_entries Number of entries in coordinate nonzero pattern
1898    @param[out] rows        Row number for each entry
1899    @param[out] cols        Column number for each entry
1900 
1901    @ref User
1902 **/
1903 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1904   CeedInt       num_suboperators, single_entries;
1905   CeedOperator *sub_operators;
1906   bool          is_composite;
1907   CeedCall(CeedOperatorCheckReady(op));
1908   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1909 
1910   if (op->LinearAssembleSymbolic) {
1911     // Backend version
1912     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1913     return CEED_ERROR_SUCCESS;
1914   } else {
1915     // Operator fallback
1916     CeedOperator op_fallback;
1917 
1918     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1919     if (op_fallback) {
1920       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1921       return CEED_ERROR_SUCCESS;
1922     }
1923   }
1924 
1925   // Default interface implementation
1926 
1927   // count entries and allocate rows, cols arrays
1928   *num_entries = 0;
1929   if (is_composite) {
1930     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1931     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1932     for (CeedInt k = 0; k < num_suboperators; ++k) {
1933       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1934       *num_entries += single_entries;
1935     }
1936   } else {
1937     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1938     *num_entries += single_entries;
1939   }
1940   CeedCall(CeedCalloc(*num_entries, rows));
1941   CeedCall(CeedCalloc(*num_entries, cols));
1942 
1943   // assemble nonzero locations
1944   CeedInt offset = 0;
1945   if (is_composite) {
1946     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1947     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1948     for (CeedInt k = 0; k < num_suboperators; ++k) {
1949       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1950       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1951       offset += single_entries;
1952     }
1953   } else {
1954     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1955   }
1956 
1957   return CEED_ERROR_SUCCESS;
1958 }
1959 
1960 /**
1961    @brief Fully assemble the nonzero entries of a linear operator.
1962 
1963    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1964 
1965    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1966 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added,
1967 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1968 
1969    This will generally be slow unless your operator is low-order.
1970 
1971    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1972 
1973    @param[in]  op     CeedOperator to assemble
1974    @param[out] values Values to assemble into matrix
1975 
1976    @ref User
1977 **/
1978 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1979   CeedInt       num_suboperators, single_entries = 0;
1980   CeedOperator *sub_operators;
1981   bool          is_composite;
1982   CeedCall(CeedOperatorCheckReady(op));
1983   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1984 
1985   // Early exit for empty operator
1986   if (!is_composite) {
1987     CeedInt num_elem = 0;
1988 
1989     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1990     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1991   }
1992 
1993   if (op->LinearAssemble) {
1994     // Backend version
1995     CeedCall(op->LinearAssemble(op, values));
1996     return CEED_ERROR_SUCCESS;
1997   } else {
1998     // Operator fallback
1999     CeedOperator op_fallback;
2000 
2001     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2002     if (op_fallback) {
2003       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2004       return CEED_ERROR_SUCCESS;
2005     }
2006   }
2007 
2008   // Default interface implementation
2009   CeedInt offset = 0;
2010   CeedCall(CeedVectorSetValue(values, 0.0));
2011   if (is_composite) {
2012     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2013     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2014     for (CeedInt k = 0; k < num_suboperators; k++) {
2015       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2016       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2017       offset += single_entries;
2018     }
2019   } else {
2020     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2021   }
2022 
2023   return CEED_ERROR_SUCCESS;
2024 }
2025 
2026 /**
2027   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
2028 
2029   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2030 
2031   @param[in]  op               Composite CeedOperator
2032   @param[in]  num_skip_indices Number of suboperators to skip
2033   @param[in]  skip_indices     Array of indices of suboperators to skip
2034   @param[out] mult             Vector to store multiplicity (of size l_size)
2035 
2036   @return An error code: 0 - success, otherwise - failure
2037 
2038   @ref User
2039 **/
2040 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2041   CeedCall(CeedOperatorCheckReady(op));
2042 
2043   Ceed                ceed;
2044   CeedInt             num_suboperators;
2045   CeedSize            l_vec_len;
2046   CeedScalar         *mult_array;
2047   CeedVector          ones_l_vec;
2048   CeedElemRestriction elem_rstr;
2049   CeedOperator       *sub_operators;
2050 
2051   CeedCall(CeedOperatorGetCeed(op, &ceed));
2052 
2053   // Zero mult vector
2054   CeedCall(CeedVectorSetValue(mult, 0.0));
2055 
2056   // Get suboperators
2057   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2058   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2059   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2060 
2061   // Work vector
2062   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2063   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2064   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2065   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2066 
2067   // Compute multiplicity across suboperators
2068   for (CeedInt i = 0; i < num_suboperators; i++) {
2069     const CeedScalar *sub_mult_array;
2070     CeedVector        sub_mult_l_vec, ones_e_vec;
2071 
2072     // -- Check for suboperator to skip
2073     for (CeedInt j = 0; j < num_skip_indices; j++) {
2074       if (skip_indices[j] == i) continue;
2075     }
2076 
2077     // -- Sub operator multiplicity
2078     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2079     CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2080     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2081     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2082     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2083     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2084     // ---- Flag every node present in the current suboperator
2085     for (CeedInt j = 0; j < l_vec_len; j++) {
2086       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2087     }
2088     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2089     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2090     CeedCall(CeedVectorDestroy(&ones_e_vec));
2091   }
2092   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2093   CeedCall(CeedVectorDestroy(&ones_l_vec));
2094 
2095   return CEED_ERROR_SUCCESS;
2096 }
2097 
2098 /**
2099   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2100 grid interpolation
2101 
2102   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2103 
2104   @param[in]  op_fine      Fine grid operator
2105   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2106   @param[in]  rstr_coarse  Coarse grid restriction
2107   @param[in]  basis_coarse Coarse grid active vector basis
2108   @param[out] op_coarse    Coarse grid operator
2109   @param[out] op_prolong   Coarse to fine operator, or NULL
2110   @param[out] op_restrict  Fine to coarse operator, or NULL
2111 
2112   @return An error code: 0 - success, otherwise - failure
2113 
2114   @ref User
2115 **/
2116 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2117                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2118   CeedCall(CeedOperatorCheckReady(op_fine));
2119 
2120   // Build prolongation matrix, if required
2121   CeedBasis basis_c_to_f = NULL;
2122   if (op_prolong || op_restrict) {
2123     CeedBasis basis_fine;
2124     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2125     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2126   }
2127 
2128   // Core code
2129   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2130 
2131   return CEED_ERROR_SUCCESS;
2132 }
2133 
2134 /**
2135   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2136 
2137   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2138 
2139   @param[in]  op_fine       Fine grid operator
2140   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2141   @param[in]  rstr_coarse   Coarse grid restriction
2142   @param[in]  basis_coarse  Coarse grid active vector basis
2143   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2144   @param[out] op_coarse     Coarse grid operator
2145   @param[out] op_prolong    Coarse to fine operator, or NULL
2146   @param[out] op_restrict   Fine to coarse operator, or NULL
2147 
2148   @return An error code: 0 - success, otherwise - failure
2149 
2150   @ref User
2151 **/
2152 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2153                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2154                                              CeedOperator *op_restrict) {
2155   CeedCall(CeedOperatorCheckReady(op_fine));
2156   Ceed ceed;
2157   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2158 
2159   // Check for compatible quadrature spaces
2160   CeedBasis basis_fine;
2161   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2162   CeedInt Q_f, Q_c;
2163   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2164   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2165   if (Q_f != Q_c) {
2166     // LCOV_EXCL_START
2167     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2168     // LCOV_EXCL_STOP
2169   }
2170 
2171   // Create coarse to fine basis, if required
2172   CeedBasis basis_c_to_f = NULL;
2173   if (op_prolong || op_restrict) {
2174     // Check if interpolation matrix is provided
2175     if (!interp_c_to_f) {
2176       // LCOV_EXCL_START
2177       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2178       // LCOV_EXCL_STOP
2179     }
2180     CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2181     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2182     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2183     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2184     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2185     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2186     CeedScalar *q_ref, *q_weight, *grad;
2187     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2188     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2189     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2190     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2191     CeedCall(CeedFree(&q_ref));
2192     CeedCall(CeedFree(&q_weight));
2193     CeedCall(CeedFree(&grad));
2194   }
2195 
2196   // Core code
2197   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2198   return CEED_ERROR_SUCCESS;
2199 }
2200 
2201 /**
2202   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2203 
2204   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2205 
2206   @param[in]  op_fine       Fine grid operator
2207   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2208   @param[in]  rstr_coarse   Coarse grid restriction
2209   @param[in]  basis_coarse  Coarse grid active vector basis
2210   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2211   @param[out] op_coarse     Coarse grid operator
2212   @param[out] op_prolong    Coarse to fine operator, or NULL
2213   @param[out] op_restrict   Fine to coarse operator, or NULL
2214 
2215   @return An error code: 0 - success, otherwise - failure
2216 
2217   @ref User
2218 **/
2219 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2220                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2221                                        CeedOperator *op_restrict) {
2222   CeedCall(CeedOperatorCheckReady(op_fine));
2223   Ceed ceed;
2224   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2225 
2226   // Check for compatible quadrature spaces
2227   CeedBasis basis_fine;
2228   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2229   CeedInt Q_f, Q_c;
2230   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2231   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2232   if (Q_f != Q_c) {
2233     // LCOV_EXCL_START
2234     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2235     // LCOV_EXCL_STOP
2236   }
2237 
2238   // Coarse to fine basis
2239   CeedBasis basis_c_to_f = NULL;
2240   if (op_prolong || op_restrict) {
2241     // Check if interpolation matrix is provided
2242     if (!interp_c_to_f) {
2243       // LCOV_EXCL_START
2244       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2245       // LCOV_EXCL_STOP
2246     }
2247     CeedElemTopology topo;
2248     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2249     CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2250     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2251     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2252     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2253     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2254     CeedScalar *q_ref, *q_weight, *grad;
2255     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2256     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2257     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2258     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2259     CeedCall(CeedFree(&q_ref));
2260     CeedCall(CeedFree(&q_weight));
2261     CeedCall(CeedFree(&grad));
2262   }
2263 
2264   // Core code
2265   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2266   return CEED_ERROR_SUCCESS;
2267 }
2268 
2269 /**
2270   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2271 
2272   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2273     This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2274     The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T
2275 \hat S V\f$. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear.
2276 
2277   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2278 
2279   @param[in]  op      CeedOperator to create element inverses
2280   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2281   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2282 
2283   @return An error code: 0 - success, otherwise - failure
2284 
2285   @ref User
2286 **/
2287 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2288   CeedCall(CeedOperatorCheckReady(op));
2289 
2290   if (op->CreateFDMElementInverse) {
2291     // Backend version
2292     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2293     return CEED_ERROR_SUCCESS;
2294   } else {
2295     // Operator fallback
2296     CeedOperator op_fallback;
2297 
2298     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2299     if (op_fallback) {
2300       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2301       return CEED_ERROR_SUCCESS;
2302     }
2303   }
2304 
2305   // Default interface implementation
2306   Ceed ceed, ceed_parent;
2307   CeedCall(CeedOperatorGetCeed(op, &ceed));
2308   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2309   ceed_parent = ceed_parent ? ceed_parent : ceed;
2310   CeedQFunction qf;
2311   CeedCall(CeedOperatorGetQFunction(op, &qf));
2312 
2313   // Determine active input basis
2314   bool                interp = false, grad = false;
2315   CeedBasis           basis = NULL;
2316   CeedElemRestriction rstr  = NULL;
2317   CeedOperatorField  *op_fields;
2318   CeedQFunctionField *qf_fields;
2319   CeedInt             num_input_fields;
2320   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2321   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2322   for (CeedInt i = 0; i < num_input_fields; i++) {
2323     CeedVector vec;
2324     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2325     if (vec == CEED_VECTOR_ACTIVE) {
2326       CeedEvalMode eval_mode;
2327       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2328       interp = interp || eval_mode == CEED_EVAL_INTERP;
2329       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2330       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2331       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2332     }
2333   }
2334   if (!basis) {
2335     // LCOV_EXCL_START
2336     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2337     // LCOV_EXCL_STOP
2338   }
2339   CeedSize l_size = 1;
2340   CeedInt  P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2341   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2342   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2343   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2344   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2345   CeedCall(CeedBasisGetDimension(basis, &dim));
2346   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2347   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2348   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2349 
2350   // Build and diagonalize 1D Mass and Laplacian
2351   bool tensor_basis;
2352   CeedCall(CeedBasisIsTensor(basis, &tensor_basis));
2353   if (!tensor_basis) {
2354     // LCOV_EXCL_START
2355     return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2356     // LCOV_EXCL_STOP
2357   }
2358   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2359   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2360   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2361   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2362   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2363   CeedCall(CeedCalloc(P_1d, &lambda));
2364   // -- Build matrices
2365   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2366   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2367   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2368   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2369   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2370 
2371   // -- Diagonalize
2372   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2373   CeedCall(CeedFree(&mass));
2374   CeedCall(CeedFree(&laplace));
2375   for (CeedInt i = 0; i < P_1d; i++) {
2376     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2377   }
2378   CeedCall(CeedFree(&x));
2379 
2380   // Assemble QFunction
2381   CeedVector          assembled;
2382   CeedElemRestriction rstr_qf;
2383   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2384   CeedInt layout[3];
2385   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2386   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2387   CeedScalar max_norm = 0;
2388   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2389 
2390   // Calculate element averages
2391   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2392   CeedScalar       *elem_avg;
2393   const CeedScalar *assembled_array, *q_weight_array;
2394   CeedVector        q_weight;
2395   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2396   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2397   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2398   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2399   CeedCall(CeedCalloc(num_elem, &elem_avg));
2400   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2401   for (CeedInt e = 0; e < num_elem; e++) {
2402     CeedInt count = 0;
2403     for (CeedInt q = 0; q < num_qpts; q++) {
2404       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2405         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2406           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2407           count++;
2408         }
2409       }
2410     }
2411     if (count) {
2412       elem_avg[e] /= count;
2413     } else {
2414       elem_avg[e] = 1.0;
2415     }
2416   }
2417   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2418   CeedCall(CeedVectorDestroy(&assembled));
2419   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2420   CeedCall(CeedVectorDestroy(&q_weight));
2421 
2422   // Build FDM diagonal
2423   CeedVector  q_data;
2424   CeedScalar *q_data_array, *fdm_diagonal;
2425   CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2426   const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2427   for (CeedInt c = 0; c < num_comp; c++) {
2428     for (CeedInt n = 0; n < num_nodes; n++) {
2429       if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2430       if (grad) {
2431         for (CeedInt d = 0; d < dim; d++) {
2432           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2433           fdm_diagonal[c * num_nodes + n] += lambda[i];
2434         }
2435       }
2436       if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2437     }
2438   }
2439   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2440   CeedCall(CeedVectorSetValue(q_data, 0.0));
2441   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2442   for (CeedInt e = 0; e < num_elem; e++) {
2443     for (CeedInt c = 0; c < num_comp; c++) {
2444       for (CeedInt n = 0; n < num_nodes; n++) q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2445     }
2446   }
2447   CeedCall(CeedFree(&elem_avg));
2448   CeedCall(CeedFree(&fdm_diagonal));
2449   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2450 
2451   // Setup FDM operator
2452   // -- Basis
2453   CeedBasis   fdm_basis;
2454   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2455   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2456   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2457   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2458   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2459   CeedCall(CeedFree(&fdm_interp));
2460   CeedCall(CeedFree(&grad_dummy));
2461   CeedCall(CeedFree(&q_ref_dummy));
2462   CeedCall(CeedFree(&q_weight_dummy));
2463   CeedCall(CeedFree(&lambda));
2464 
2465   // -- Restriction
2466   CeedElemRestriction rstr_qd_i;
2467   CeedInt             strides[3] = {1, num_nodes, num_nodes * num_comp};
2468   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i));
2469   // -- QFunction
2470   CeedQFunction qf_fdm;
2471   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2472   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2473   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2474   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2475   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2476   // -- QFunction context
2477   CeedInt *num_comp_data;
2478   CeedCall(CeedCalloc(1, &num_comp_data));
2479   num_comp_data[0] = num_comp;
2480   CeedQFunctionContext ctx_fdm;
2481   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2482   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2483   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2484   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2485   // -- Operator
2486   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2487   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2488   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2489   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2490 
2491   // Cleanup
2492   CeedCall(CeedVectorDestroy(&q_data));
2493   CeedCall(CeedBasisDestroy(&fdm_basis));
2494   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2495   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2496 
2497   return CEED_ERROR_SUCCESS;
2498 }
2499 
2500 /// @}
2501