xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision 05b44d1b5ea8b564ccb88bb14760c6e7ef0ad51a)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <assert.h>
9 #include <ceed-impl.h>
10 #include <ceed/backend.h>
11 #include <ceed/ceed.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) {
128       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     }
130     // Cleanup
131     CeedCall(CeedQFunctionDestroy(&qf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
133     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
134   }
135   CeedCall(CeedOperatorSetName(op_fallback, op->name));
136   CeedCall(CeedOperatorCheckReady(op_fallback));
137   op->op_fallback = op_fallback;
138 
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
144 
145   @param[in]  op          CeedOperator to retrieve fallback for
146   @param[out] op_fallback Fallback CeedOperator
147 
148   @return An error code: 0 - success, otherwise - failure
149 
150   @ref Developer
151 **/
152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
153   // Create if needed
154   if (!op->op_fallback) {
155     CeedCall(CeedOperatorCreateFallback(op));
156   }
157   if (op->op_fallback) {
158     bool is_debug;
159 
160     CeedCall(CeedIsDebug(op->ceed, &is_debug));
161     if (is_debug) {
162       Ceed        ceed, ceed_fallback;
163       const char *resource, *resource_fallback;
164 
165       CeedCall(CeedOperatorGetCeed(op, &ceed));
166       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
167       CeedCall(CeedGetResource(ceed, &resource));
168       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
169 
170       CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n");
171       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
172                 op->op_fallback);
173     }
174   }
175   *op_fallback = op->op_fallback;
176 
177   return CEED_ERROR_SUCCESS;
178 }
179 
180 /**
181   @brief Select correct basis matrix pointer based on CeedEvalMode
182 
183   @param[in]  eval_mode Current basis evaluation mode
184   @param[in]  identity  Pointer to identity matrix
185   @param[in]  interp    Pointer to interpolation matrix
186   @param[in]  grad      Pointer to gradient matrix
187   @param[out] basis_ptr Basis pointer to set
188 
189   @ref Developer
190 **/
191 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad,
192                                                const CeedScalar **basis_ptr) {
193   switch (eval_mode) {
194     case CEED_EVAL_NONE:
195       *basis_ptr = identity;
196       break;
197     case CEED_EVAL_INTERP:
198       *basis_ptr = interp;
199       break;
200     case CEED_EVAL_GRAD:
201       *basis_ptr = grad;
202       break;
203     case CEED_EVAL_WEIGHT:
204     case CEED_EVAL_DIV:
205     case CEED_EVAL_CURL:
206       break;  // Caught by QF Assembly
207   }
208   assert(*basis_ptr != NULL);
209 }
210 
211 /**
212   @brief Create point block restriction for active operator field
213 
214   @param[in]  rstr            Original CeedElemRestriction for active field
215   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
216 
217   @return An error code: 0 - success, otherwise - failure
218 
219   @ref Developer
220 **/
221 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
222   Ceed ceed;
223   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
224   const CeedInt *offsets;
225   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
226 
227   // Expand offsets
228   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
229   CeedSize l_size;
230   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
231   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
232   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
233   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
234   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
235   CeedInt shift = num_comp;
236   if (comp_stride != 1) shift *= num_comp;
237   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
238   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
239     pointblock_offsets[i] = offsets[i] * shift;
240   }
241 
242   // Create new restriction
243   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
244                                      pointblock_offsets, pointblock_rstr));
245 
246   // Cleanup
247   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
248 
249   return CEED_ERROR_SUCCESS;
250 }
251 
252 /**
253   @brief Core logic for assembling operator diagonal or point block diagonal
254 
255   @param[in]  op            CeedOperator to assemble point block diagonal
256   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
257   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
258   @param[out] assembled     CeedVector to store assembled diagonal
259 
260   @return An error code: 0 - success, otherwise - failure
261 
262   @ref Developer
263 **/
264 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
265   Ceed ceed;
266   CeedCall(CeedOperatorGetCeed(op, &ceed));
267 
268   // Assemble QFunction
269   CeedQFunction       qf;
270   const CeedScalar   *assembled_qf_array;
271   CeedVector          assembled_qf;
272   CeedElemRestriction assembled_elem_rstr;
273   CeedInt             num_input_fields, num_output_fields;
274   CeedInt             layout[3];
275 
276   CeedCall(CeedOperatorGetQFunction(op, &qf));
277   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
278   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
279   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout));
280   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
281   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
282 
283   // Get assembly data
284   CeedOperatorAssemblyData data;
285   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
286   CeedInt                 *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
287   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
288   CeedBasis               *active_bases;
289   CeedElemRestriction     *active_elem_rstrs;
290 
291   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
292   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
293                                                 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components));
294   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL));
295   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs));
296 
297   // Loop over all active bases
298   for (CeedInt b = 0; b < num_active_bases; b++) {
299     // Assemble point block diagonal restriction, if needed
300     CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b];
301 
302     if (is_pointblock) {
303       CeedElemRestriction point_block_elem_rstr;
304 
305       CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr));
306       diag_elem_rstr = point_block_elem_rstr;
307     }
308 
309     // Create diagonal vector
310     CeedVector elem_diag;
311     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
312 
313     // Assemble element operator diagonals
314     CeedScalar *elem_diag_array;
315     CeedInt     num_elem, num_nodes, num_qpts, num_components;
316 
317     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
318     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
319     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
320     CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes));
321     CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components));
322     CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts));
323 
324     // Basis matrices
325     const CeedScalar *interp, *grad;
326     CeedScalar       *identity      = NULL;
327     bool              has_eval_none = false;
328     for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) {
329       has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE);
330     }
331     for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) {
332       has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE);
333     }
334     if (has_eval_none) {
335       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
336       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
337     }
338     CeedCall(CeedBasisGetInterp(active_bases[b], &interp));
339     CeedCall(CeedBasisGetGrad(active_bases[b], &grad));
340     // Compute the diagonal of B^T D B
341     // Each element
342     for (CeedInt e = 0; e < num_elem; e++) {
343       CeedInt d_out = -1;
344       // Each basis eval mode pair
345       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) {
346         const CeedScalar *B_t = NULL;
347         if (eval_modes_out[b][e_out] == CEED_EVAL_GRAD) d_out += 1;
348         CeedOperatorGetBasisPointer(eval_modes_out[b][e_out], identity, interp, &grad[d_out * num_qpts * num_nodes], &B_t);
349         CeedInt d_in = -1;
350         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) {
351           const CeedScalar *B = NULL;
352           if (eval_modes_in[b][e_in] == CEED_EVAL_GRAD) d_in += 1;
353           CeedOperatorGetBasisPointer(eval_modes_in[b][e_in], identity, interp, &grad[d_in * num_qpts * num_nodes], &B);
354           // Each component
355           for (CeedInt c_out = 0; c_out < num_components; c_out++) {
356             // Each qpt/node pair
357             for (CeedInt q = 0; q < num_qpts; q++) {
358               if (is_pointblock) {
359                 // Point Block Diagonal
360                 for (CeedInt c_in = 0; c_in < num_components; c_in++) {
361                   const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
362                   const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
363                   for (CeedInt n = 0; n < num_nodes; n++) {
364                     elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] +=
365                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
366                   }
367                 }
368               } else {
369                 // Diagonal Only
370                 const CeedInt    c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
371                 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
372                 for (CeedInt n = 0; n < num_nodes; n++) {
373                   elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
374                 }
375               }
376             }
377           }
378         }
379       }
380     }
381     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
382 
383     // Assemble local operator diagonal
384     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
385 
386     // Cleanup
387     if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
388     CeedCall(CeedVectorDestroy(&elem_diag));
389     CeedCall(CeedFree(&identity));
390   }
391   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
392   CeedCall(CeedVectorDestroy(&assembled_qf));
393 
394   return CEED_ERROR_SUCCESS;
395 }
396 
397 /**
398   @brief Core logic for assembling composite operator diagonal
399 
400   @param[in]  op            CeedOperator to assemble point block diagonal
401   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
402   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
403   @param[out] assembled     CeedVector to store assembled diagonal
404 
405   @return An error code: 0 - success, otherwise - failure
406 
407   @ref Developer
408 **/
409 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
410                                                                  CeedVector assembled) {
411   CeedInt       num_sub;
412   CeedOperator *suboperators;
413   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
414   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
415   for (CeedInt i = 0; i < num_sub; i++) {
416     if (is_pointblock) {
417       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
418     } else {
419       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
420     }
421   }
422   return CEED_ERROR_SUCCESS;
423 }
424 
425 /**
426   @brief Build nonzero pattern for non-composite operator
427 
428   Users should generally use CeedOperatorLinearAssembleSymbolic()
429 
430   @param[in]  op     CeedOperator to assemble nonzero pattern
431   @param[in]  offset Offset for number of entries
432   @param[out] rows   Row number for each entry
433   @param[out] cols   Column number for each entry
434 
435   @return An error code: 0 - success, otherwise - failure
436 
437   @ref Developer
438 **/
439 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
440   Ceed ceed;
441   bool is_composite;
442   CeedCall(CeedOperatorGetCeed(op, &ceed));
443   CeedCall(CeedOperatorIsComposite(op, &is_composite));
444 
445   if (is_composite) {
446     // LCOV_EXCL_START
447     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
448     // LCOV_EXCL_STOP
449   }
450 
451   CeedSize num_nodes;
452   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
453   CeedElemRestriction rstr_in;
454   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
455   CeedInt num_elem, elem_size, num_comp;
456   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
457   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
458   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
459   CeedInt layout_er[3];
460   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
461 
462   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
463 
464   // Determine elem_dof relation
465   CeedVector index_vec;
466   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
467   CeedScalar *array;
468   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
469   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
470   CeedCall(CeedVectorRestoreArray(index_vec, &array));
471   CeedVector elem_dof;
472   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
473   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
474   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
475   const CeedScalar *elem_dof_a;
476   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
477   CeedCall(CeedVectorDestroy(&index_vec));
478 
479   // Determine i, j locations for element matrices
480   CeedInt count = 0;
481   for (CeedInt e = 0; e < num_elem; e++) {
482     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
483       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
484         for (CeedInt i = 0; i < elem_size; i++) {
485           for (CeedInt j = 0; j < elem_size; j++) {
486             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
487             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
488 
489             const CeedInt row = elem_dof_a[elem_dof_index_row];
490             const CeedInt col = elem_dof_a[elem_dof_index_col];
491 
492             rows[offset + count] = row;
493             cols[offset + count] = col;
494             count++;
495           }
496         }
497       }
498     }
499   }
500   if (count != local_num_entries) {
501     // LCOV_EXCL_START
502     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
503     // LCOV_EXCL_STOP
504   }
505   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
506   CeedCall(CeedVectorDestroy(&elem_dof));
507 
508   return CEED_ERROR_SUCCESS;
509 }
510 
511 /**
512   @brief Assemble nonzero entries for non-composite operator
513 
514   Users should generally use CeedOperatorLinearAssemble()
515 
516   @param[in]  op     CeedOperator to assemble
517   @param[in]  offset Offset for number of entries
518   @param[out] values Values to assemble into matrix
519 
520   @return An error code: 0 - success, otherwise - failure
521 
522   @ref Developer
523 **/
524 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
525   Ceed ceed;
526   bool is_composite;
527   CeedCall(CeedOperatorGetCeed(op, &ceed));
528   CeedCall(CeedOperatorIsComposite(op, &is_composite));
529 
530   if (is_composite) {
531     // LCOV_EXCL_START
532     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
533     // LCOV_EXCL_STOP
534   }
535 
536   // Early exit for empty operator
537   {
538     CeedInt num_elem = 0;
539 
540     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
541     if (num_elem == 0) return CEED_ERROR_SUCCESS;
542   }
543 
544   if (op->LinearAssembleSingle) {
545     // Backend version
546     CeedCall(op->LinearAssembleSingle(op, offset, values));
547     return CEED_ERROR_SUCCESS;
548   } else {
549     // Operator fallback
550     CeedOperator op_fallback;
551 
552     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
553     if (op_fallback) {
554       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
555       return CEED_ERROR_SUCCESS;
556     }
557   }
558 
559   // Assemble QFunction
560   CeedQFunction qf;
561   CeedCall(CeedOperatorGetQFunction(op, &qf));
562   CeedVector          assembled_qf;
563   CeedElemRestriction rstr_q;
564   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
565   CeedSize qf_length;
566   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
567 
568   CeedInt            num_input_fields, num_output_fields;
569   CeedOperatorField *input_fields;
570   CeedOperatorField *output_fields;
571   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
572 
573   // Get assembly data
574   CeedOperatorAssemblyData data;
575   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
576   const CeedEvalMode **eval_modes_in, **eval_modes_out;
577   CeedInt             *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
578   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out,
579                                                 &eval_modes_out, NULL, NULL));
580   CeedBasis *bases;
581   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL));
582   CeedBasis basis_in = bases[0];
583 
584   if (num_active_bases > 1) {
585     // LCOV_EXCL_START
586     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases");
587     // LCOV_EXCL_STOP
588   }
589   if (num_eval_modes_in[0] == 0 || num_eval_modes_out[0] == 0) {
590     // LCOV_EXCL_START
591     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
592     // LCOV_EXCL_STOP
593   }
594 
595   CeedElemRestriction active_rstr;
596   CeedInt             num_elem, elem_size, num_qpts, num_comp;
597   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
598   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
599   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
600   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
601   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
602 
603   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
604 
605   // loop over elements and put in data structure
606   const CeedScalar *interp_in, *grad_in;
607   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
608   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
609 
610   const CeedScalar *assembled_qf_array;
611   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
612 
613   CeedInt layout_qf[3];
614   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
615   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
616 
617   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
618   const CeedScalar **B_mats_in, **B_mats_out;
619   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out));
620   const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0];
621   CeedScalar        BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]];
622   CeedScalar        elem_mat[elem_size * elem_size];
623   CeedInt           count = 0;
624   CeedScalar       *vals;
625   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
626   for (CeedInt e = 0; e < num_elem; e++) {
627     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
628       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
629         // Compute B^T*D
630         for (CeedInt n = 0; n < elem_size; n++) {
631           for (CeedInt q = 0; q < num_qpts; q++) {
632             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
633               const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in);
634               CeedScalar    sum       = 0.0;
635               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
636                 const CeedInt b_out_index     = (num_eval_modes_out[0] * q + e_out) * elem_size + n;
637                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out;
638                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
639                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
640               }
641               BTD_mat[btd_index] = sum;
642             }
643           }
644         }
645         // form element matrix itself (for each block component)
646         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0]));
647 
648         // put element matrix in coordinate data structure
649         for (CeedInt i = 0; i < elem_size; i++) {
650           for (CeedInt j = 0; j < elem_size; j++) {
651             vals[offset + count] = elem_mat[i * elem_size + j];
652             count++;
653           }
654         }
655       }
656     }
657   }
658   if (count != local_num_entries) {
659     // LCOV_EXCL_START
660     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
661     // LCOV_EXCL_STOP
662   }
663   CeedCall(CeedVectorRestoreArray(values, &vals));
664 
665   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
666   CeedCall(CeedVectorDestroy(&assembled_qf));
667 
668   return CEED_ERROR_SUCCESS;
669 }
670 
671 /**
672   @brief Count number of entries for assembled CeedOperator
673 
674   @param[in]  op          CeedOperator to assemble
675   @param[out] num_entries Number of entries in assembled representation
676 
677   @return An error code: 0 - success, otherwise - failure
678 
679   @ref Utility
680 **/
681 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
682   bool                is_composite;
683   CeedElemRestriction rstr;
684   CeedInt             num_elem, elem_size, num_comp;
685 
686   CeedCall(CeedOperatorIsComposite(op, &is_composite));
687   if (is_composite) {
688     // LCOV_EXCL_START
689     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
690     // LCOV_EXCL_STOP
691   }
692   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
693   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
694   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
695   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
696   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
697 
698   return CEED_ERROR_SUCCESS;
699 }
700 
701 /**
702   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
703 
704   @param[in]  op_fine      Fine grid operator
705   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
706   @param[in]  rstr_coarse  Coarse grid restriction
707   @param[in]  basis_coarse Coarse grid active vector basis
708   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
709   @param[out] op_coarse    Coarse grid operator
710   @param[out] op_prolong   Coarse to fine operator, or NULL
711   @param[out] op_restrict  Fine to coarse operator, or NULL
712 
713   @return An error code: 0 - success, otherwise - failure
714 
715   @ref Developer
716 **/
717 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
718                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
719   Ceed       ceed;
720   CeedVector mult_vec = NULL;
721   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
722 
723   // Check for composite operator
724   bool is_composite;
725   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
726   if (is_composite) {
727     // LCOV_EXCL_START
728     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
729     // LCOV_EXCL_STOP
730   }
731 
732   // Coarse Grid
733   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
734   CeedElemRestriction rstr_fine = NULL;
735   // -- Clone input fields
736   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
737     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
738       rstr_fine = op_fine->input_fields[i]->elem_rstr;
739       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
740     } else {
741       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
742                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
743     }
744   }
745   // -- Clone output fields
746   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
747     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
748       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
749     } else {
750       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
751                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
752     }
753   }
754   // -- Clone QFunctionAssemblyData
755   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
756 
757   // Multiplicity vector
758   if (op_restrict || op_prolong) {
759     CeedVector mult_e_vec;
760 
761     if (!p_mult_fine) {
762       // LCOV_EXCL_START
763       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
764       // LCOV_EXCL_STOP
765     }
766     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
767     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
768     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
769     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
770     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
771     CeedCall(CeedVectorDestroy(&mult_e_vec));
772     CeedCall(CeedVectorReciprocal(mult_vec));
773   }
774 
775   // Clone name
776   bool   has_name = op_fine->name;
777   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
778   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
779 
780   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
781   if ((op_restrict || op_prolong) && !basis_c_to_f) {
782     // LCOV_EXCL_START
783     return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine basis");
784     // LCOV_EXCL_STOP
785   }
786 
787   // Restriction/Prolongation Operators
788   CeedInt num_comp;
789   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
790 
791   // Restriction
792   if (op_restrict) {
793     CeedInt             *num_comp_r_data;
794     CeedQFunction        qf_restrict;
795     CeedQFunctionContext ctx_r;
796 
797     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
798     CeedCall(CeedCalloc(1, &num_comp_r_data));
799     num_comp_r_data[0] = num_comp;
800     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
801     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
802     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
803     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
804     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
805     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
806     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
807     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
808 
809     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
810     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
811     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
812     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
813 
814     // Set name
815     char *restriction_name;
816     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
817     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
818     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
819     CeedCall(CeedFree(&restriction_name));
820 
821     // Check
822     CeedCall(CeedOperatorCheckReady(*op_restrict));
823 
824     // Cleanup
825     CeedCall(CeedQFunctionDestroy(&qf_restrict));
826   }
827 
828   // Prolongation
829   if (op_prolong) {
830     CeedInt             *num_comp_p_data;
831     CeedQFunction        qf_prolong;
832     CeedQFunctionContext ctx_p;
833 
834     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
835     CeedCall(CeedCalloc(1, &num_comp_p_data));
836     num_comp_p_data[0] = num_comp;
837     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
838     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
839     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
840     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
841     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
842     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
843     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
844     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
845 
846     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
847     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
848     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
849     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
850 
851     // Set name
852     char *prolongation_name;
853     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
854     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
855     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
856     CeedCall(CeedFree(&prolongation_name));
857 
858     // Check
859     CeedCall(CeedOperatorCheckReady(*op_prolong));
860 
861     // Cleanup
862     CeedCall(CeedQFunctionDestroy(&qf_prolong));
863   }
864 
865   // Check
866   CeedCall(CeedOperatorCheckReady(*op_coarse));
867 
868   // Cleanup
869   CeedCall(CeedVectorDestroy(&mult_vec));
870   CeedCall(CeedBasisDestroy(&basis_c_to_f));
871 
872   return CEED_ERROR_SUCCESS;
873 }
874 
875 /**
876   @brief Build 1D mass matrix and Laplacian with perturbation
877 
878   @param[in]  interp_1d   Interpolation matrix in one dimension
879   @param[in]  grad_1d     Gradient matrix in one dimension
880   @param[in]  q_weight_1d Quadrature weights in one dimension
881   @param[in]  P_1d        Number of basis nodes in one dimension
882   @param[in]  Q_1d        Number of quadrature points in one dimension
883   @param[in]  dim         Dimension of basis
884   @param[out] mass        Assembled mass matrix in one dimension
885   @param[out] laplace     Assembled perturbed Laplacian in one dimension
886 
887   @return An error code: 0 - success, otherwise - failure
888 
889   @ref Developer
890 **/
891 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d,
892                                                       CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
893   for (CeedInt i = 0; i < P_1d; i++) {
894     for (CeedInt j = 0; j < P_1d; j++) {
895       CeedScalar sum = 0.0;
896       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
897       mass[i + j * P_1d] = sum;
898     }
899   }
900   // -- Laplacian
901   for (CeedInt i = 0; i < P_1d; i++) {
902     for (CeedInt j = 0; j < P_1d; j++) {
903       CeedScalar sum = 0.0;
904       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
905       laplace[i + j * P_1d] = sum;
906     }
907   }
908   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
909   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
910   return CEED_ERROR_SUCCESS;
911 }
912 CeedPragmaOptimizeOn;
913 
914 /// @}
915 
916 /// ----------------------------------------------------------------------------
917 /// CeedOperator Backend API
918 /// ----------------------------------------------------------------------------
919 /// @addtogroup CeedOperatorBackend
920 /// @{
921 
922 /**
923   @brief Create object holding CeedQFunction assembly data for CeedOperator
924 
925   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
926   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
927 
928   @return An error code: 0 - success, otherwise - failure
929 
930   @ref Backend
931 **/
932 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
933   CeedCall(CeedCalloc(1, data));
934   (*data)->ref_count = 1;
935   (*data)->ceed      = ceed;
936   CeedCall(CeedReference(ceed));
937 
938   return CEED_ERROR_SUCCESS;
939 }
940 
941 /**
942   @brief Increment the reference counter for a CeedQFunctionAssemblyData
943 
944   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
945 
946   @return An error code: 0 - success, otherwise - failure
947 
948   @ref Backend
949 **/
950 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
951   data->ref_count++;
952   return CEED_ERROR_SUCCESS;
953 }
954 
955 /**
956   @brief Set re-use of CeedQFunctionAssemblyData
957 
958   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
959   @param[in]     reuse_data Boolean flag indicating data re-use
960 
961   @return An error code: 0 - success, otherwise - failure
962 
963   @ref Backend
964 **/
965 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
966   data->reuse_data        = reuse_data;
967   data->needs_data_update = true;
968   return CEED_ERROR_SUCCESS;
969 }
970 
971 /**
972   @brief Mark QFunctionAssemblyData as stale
973 
974   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
975   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
976 
977   @return An error code: 0 - success, otherwise - failure
978 
979   @ref Backend
980 **/
981 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
982   data->needs_data_update = needs_data_update;
983   return CEED_ERROR_SUCCESS;
984 }
985 
986 /**
987   @brief Determine if QFunctionAssemblyData needs update
988 
989   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
990   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
991 
992   @return An error code: 0 - success, otherwise - failure
993 
994   @ref Backend
995 **/
996 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
997   *is_update_needed = !data->reuse_data || data->needs_data_update;
998   return CEED_ERROR_SUCCESS;
999 }
1000 
1001 /**
1002   @brief Copy the pointer to a CeedQFunctionAssemblyData.
1003            Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
1004            Note: If `*data_copy` is non-NULL, then it is assumed that `*data_copy` is a pointer to a CeedQFunctionAssemblyData.
1005              This CeedQFunctionAssemblyData will be destroyed if `*data_copy` is the only reference to this CeedQFunctionAssemblyData.
1006 
1007   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
1008   @param[in,out] data_copy Variable to store copied reference
1009 
1010   @return An error code: 0 - success, otherwise - failure
1011 
1012   @ref Backend
1013 **/
1014 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1015   CeedCall(CeedQFunctionAssemblyDataReference(data));
1016   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1017   *data_copy = data;
1018   return CEED_ERROR_SUCCESS;
1019 }
1020 
1021 /**
1022   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1023 
1024   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1025   @param[out] is_setup Boolean flag for setup status
1026 
1027   @return An error code: 0 - success, otherwise - failure
1028 
1029   @ref Backend
1030 **/
1031 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1032   *is_setup = data->is_setup;
1033   return CEED_ERROR_SUCCESS;
1034 }
1035 
1036 /**
1037   @brief Set internal objects for CeedQFunctionAssemblyData
1038 
1039   @param[in,out] data CeedQFunctionAssemblyData to set objects
1040   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1041   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1042 
1043   @return An error code: 0 - success, otherwise - failure
1044 
1045   @ref Backend
1046 **/
1047 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1048   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1049   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1050 
1051   data->is_setup = true;
1052   return CEED_ERROR_SUCCESS;
1053 }
1054 
1055 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1056   if (!data->is_setup) {
1057     // LCOV_EXCL_START
1058     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1059     // LCOV_EXCL_STOP
1060   }
1061 
1062   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1063   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1064 
1065   return CEED_ERROR_SUCCESS;
1066 }
1067 
1068 /**
1069   @brief Destroy CeedQFunctionAssemblyData
1070 
1071   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1072 
1073   @return An error code: 0 - success, otherwise - failure
1074 
1075   @ref Backend
1076 **/
1077 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1078   if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;
1079 
1080   CeedCall(CeedDestroy(&(*data)->ceed));
1081   CeedCall(CeedVectorDestroy(&(*data)->vec));
1082   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1083 
1084   CeedCall(CeedFree(data));
1085   return CEED_ERROR_SUCCESS;
1086 }
1087 
1088 /**
1089   @brief Get CeedOperatorAssemblyData
1090 
1091   @param[in]  op   CeedOperator to assemble
1092   @param[out] data CeedQFunctionAssemblyData
1093 
1094   @return An error code: 0 - success, otherwise - failure
1095 
1096   @ref Backend
1097 **/
1098 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1099   if (!op->op_assembled) {
1100     CeedOperatorAssemblyData data;
1101 
1102     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1103     op->op_assembled = data;
1104   }
1105   *data = op->op_assembled;
1106 
1107   return CEED_ERROR_SUCCESS;
1108 }
1109 
1110 /**
1111   @brief Create object holding CeedOperator assembly data.
1112 
1113     The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1114     An array with references to the corresponding active CeedElemRestrictions is also stored.
1115     For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1116     The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1117       CeedEvalMode.
1118     The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1119     Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1120 
1121   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1122   @param[in]  op   CeedOperator to be assembled
1123   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1124 
1125   @return An error code: 0 - success, otherwise - failure
1126 
1127   @ref Backend
1128 **/
1129 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1130   CeedInt num_active_bases = 0;
1131 
1132   // Allocate
1133   CeedCall(CeedCalloc(1, data));
1134   (*data)->ceed = ceed;
1135   CeedCall(CeedReference(ceed));
1136 
1137   // Build OperatorAssembly data
1138   CeedQFunction       qf;
1139   CeedQFunctionField *qf_fields;
1140   CeedOperatorField  *op_fields;
1141   CeedInt             num_input_fields;
1142   CeedCall(CeedOperatorGetQFunction(op, &qf));
1143   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1144   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1145 
1146   // Determine active input basis
1147   CeedInt       *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0;
1148   CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL;
1149   CeedSize     **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1150 
1151   for (CeedInt i = 0; i < num_input_fields; i++) {
1152     CeedVector vec;
1153 
1154     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1155     if (vec == CEED_VECTOR_ACTIVE) {
1156       CeedInt      index = -1, dim = 1, num_components;
1157       CeedBasis    basis_in = NULL;
1158       CeedEvalMode eval_mode;
1159 
1160       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1161       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1162       CeedCall(CeedBasisGetNumComponents(basis_in, &num_components));
1163       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1164       for (CeedInt i = 0; i < num_active_bases; i++) {
1165         if ((*data)->active_bases[i] == basis_in) index = i;
1166       }
1167       if (index == -1) {
1168         CeedElemRestriction elem_rstr_in;
1169 
1170         index = num_active_bases;
1171         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1172         (*data)->active_bases[num_active_bases] = NULL;
1173         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases]));
1174         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1175         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1176         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1177         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases]));
1178         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1179         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1180         num_eval_modes_in[index]  = 0;
1181         num_eval_modes_out[index] = 0;
1182         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1183         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1184         eval_modes_in[index]  = NULL;
1185         eval_modes_out[index] = NULL;
1186         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1187         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1188         eval_mode_offsets_in[index]  = NULL;
1189         eval_mode_offsets_out[index] = NULL;
1190         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1191         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1192         (*data)->assembled_bases_in[index]  = NULL;
1193         (*data)->assembled_bases_out[index] = NULL;
1194         num_active_bases++;
1195       }
1196       switch (eval_mode) {
1197         case CEED_EVAL_NONE:
1198         case CEED_EVAL_INTERP:
1199           CeedCall(CeedRealloc(num_eval_modes_in[index] + 1, &eval_modes_in[index]));
1200           CeedCall(CeedRealloc(num_eval_modes_in[index] + 1, &eval_mode_offsets_in[index]));
1201           eval_modes_in[index][num_eval_modes_in[index]]        = eval_mode;
1202           eval_mode_offsets_in[index][num_eval_modes_in[index]] = offset;
1203           offset += num_components;
1204           num_eval_modes_in[index] += 1;
1205           break;
1206         case CEED_EVAL_GRAD:
1207           CeedCall(CeedRealloc(num_eval_modes_in[index] + dim, &eval_modes_in[index]));
1208           CeedCall(CeedRealloc(num_eval_modes_in[index] + dim, &eval_mode_offsets_in[index]));
1209           for (CeedInt d = 0; d < dim; d++) {
1210             eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1211             eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1212             offset += num_components;
1213           }
1214           num_eval_modes_in[index] += dim;
1215           break;
1216         case CEED_EVAL_WEIGHT:
1217         case CEED_EVAL_DIV:
1218         case CEED_EVAL_CURL:
1219           break;  // Caught by QF Assembly
1220       }
1221     }
1222   }
1223   (*data)->num_eval_modes_in    = num_eval_modes_in;
1224   (*data)->eval_modes_in        = eval_modes_in;
1225   (*data)->eval_mode_offsets_in = eval_mode_offsets_in;
1226 
1227   // Determine active output basis
1228   CeedInt num_output_fields;
1229 
1230   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1231   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1232   offset = 0;
1233   for (CeedInt i = 0; i < num_output_fields; i++) {
1234     CeedVector vec;
1235 
1236     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1237     if (vec == CEED_VECTOR_ACTIVE) {
1238       CeedInt      index = -1, dim = 1, num_components;
1239       CeedBasis    basis_out = NULL;
1240       CeedEvalMode eval_mode;
1241 
1242       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1243       CeedCall(CeedBasisGetDimension(basis_out, &dim));
1244       CeedCall(CeedBasisGetNumComponents(basis_out, &num_components));
1245       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1246       for (CeedInt i = 0; i < num_active_bases; i++) {
1247         if ((*data)->active_bases[i] == basis_out) index = i;
1248       }
1249       if (index == -1) {
1250         CeedElemRestriction elem_rstr_out;
1251 
1252         index = num_active_bases;
1253         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1254         (*data)->active_bases[num_active_bases] = NULL;
1255         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases]));
1256         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1257         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1258         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1259         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases]));
1260         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1261         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1262         num_eval_modes_in[index]  = 0;
1263         num_eval_modes_out[index] = 0;
1264         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1265         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1266         eval_modes_in[index]  = NULL;
1267         eval_modes_out[index] = NULL;
1268         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1269         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1270         eval_mode_offsets_in[index]  = NULL;
1271         eval_mode_offsets_out[index] = NULL;
1272         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1273         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1274         (*data)->assembled_bases_in[index]  = NULL;
1275         (*data)->assembled_bases_out[index] = NULL;
1276         num_active_bases++;
1277       }
1278       switch (eval_mode) {
1279         case CEED_EVAL_NONE:
1280         case CEED_EVAL_INTERP:
1281           CeedCall(CeedRealloc(num_eval_modes_out[index] + 1, &eval_modes_out[index]));
1282           CeedCall(CeedRealloc(num_eval_modes_out[index] + 1, &eval_mode_offsets_out[index]));
1283           eval_modes_out[index][num_eval_modes_out[index]]        = eval_mode;
1284           eval_mode_offsets_out[index][num_eval_modes_out[index]] = offset;
1285           offset += num_components;
1286           num_eval_modes_out[index] += 1;
1287           break;
1288         case CEED_EVAL_GRAD:
1289           CeedCall(CeedRealloc(num_eval_modes_out[index] + dim, &eval_modes_out[index]));
1290           CeedCall(CeedRealloc(num_eval_modes_out[index] + dim, &eval_mode_offsets_out[index]));
1291           for (CeedInt d = 0; d < dim; d++) {
1292             eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1293             eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1294             offset += num_components;
1295           }
1296           num_eval_modes_out[index] += dim;
1297           break;
1298         case CEED_EVAL_WEIGHT:
1299         case CEED_EVAL_DIV:
1300         case CEED_EVAL_CURL:
1301           break;  // Caught by QF Assembly
1302       }
1303     }
1304   }
1305   (*data)->num_output_components = offset;
1306   (*data)->num_eval_modes_out    = num_eval_modes_out;
1307   (*data)->eval_modes_out        = eval_modes_out;
1308   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1309   (*data)->num_active_bases      = num_active_bases;
1310 
1311   return CEED_ERROR_SUCCESS;
1312 }
1313 
1314 /**
1315   @brief Get CeedOperator CeedEvalModes for assembly.
1316 
1317     Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1318 
1319   @param[in]  data                  CeedOperatorAssemblyData
1320   @param[out] num_active_bases      Total number of active bases
1321   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1322                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1323   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1324   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1325   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1326   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1327   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1328   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1329                                       including contributions of all active bases
1330 
1331   @return An error code: 0 - success, otherwise - failure
1332 
1333 
1334   @ref Backend
1335 **/
1336 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in,
1337                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out,
1338                                          const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) {
1339   if (num_active_bases) *num_active_bases = data->num_active_bases;
1340   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1341   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1342   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1343   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1344   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1345   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1346   if (num_output_components) *num_output_components = data->num_output_components;
1347 
1348   return CEED_ERROR_SUCCESS;
1349 }
1350 
1351 /**
1352   @brief Get CeedOperator CeedBasis data for assembly.
1353 
1354     Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1355 
1356   @param[in]  data                CeedOperatorAssemblyData
1357   @param[out] num_active_bases    Number of active bases, or NULL
1358   @param[out] active_bases        Pointer to hold active CeedBasis, or NULL
1359   @param[out] assembled_bases_in  Pointer to hold assembled active input B, or NULL
1360   @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL
1361 
1362   @return An error code: 0 - success, otherwise - failure
1363 
1364   @ref Backend
1365 **/
1366 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases,
1367                                      const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) {
1368   // Assemble B_in, B_out if needed
1369   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1370     CeedInt num_qpts;
1371 
1372     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1373     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1374       CeedInt           elem_size;
1375       CeedScalar       *B_in = NULL, *identity = NULL;
1376       const CeedScalar *interp_in, *grad_in;
1377       bool              has_eval_none = false;
1378 
1379       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &elem_size));
1380       CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_modes_in[b], &B_in));
1381 
1382       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1383         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1384       }
1385       if (has_eval_none) {
1386         CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1387         for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1388           identity[i * elem_size + i] = 1.0;
1389         }
1390       }
1391       CeedCall(CeedBasisGetInterp(data->active_bases[b], &interp_in));
1392       CeedCall(CeedBasisGetGrad(data->active_bases[b], &grad_in));
1393 
1394       for (CeedInt q = 0; q < num_qpts; q++) {
1395         for (CeedInt n = 0; n < elem_size; n++) {
1396           CeedInt d_in = -1;
1397           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1398             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1399             const CeedScalar *B  = NULL;
1400 
1401             if (data->eval_modes_in[b][e_in] == CEED_EVAL_GRAD) d_in++;
1402             CeedOperatorGetBasisPointer(data->eval_modes_in[b][e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &B);
1403             B_in[(qq + e_in) * elem_size + n] = B[q * elem_size + n];
1404           }
1405         }
1406       }
1407       if (identity) CeedCall(CeedFree(identity));
1408       data->assembled_bases_in[b] = B_in;
1409     }
1410   }
1411 
1412   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1413     CeedInt num_qpts;
1414 
1415     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1416     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1417       CeedInt           elem_size;
1418       const CeedScalar *interp_out, *grad_out;
1419       bool              has_eval_none = false;
1420       CeedScalar       *B_out = NULL, *identity = NULL;
1421 
1422       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &elem_size));
1423       CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_modes_out[b], &B_out));
1424 
1425       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1426         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1427       }
1428       if (has_eval_none) {
1429         CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1430         for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1431           identity[i * elem_size + i] = 1.0;
1432         }
1433       }
1434       CeedCall(CeedBasisGetInterp(data->active_bases[b], &interp_out));
1435       CeedCall(CeedBasisGetGrad(data->active_bases[b], &grad_out));
1436 
1437       for (CeedInt q = 0; q < num_qpts; q++) {
1438         for (CeedInt n = 0; n < elem_size; n++) {
1439           CeedInt d_out = -1;
1440           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1441             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1442             const CeedScalar *B  = NULL;
1443 
1444             if (data->eval_modes_out[b][e_out] == CEED_EVAL_GRAD) d_out++;
1445             CeedOperatorGetBasisPointer(data->eval_modes_out[b][e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &B);
1446             B_out[(qq + e_out) * elem_size + n] = B[q * elem_size + n];
1447           }
1448         }
1449       }
1450       if (identity) CeedCall(CeedFree(identity));
1451       data->assembled_bases_out[b] = B_out;
1452     }
1453   }
1454 
1455   // Pass out assembled data
1456   if (active_bases) *active_bases = data->active_bases;
1457   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1458   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1459 
1460   return CEED_ERROR_SUCCESS;
1461 }
1462 
1463 /**
1464   @brief Get CeedOperator CeedBasis data for assembly.
1465 
1466   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1467 
1468   @param[in]  data                  CeedOperatorAssemblyData
1469   @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL
1470   @param[out] active_elem_rstrs     Pointer to hold active CeedElemRestrictions, or NULL
1471 
1472   @return An error code: 0 - success, otherwise - failure
1473 
1474   @ref Backend
1475 **/
1476 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs,
1477                                                 CeedElemRestriction **active_elem_rstrs) {
1478   if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases;
1479   if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs;
1480 
1481   return CEED_ERROR_SUCCESS;
1482 }
1483 
1484 /**
1485   @brief Destroy CeedOperatorAssemblyData
1486 
1487   @param[in,out] data CeedOperatorAssemblyData to destroy
1488 
1489   @return An error code: 0 - success, otherwise - failure
1490 
1491   @ref Backend
1492 **/
1493 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1494   if (!*data) return CEED_ERROR_SUCCESS;
1495 
1496   CeedCall(CeedDestroy(&(*data)->ceed));
1497   for (CeedInt b = 0; b < (*data)->num_active_bases; b++) {
1498     CeedCall(CeedBasisDestroy(&(*data)->active_bases[b]));
1499     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b]));
1500     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1501     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1502     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1503     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1504     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1505     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1506   }
1507   CeedCall(CeedFree(&(*data)->active_bases));
1508   CeedCall(CeedFree(&(*data)->active_elem_rstrs));
1509   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1510   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1511   CeedCall(CeedFree(&(*data)->eval_modes_in));
1512   CeedCall(CeedFree(&(*data)->eval_modes_out));
1513   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1514   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1515   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1516   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1517 
1518   CeedCall(CeedFree(data));
1519   return CEED_ERROR_SUCCESS;
1520 }
1521 
1522 /// @}
1523 
1524 /// ----------------------------------------------------------------------------
1525 /// CeedOperator Public API
1526 /// ----------------------------------------------------------------------------
1527 /// @addtogroup CeedOperatorUser
1528 /// @{
1529 
1530 /**
1531   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1532 
1533   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1534     The vector 'assembled' is of shape [num_elements, num_input_fields, num_output_fields, num_quad_points] and contains column-major matrices
1535 representing the action of the CeedQFunction for a corresponding quadrature point on an element. Inputs and outputs are in the order provided by the
1536 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order,
1537 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1]
1538 and producing the output [dv_0, dv_1, v].
1539 
1540   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1541 
1542   @param[in]  op        CeedOperator to assemble CeedQFunction
1543   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1544   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1545   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1546 
1547   @return An error code: 0 - success, otherwise - failure
1548 
1549   @ref User
1550 **/
1551 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1552   CeedCall(CeedOperatorCheckReady(op));
1553 
1554   if (op->LinearAssembleQFunction) {
1555     // Backend version
1556     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1557   } else {
1558     // Operator fallback
1559     CeedOperator op_fallback;
1560 
1561     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1562     if (op_fallback) {
1563       CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1564     } else {
1565       // LCOV_EXCL_START
1566       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1567       // LCOV_EXCL_STOP
1568     }
1569   }
1570   return CEED_ERROR_SUCCESS;
1571 }
1572 
1573 /**
1574   @brief Assemble CeedQFunction and store result internally.
1575            Return copied references of stored data to the caller.
1576            Caller is responsible for ownership and destruction of the copied references.
1577            See also @ref CeedOperatorLinearAssembleQFunction
1578 
1579   @param[in]  op        CeedOperator to assemble CeedQFunction
1580   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1581   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1582   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1583 
1584   @return An error code: 0 - success, otherwise - failure
1585 
1586   @ref User
1587 **/
1588 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1589   CeedCall(CeedOperatorCheckReady(op));
1590 
1591   if (op->LinearAssembleQFunctionUpdate) {
1592     // Backend version
1593     bool                qf_assembled_is_setup;
1594     CeedVector          assembled_vec  = NULL;
1595     CeedElemRestriction assembled_rstr = NULL;
1596 
1597     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1598     if (qf_assembled_is_setup) {
1599       bool update_needed;
1600 
1601       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1602       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1603       if (update_needed) {
1604         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1605       }
1606     } else {
1607       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1608       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1609     }
1610     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1611 
1612     // Copy reference from internally held copy
1613     *assembled = NULL;
1614     *rstr      = NULL;
1615     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1616     CeedCall(CeedVectorDestroy(&assembled_vec));
1617     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1618     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1619   } else {
1620     // Operator fallback
1621     CeedOperator op_fallback;
1622 
1623     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1624     if (op_fallback) {
1625       CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1626     } else {
1627       // LCOV_EXCL_START
1628       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1629       // LCOV_EXCL_STOP
1630     }
1631   }
1632 
1633   return CEED_ERROR_SUCCESS;
1634 }
1635 
1636 /**
1637   @brief Assemble the diagonal of a square linear CeedOperator
1638 
1639   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1640 
1641   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1642 
1643   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1644 
1645   @param[in]  op        CeedOperator to assemble CeedQFunction
1646   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1647   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1648 
1649   @return An error code: 0 - success, otherwise - failure
1650 
1651   @ref User
1652 **/
1653 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1654   bool is_composite;
1655   CeedCall(CeedOperatorCheckReady(op));
1656   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1657 
1658   CeedSize input_size = 0, output_size = 0;
1659   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1660   if (input_size != output_size) {
1661     // LCOV_EXCL_START
1662     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1663     // LCOV_EXCL_STOP
1664   }
1665 
1666   // Early exit for empty operator
1667   if (!is_composite) {
1668     CeedInt num_elem = 0;
1669 
1670     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1671     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1672   }
1673 
1674   if (op->LinearAssembleDiagonal) {
1675     // Backend version
1676     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1677     return CEED_ERROR_SUCCESS;
1678   } else if (op->LinearAssembleAddDiagonal) {
1679     // Backend version with zeroing first
1680     CeedCall(CeedVectorSetValue(assembled, 0.0));
1681     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1682     return CEED_ERROR_SUCCESS;
1683   } else {
1684     // Operator fallback
1685     CeedOperator op_fallback;
1686 
1687     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1688     if (op_fallback) {
1689       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1690       return CEED_ERROR_SUCCESS;
1691     }
1692   }
1693   // Default interface implementation
1694   CeedCall(CeedVectorSetValue(assembled, 0.0));
1695   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1696 
1697   return CEED_ERROR_SUCCESS;
1698 }
1699 
1700 /**
1701   @brief Assemble the diagonal of a square linear CeedOperator
1702 
1703   This sums into a CeedVector the diagonal of a linear CeedOperator.
1704 
1705   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1706 
1707   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1708 
1709   @param[in]  op        CeedOperator to assemble CeedQFunction
1710   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1711   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1712 
1713   @return An error code: 0 - success, otherwise - failure
1714 
1715   @ref User
1716 **/
1717 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1718   bool is_composite;
1719   CeedCall(CeedOperatorCheckReady(op));
1720   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1721 
1722   CeedSize input_size = 0, output_size = 0;
1723   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1724   if (input_size != output_size) {
1725     // LCOV_EXCL_START
1726     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1727     // LCOV_EXCL_STOP
1728   }
1729 
1730   // Early exit for empty operator
1731   if (!is_composite) {
1732     CeedInt num_elem = 0;
1733 
1734     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1735     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1736   }
1737 
1738   if (op->LinearAssembleAddDiagonal) {
1739     // Backend version
1740     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1741     return CEED_ERROR_SUCCESS;
1742   } else {
1743     // Operator fallback
1744     CeedOperator op_fallback;
1745 
1746     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1747     if (op_fallback) {
1748       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1749       return CEED_ERROR_SUCCESS;
1750     }
1751   }
1752   // Default interface implementation
1753   if (is_composite) {
1754     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1755   } else {
1756     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1757   }
1758 
1759   return CEED_ERROR_SUCCESS;
1760 }
1761 
1762 /**
1763   @brief Assemble the point block diagonal of a square linear CeedOperator
1764 
1765   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1766 
1767   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1768 
1769   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1770 
1771   @param[in]  op        CeedOperator to assemble CeedQFunction
1772   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1773 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1774 component in].
1775   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1776 
1777   @return An error code: 0 - success, otherwise - failure
1778 
1779   @ref User
1780 **/
1781 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1782   bool is_composite;
1783   CeedCall(CeedOperatorCheckReady(op));
1784   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1785 
1786   CeedSize input_size = 0, output_size = 0;
1787   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1788   if (input_size != output_size) {
1789     // LCOV_EXCL_START
1790     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1791     // LCOV_EXCL_STOP
1792   }
1793 
1794   // Early exit for empty operator
1795   if (!is_composite) {
1796     CeedInt num_elem = 0;
1797 
1798     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1799     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1800   }
1801 
1802   if (op->LinearAssemblePointBlockDiagonal) {
1803     // Backend version
1804     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1805     return CEED_ERROR_SUCCESS;
1806   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1807     // Backend version with zeroing first
1808     CeedCall(CeedVectorSetValue(assembled, 0.0));
1809     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1810     return CEED_ERROR_SUCCESS;
1811   } else {
1812     // Operator fallback
1813     CeedOperator op_fallback;
1814 
1815     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1816     if (op_fallback) {
1817       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1818       return CEED_ERROR_SUCCESS;
1819     }
1820   }
1821   // Default interface implementation
1822   CeedCall(CeedVectorSetValue(assembled, 0.0));
1823   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1824 
1825   return CEED_ERROR_SUCCESS;
1826 }
1827 
1828 /**
1829   @brief Assemble the point block diagonal of a square linear CeedOperator
1830 
1831   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1832 
1833   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1834 
1835   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1836 
1837   @param[in]  op        CeedOperator to assemble CeedQFunction
1838   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1839 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1840 component in].
1841   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1842 
1843   @return An error code: 0 - success, otherwise - failure
1844 
1845   @ref User
1846 **/
1847 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1848   bool is_composite;
1849   CeedCall(CeedOperatorCheckReady(op));
1850   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1851 
1852   CeedSize input_size = 0, output_size = 0;
1853   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1854   if (input_size != output_size) {
1855     // LCOV_EXCL_START
1856     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1857     // LCOV_EXCL_STOP
1858   }
1859 
1860   // Early exit for empty operator
1861   if (!is_composite) {
1862     CeedInt num_elem = 0;
1863 
1864     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1865     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1866   }
1867 
1868   if (op->LinearAssembleAddPointBlockDiagonal) {
1869     // Backend version
1870     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1871     return CEED_ERROR_SUCCESS;
1872   } else {
1873     // Operator fallback
1874     CeedOperator op_fallback;
1875 
1876     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1877     if (op_fallback) {
1878       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1879       return CEED_ERROR_SUCCESS;
1880     }
1881   }
1882   // Default interface implementation
1883   if (is_composite) {
1884     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1885   } else {
1886     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1887   }
1888 
1889   return CEED_ERROR_SUCCESS;
1890 }
1891 
1892 /**
1893    @brief Fully assemble the nonzero pattern of a linear operator.
1894 
1895    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1896 
1897    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1898 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j)
1899 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1900 
1901    This will generally be slow unless your operator is low-order.
1902 
1903    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1904 
1905    @param[in]  op          CeedOperator to assemble
1906    @param[out] num_entries Number of entries in coordinate nonzero pattern
1907    @param[out] rows        Row number for each entry
1908    @param[out] cols        Column number for each entry
1909 
1910    @ref User
1911 **/
1912 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1913   CeedInt       num_suboperators, single_entries;
1914   CeedOperator *sub_operators;
1915   bool          is_composite;
1916   CeedCall(CeedOperatorCheckReady(op));
1917   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1918 
1919   if (op->LinearAssembleSymbolic) {
1920     // Backend version
1921     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1922     return CEED_ERROR_SUCCESS;
1923   } else {
1924     // Operator fallback
1925     CeedOperator op_fallback;
1926 
1927     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1928     if (op_fallback) {
1929       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1930       return CEED_ERROR_SUCCESS;
1931     }
1932   }
1933 
1934   // Default interface implementation
1935 
1936   // count entries and allocate rows, cols arrays
1937   *num_entries = 0;
1938   if (is_composite) {
1939     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1940     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1941     for (CeedInt k = 0; k < num_suboperators; ++k) {
1942       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1943       *num_entries += single_entries;
1944     }
1945   } else {
1946     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1947     *num_entries += single_entries;
1948   }
1949   CeedCall(CeedCalloc(*num_entries, rows));
1950   CeedCall(CeedCalloc(*num_entries, cols));
1951 
1952   // assemble nonzero locations
1953   CeedInt offset = 0;
1954   if (is_composite) {
1955     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1956     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1957     for (CeedInt k = 0; k < num_suboperators; ++k) {
1958       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1959       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1960       offset += single_entries;
1961     }
1962   } else {
1963     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1964   }
1965 
1966   return CEED_ERROR_SUCCESS;
1967 }
1968 
1969 /**
1970    @brief Fully assemble the nonzero entries of a linear operator.
1971 
1972    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1973 
1974    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1975 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added,
1976 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1977 
1978    This will generally be slow unless your operator is low-order.
1979 
1980    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1981 
1982    @param[in]  op     CeedOperator to assemble
1983    @param[out] values Values to assemble into matrix
1984 
1985    @ref User
1986 **/
1987 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1988   CeedInt       num_suboperators, single_entries = 0;
1989   CeedOperator *sub_operators;
1990   bool          is_composite;
1991   CeedCall(CeedOperatorCheckReady(op));
1992   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1993 
1994   // Early exit for empty operator
1995   if (!is_composite) {
1996     CeedInt num_elem = 0;
1997 
1998     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1999     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2000   }
2001 
2002   if (op->LinearAssemble) {
2003     // Backend version
2004     CeedCall(op->LinearAssemble(op, values));
2005     return CEED_ERROR_SUCCESS;
2006   } else {
2007     // Operator fallback
2008     CeedOperator op_fallback;
2009 
2010     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2011     if (op_fallback) {
2012       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2013       return CEED_ERROR_SUCCESS;
2014     }
2015   }
2016 
2017   // Default interface implementation
2018   CeedInt offset = 0;
2019   CeedCall(CeedVectorSetValue(values, 0.0));
2020   if (is_composite) {
2021     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2022     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2023     for (CeedInt k = 0; k < num_suboperators; k++) {
2024       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2025       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2026       offset += single_entries;
2027     }
2028   } else {
2029     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2030   }
2031 
2032   return CEED_ERROR_SUCCESS;
2033 }
2034 
2035 /**
2036   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
2037 
2038   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2039 
2040   @param[in]  op               Composite CeedOperator
2041   @param[in]  num_skip_indices Number of suboperators to skip
2042   @param[in]  skip_indices     Array of indices of suboperators to skip
2043   @param[out] mult             Vector to store multiplicity (of size l_size)
2044 
2045   @return An error code: 0 - success, otherwise - failure
2046 
2047   @ref User
2048 **/
2049 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2050   CeedCall(CeedOperatorCheckReady(op));
2051 
2052   Ceed                ceed;
2053   CeedInt             num_suboperators;
2054   CeedSize            l_vec_len;
2055   CeedScalar         *mult_array;
2056   CeedVector          ones_l_vec;
2057   CeedElemRestriction elem_rstr;
2058   CeedOperator       *sub_operators;
2059 
2060   CeedCall(CeedOperatorGetCeed(op, &ceed));
2061 
2062   // Zero mult vector
2063   CeedCall(CeedVectorSetValue(mult, 0.0));
2064 
2065   // Get suboperators
2066   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2067   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2068   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2069 
2070   // Work vector
2071   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2072   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2073   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2074   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2075 
2076   // Compute multiplicity across suboperators
2077   for (CeedInt i = 0; i < num_suboperators; i++) {
2078     const CeedScalar *sub_mult_array;
2079     CeedVector        sub_mult_l_vec, ones_e_vec;
2080 
2081     // -- Check for suboperator to skip
2082     for (CeedInt j = 0; j < num_skip_indices; j++) {
2083       if (skip_indices[j] == i) continue;
2084     }
2085 
2086     // -- Sub operator multiplicity
2087     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2088     CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2089     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2090     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2091     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2092     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2093     // ---- Flag every node present in the current suboperator
2094     for (CeedInt j = 0; j < l_vec_len; j++) {
2095       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2096     }
2097     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2098     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2099     CeedCall(CeedVectorDestroy(&ones_e_vec));
2100   }
2101   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2102   CeedCall(CeedVectorDestroy(&ones_l_vec));
2103 
2104   return CEED_ERROR_SUCCESS;
2105 }
2106 
2107 /**
2108   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2109 grid interpolation
2110 
2111   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2112 
2113   @param[in]  op_fine      Fine grid operator
2114   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2115   @param[in]  rstr_coarse  Coarse grid restriction
2116   @param[in]  basis_coarse Coarse grid active vector basis
2117   @param[out] op_coarse    Coarse grid operator
2118   @param[out] op_prolong   Coarse to fine operator, or NULL
2119   @param[out] op_restrict  Fine to coarse operator, or NULL
2120 
2121   @return An error code: 0 - success, otherwise - failure
2122 
2123   @ref User
2124 **/
2125 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2126                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2127   CeedCall(CeedOperatorCheckReady(op_fine));
2128 
2129   // Build prolongation matrix, if required
2130   CeedBasis basis_c_to_f = NULL;
2131   if (op_prolong || op_restrict) {
2132     CeedBasis basis_fine;
2133     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2134     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2135   }
2136 
2137   // Core code
2138   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2139 
2140   return CEED_ERROR_SUCCESS;
2141 }
2142 
2143 /**
2144   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2145 
2146   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2147 
2148   @param[in]  op_fine       Fine grid operator
2149   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2150   @param[in]  rstr_coarse   Coarse grid restriction
2151   @param[in]  basis_coarse  Coarse grid active vector basis
2152   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2153   @param[out] op_coarse     Coarse grid operator
2154   @param[out] op_prolong    Coarse to fine operator, or NULL
2155   @param[out] op_restrict   Fine to coarse operator, or NULL
2156 
2157   @return An error code: 0 - success, otherwise - failure
2158 
2159   @ref User
2160 **/
2161 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2162                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2163                                              CeedOperator *op_restrict) {
2164   CeedCall(CeedOperatorCheckReady(op_fine));
2165   Ceed ceed;
2166   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2167 
2168   // Check for compatible quadrature spaces
2169   CeedBasis basis_fine;
2170   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2171   CeedInt Q_f, Q_c;
2172   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2173   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2174   if (Q_f != Q_c) {
2175     // LCOV_EXCL_START
2176     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2177     // LCOV_EXCL_STOP
2178   }
2179 
2180   // Create coarse to fine basis, if required
2181   CeedBasis basis_c_to_f = NULL;
2182   if (op_prolong || op_restrict) {
2183     // Check if interpolation matrix is provided
2184     if (!interp_c_to_f) {
2185       // LCOV_EXCL_START
2186       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2187       // LCOV_EXCL_STOP
2188     }
2189     CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2190     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2191     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2192     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2193     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2194     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2195     CeedScalar *q_ref, *q_weight, *grad;
2196     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2197     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2198     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2199     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2200     CeedCall(CeedFree(&q_ref));
2201     CeedCall(CeedFree(&q_weight));
2202     CeedCall(CeedFree(&grad));
2203   }
2204 
2205   // Core code
2206   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2207   return CEED_ERROR_SUCCESS;
2208 }
2209 
2210 /**
2211   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2212 
2213   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2214 
2215   @param[in]  op_fine       Fine grid operator
2216   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2217   @param[in]  rstr_coarse   Coarse grid restriction
2218   @param[in]  basis_coarse  Coarse grid active vector basis
2219   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2220   @param[out] op_coarse     Coarse grid operator
2221   @param[out] op_prolong    Coarse to fine operator, or NULL
2222   @param[out] op_restrict   Fine to coarse operator, or NULL
2223 
2224   @return An error code: 0 - success, otherwise - failure
2225 
2226   @ref User
2227 **/
2228 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2229                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2230                                        CeedOperator *op_restrict) {
2231   CeedCall(CeedOperatorCheckReady(op_fine));
2232   Ceed ceed;
2233   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2234 
2235   // Check for compatible quadrature spaces
2236   CeedBasis basis_fine;
2237   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2238   CeedInt Q_f, Q_c;
2239   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2240   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2241   if (Q_f != Q_c) {
2242     // LCOV_EXCL_START
2243     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2244     // LCOV_EXCL_STOP
2245   }
2246 
2247   // Coarse to fine basis
2248   CeedBasis basis_c_to_f = NULL;
2249   if (op_prolong || op_restrict) {
2250     // Check if interpolation matrix is provided
2251     if (!interp_c_to_f) {
2252       // LCOV_EXCL_START
2253       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2254       // LCOV_EXCL_STOP
2255     }
2256     CeedElemTopology topo;
2257     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2258     CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2259     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2260     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2261     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2262     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2263     CeedScalar *q_ref, *q_weight, *grad;
2264     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2265     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2266     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2267     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2268     CeedCall(CeedFree(&q_ref));
2269     CeedCall(CeedFree(&q_weight));
2270     CeedCall(CeedFree(&grad));
2271   }
2272 
2273   // Core code
2274   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2275   return CEED_ERROR_SUCCESS;
2276 }
2277 
2278 /**
2279   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2280 
2281   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2282     This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, M = V^T V, K = V^T S V.
2283     The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form V^T
2284 S^hat V. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear.
2285 
2286   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2287 
2288   @param[in]  op      CeedOperator to create element inverses
2289   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2290   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2291 
2292   @return An error code: 0 - success, otherwise - failure
2293 
2294   @ref User
2295 **/
2296 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2297   CeedCall(CeedOperatorCheckReady(op));
2298 
2299   if (op->CreateFDMElementInverse) {
2300     // Backend version
2301     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2302     return CEED_ERROR_SUCCESS;
2303   } else {
2304     // Operator fallback
2305     CeedOperator op_fallback;
2306 
2307     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2308     if (op_fallback) {
2309       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2310       return CEED_ERROR_SUCCESS;
2311     }
2312   }
2313 
2314   // Default interface implementation
2315   Ceed ceed, ceed_parent;
2316   CeedCall(CeedOperatorGetCeed(op, &ceed));
2317   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2318   ceed_parent = ceed_parent ? ceed_parent : ceed;
2319   CeedQFunction qf;
2320   CeedCall(CeedOperatorGetQFunction(op, &qf));
2321 
2322   // Determine active input basis
2323   bool                interp = false, grad = false;
2324   CeedBasis           basis = NULL;
2325   CeedElemRestriction rstr  = NULL;
2326   CeedOperatorField  *op_fields;
2327   CeedQFunctionField *qf_fields;
2328   CeedInt             num_input_fields;
2329   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2330   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2331   for (CeedInt i = 0; i < num_input_fields; i++) {
2332     CeedVector vec;
2333     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2334     if (vec == CEED_VECTOR_ACTIVE) {
2335       CeedEvalMode eval_mode;
2336       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2337       interp = interp || eval_mode == CEED_EVAL_INTERP;
2338       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2339       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2340       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2341     }
2342   }
2343   if (!basis) {
2344     // LCOV_EXCL_START
2345     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2346     // LCOV_EXCL_STOP
2347   }
2348   CeedSize l_size = 1;
2349   CeedInt  P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
2350   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2351   CeedCall(CeedBasisGetNumNodes(basis, &elem_size));
2352   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2353   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2354   CeedCall(CeedBasisGetDimension(basis, &dim));
2355   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2356   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2357   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2358 
2359   // Build and diagonalize 1D Mass and Laplacian
2360   bool tensor_basis;
2361   CeedCall(CeedBasisIsTensor(basis, &tensor_basis));
2362   if (!tensor_basis) {
2363     // LCOV_EXCL_START
2364     return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2365     // LCOV_EXCL_STOP
2366   }
2367   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2368   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2369   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2370   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2371   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2372   CeedCall(CeedCalloc(P_1d, &lambda));
2373   // -- Build matrices
2374   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2375   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2376   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2377   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2378   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2379 
2380   // -- Diagonalize
2381   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2382   CeedCall(CeedFree(&mass));
2383   CeedCall(CeedFree(&laplace));
2384   for (CeedInt i = 0; i < P_1d; i++) {
2385     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2386   }
2387   CeedCall(CeedFree(&x));
2388 
2389   // Assemble QFunction
2390   CeedVector          assembled;
2391   CeedElemRestriction rstr_qf;
2392   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2393   CeedInt layout[3];
2394   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2395   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2396   CeedScalar max_norm = 0;
2397   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2398 
2399   // Calculate element averages
2400   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2401   CeedScalar       *elem_avg;
2402   const CeedScalar *assembled_array, *q_weight_array;
2403   CeedVector        q_weight;
2404   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2405   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2406   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2407   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2408   CeedCall(CeedCalloc(num_elem, &elem_avg));
2409   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2410   for (CeedInt e = 0; e < num_elem; e++) {
2411     CeedInt count = 0;
2412     for (CeedInt q = 0; q < num_qpts; q++) {
2413       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2414         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2415           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2416           count++;
2417         }
2418       }
2419     }
2420     if (count) {
2421       elem_avg[e] /= count;
2422     } else {
2423       elem_avg[e] = 1.0;
2424     }
2425   }
2426   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2427   CeedCall(CeedVectorDestroy(&assembled));
2428   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2429   CeedCall(CeedVectorDestroy(&q_weight));
2430 
2431   // Build FDM diagonal
2432   CeedVector  q_data;
2433   CeedScalar *q_data_array, *fdm_diagonal;
2434   CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal));
2435   const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON;
2436   for (CeedInt c = 0; c < num_comp; c++) {
2437     for (CeedInt n = 0; n < elem_size; n++) {
2438       if (interp) fdm_diagonal[c * elem_size + n] = 1.0;
2439       if (grad) {
2440         for (CeedInt d = 0; d < dim; d++) {
2441           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2442           fdm_diagonal[c * elem_size + n] += lambda[i];
2443         }
2444       }
2445       if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound;
2446     }
2447   }
2448   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data));
2449   CeedCall(CeedVectorSetValue(q_data, 0.0));
2450   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2451   for (CeedInt e = 0; e < num_elem; e++) {
2452     for (CeedInt c = 0; c < num_comp; c++) {
2453       for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]);
2454     }
2455   }
2456   CeedCall(CeedFree(&elem_avg));
2457   CeedCall(CeedFree(&fdm_diagonal));
2458   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2459 
2460   // Setup FDM operator
2461   // -- Basis
2462   CeedBasis   fdm_basis;
2463   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2464   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2465   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2466   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2467   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2468   CeedCall(CeedFree(&fdm_interp));
2469   CeedCall(CeedFree(&grad_dummy));
2470   CeedCall(CeedFree(&q_ref_dummy));
2471   CeedCall(CeedFree(&q_weight_dummy));
2472   CeedCall(CeedFree(&lambda));
2473 
2474   // -- Restriction
2475   CeedElemRestriction rstr_qd_i;
2476   CeedInt             strides[3] = {1, elem_size, elem_size * num_comp};
2477   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i));
2478   // -- QFunction
2479   CeedQFunction qf_fdm;
2480   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2481   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2482   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2483   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2484   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2485   // -- QFunction context
2486   CeedInt *num_comp_data;
2487   CeedCall(CeedCalloc(1, &num_comp_data));
2488   num_comp_data[0] = num_comp;
2489   CeedQFunctionContext ctx_fdm;
2490   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2491   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2492   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2493   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2494   // -- Operator
2495   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2496   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2497   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2498   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2499 
2500   // Cleanup
2501   CeedCall(CeedVectorDestroy(&q_data));
2502   CeedCall(CeedBasisDestroy(&fdm_basis));
2503   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2504   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2505 
2506   return CEED_ERROR_SUCCESS;
2507 }
2508 
2509 /// @}
2510