xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision 83065cc9fcccd70414a6c051e3462f3c3dae8b75)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <assert.h>
9 #include <ceed-impl.h>
10 #include <ceed/backend.h>
11 #include <ceed/ceed.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) {
128       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     }
130     // Cleanup
131     CeedCall(CeedQFunctionDestroy(&qf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
133     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
134   }
135   CeedCall(CeedOperatorSetName(op_fallback, op->name));
136   CeedCall(CeedOperatorCheckReady(op_fallback));
137   op->op_fallback = op_fallback;
138 
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
144 
145   @param[in]  op          CeedOperator to retrieve fallback for
146   @param[out] op_fallback Fallback CeedOperator
147 
148   @return An error code: 0 - success, otherwise - failure
149 
150   @ref Developer
151 **/
152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
153   // Create if needed
154   if (!op->op_fallback) {
155     CeedCall(CeedOperatorCreateFallback(op));
156   }
157   if (op->op_fallback) {
158     bool is_debug;
159 
160     CeedCall(CeedIsDebug(op->ceed, &is_debug));
161     if (is_debug) {
162       Ceed        ceed, ceed_fallback;
163       const char *resource, *resource_fallback;
164 
165       CeedCall(CeedOperatorGetCeed(op, &ceed));
166       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
167       CeedCall(CeedGetResource(ceed, &resource));
168       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
169 
170       CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n");
171       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
172                 op->op_fallback);
173     }
174   }
175   *op_fallback = op->op_fallback;
176 
177   return CEED_ERROR_SUCCESS;
178 }
179 
180 /**
181   @brief Select correct basis matrix pointer based on CeedEvalMode
182 
183   @param[in]  eval_mode Current basis evaluation mode
184   @param[in]  identity  Pointer to identity matrix
185   @param[in]  interp    Pointer to interpolation matrix
186   @param[in]  grad      Pointer to gradient matrix
187   @param[out] basis_ptr Basis pointer to set
188 
189   @ref Developer
190 **/
191 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad,
192                                                const CeedScalar **basis_ptr) {
193   switch (eval_mode) {
194     case CEED_EVAL_NONE:
195       *basis_ptr = identity;
196       break;
197     case CEED_EVAL_INTERP:
198       *basis_ptr = interp;
199       break;
200     case CEED_EVAL_GRAD:
201       *basis_ptr = grad;
202       break;
203     case CEED_EVAL_WEIGHT:
204     case CEED_EVAL_DIV:
205     case CEED_EVAL_CURL:
206       break;  // Caught by QF Assembly
207   }
208   assert(*basis_ptr != NULL);
209 }
210 
211 /**
212   @brief Create point block restriction for active operator field
213 
214   @param[in]  rstr            Original CeedElemRestriction for active field
215   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
216 
217   @return An error code: 0 - success, otherwise - failure
218 
219   @ref Developer
220 **/
221 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
222   Ceed ceed;
223   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
224   const CeedInt *offsets;
225   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
226 
227   // Expand offsets
228   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
229   CeedSize l_size;
230   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
231   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
232   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
233   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
234   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
235   CeedInt shift = num_comp;
236   if (comp_stride != 1) shift *= num_comp;
237   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
238   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
239     pointblock_offsets[i] = offsets[i] * shift;
240   }
241 
242   // Create new restriction
243   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
244                                      pointblock_offsets, pointblock_rstr));
245 
246   // Cleanup
247   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
248 
249   return CEED_ERROR_SUCCESS;
250 }
251 
252 /**
253   @brief Core logic for assembling operator diagonal or point block diagonal
254 
255   @param[in]  op            CeedOperator to assemble point block diagonal
256   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
257   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
258   @param[out] assembled     CeedVector to store assembled diagonal
259 
260   @return An error code: 0 - success, otherwise - failure
261 
262   @ref Developer
263 **/
264 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
265   Ceed ceed;
266   CeedCall(CeedOperatorGetCeed(op, &ceed));
267 
268   // Assemble QFunction
269   CeedQFunction       qf;
270   const CeedScalar   *assembled_qf_array;
271   CeedVector          assembled_qf;
272   CeedElemRestriction assembled_elem_rstr;
273   CeedInt             num_input_fields, num_output_fields;
274   CeedInt             layout[3];
275 
276   CeedCall(CeedOperatorGetQFunction(op, &qf));
277   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
278   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
279   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout));
280   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
281   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
282 
283   // Get assembly data
284   CeedOperatorAssemblyData data;
285   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
286   CeedInt                 *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
287   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
288   CeedBasis               *active_bases;
289   CeedElemRestriction     *active_elem_rstrs;
290 
291   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
292   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
293                                                 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components));
294   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL));
295   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs));
296 
297   // Loop over all active bases
298   for (CeedInt b = 0; b < num_active_bases; b++) {
299     // Assemble point block diagonal restriction, if needed
300     CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b];
301 
302     if (is_pointblock) {
303       CeedElemRestriction point_block_elem_rstr;
304 
305       CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr));
306       diag_elem_rstr = point_block_elem_rstr;
307     }
308 
309     // Create diagonal vector
310     CeedVector elem_diag;
311     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
312 
313     // Assemble element operator diagonals
314     CeedScalar *elem_diag_array;
315     CeedInt     num_elem, num_nodes, num_qpts, num_components;
316 
317     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
318     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
319     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
320     CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes));
321     CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components));
322     CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts));
323 
324     // Basis matrices
325     const CeedScalar *interp, *grad;
326     CeedScalar       *identity      = NULL;
327     bool              has_eval_none = false;
328     for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) {
329       has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE);
330     }
331     for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) {
332       has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE);
333     }
334     if (has_eval_none) {
335       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
336       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
337     }
338     CeedCall(CeedBasisGetInterp(active_bases[b], &interp));
339     CeedCall(CeedBasisGetGrad(active_bases[b], &grad));
340     // Compute the diagonal of B^T D B
341     // Each element
342     for (CeedInt e = 0; e < num_elem; e++) {
343       CeedInt d_out = -1;
344       // Each basis eval mode pair
345       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) {
346         const CeedScalar *B_t = NULL;
347         if (eval_modes_out[b][e_out] == CEED_EVAL_GRAD) d_out += 1;
348         CeedOperatorGetBasisPointer(eval_modes_out[b][e_out], identity, interp, &grad[d_out * num_qpts * num_nodes], &B_t);
349         CeedInt d_in = -1;
350         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) {
351           const CeedScalar *B = NULL;
352           if (eval_modes_in[b][e_in] == CEED_EVAL_GRAD) d_in += 1;
353           CeedOperatorGetBasisPointer(eval_modes_in[b][e_in], identity, interp, &grad[d_in * num_qpts * num_nodes], &B);
354           // Each component
355           for (CeedInt c_out = 0; c_out < num_components; c_out++) {
356             // Each qpt/node pair
357             for (CeedInt q = 0; q < num_qpts; q++) {
358               if (is_pointblock) {
359                 // Point Block Diagonal
360                 for (CeedInt c_in = 0; c_in < num_components; c_in++) {
361                   const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
362                   const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
363                   for (CeedInt n = 0; n < num_nodes; n++) {
364                     elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] +=
365                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
366                   }
367                 }
368               } else {
369                 // Diagonal Only
370                 const CeedInt    c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
371                 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
372                 for (CeedInt n = 0; n < num_nodes; n++) {
373                   elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
374                 }
375               }
376             }
377           }
378         }
379       }
380     }
381     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
382 
383     // Assemble local operator diagonal
384     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
385 
386     // Cleanup
387     if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
388     CeedCall(CeedVectorDestroy(&elem_diag));
389     CeedCall(CeedFree(&identity));
390   }
391   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
392   CeedCall(CeedVectorDestroy(&assembled_qf));
393 
394   return CEED_ERROR_SUCCESS;
395 }
396 
397 /**
398   @brief Core logic for assembling composite operator diagonal
399 
400   @param[in]  op            CeedOperator to assemble point block diagonal
401   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
402   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
403   @param[out] assembled     CeedVector to store assembled diagonal
404 
405   @return An error code: 0 - success, otherwise - failure
406 
407   @ref Developer
408 **/
409 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
410                                                                  CeedVector assembled) {
411   CeedInt       num_sub;
412   CeedOperator *suboperators;
413   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
414   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
415   for (CeedInt i = 0; i < num_sub; i++) {
416     if (is_pointblock) {
417       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
418     } else {
419       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
420     }
421   }
422   return CEED_ERROR_SUCCESS;
423 }
424 
425 /**
426   @brief Build nonzero pattern for non-composite operator
427 
428   Users should generally use CeedOperatorLinearAssembleSymbolic()
429 
430   @param[in]  op     CeedOperator to assemble nonzero pattern
431   @param[in]  offset Offset for number of entries
432   @param[out] rows   Row number for each entry
433   @param[out] cols   Column number for each entry
434 
435   @return An error code: 0 - success, otherwise - failure
436 
437   @ref Developer
438 **/
439 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
440   Ceed ceed;
441   bool is_composite;
442   CeedCall(CeedOperatorGetCeed(op, &ceed));
443   CeedCall(CeedOperatorIsComposite(op, &is_composite));
444 
445   if (is_composite) {
446     // LCOV_EXCL_START
447     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
448     // LCOV_EXCL_STOP
449   }
450 
451   CeedSize num_nodes;
452   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
453   CeedElemRestriction rstr_in;
454   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
455   CeedInt num_elem, elem_size, num_comp;
456   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
457   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
458   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
459   CeedInt layout_er[3];
460   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
461 
462   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
463 
464   // Determine elem_dof relation
465   CeedVector index_vec;
466   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
467   CeedScalar *array;
468   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
469   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
470   CeedCall(CeedVectorRestoreArray(index_vec, &array));
471   CeedVector elem_dof;
472   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
473   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
474   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
475   const CeedScalar *elem_dof_a;
476   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
477   CeedCall(CeedVectorDestroy(&index_vec));
478 
479   // Determine i, j locations for element matrices
480   CeedInt count = 0;
481   for (CeedInt e = 0; e < num_elem; e++) {
482     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
483       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
484         for (CeedInt i = 0; i < elem_size; i++) {
485           for (CeedInt j = 0; j < elem_size; j++) {
486             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
487             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
488 
489             const CeedInt row = elem_dof_a[elem_dof_index_row];
490             const CeedInt col = elem_dof_a[elem_dof_index_col];
491 
492             rows[offset + count] = row;
493             cols[offset + count] = col;
494             count++;
495           }
496         }
497       }
498     }
499   }
500   if (count != local_num_entries) {
501     // LCOV_EXCL_START
502     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
503     // LCOV_EXCL_STOP
504   }
505   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
506   CeedCall(CeedVectorDestroy(&elem_dof));
507 
508   return CEED_ERROR_SUCCESS;
509 }
510 
511 /**
512   @brief Assemble nonzero entries for non-composite operator
513 
514   Users should generally use CeedOperatorLinearAssemble()
515 
516   @param[in]  op     CeedOperator to assemble
517   @param[in]  offset Offset for number of entries
518   @param[out] values Values to assemble into matrix
519 
520   @return An error code: 0 - success, otherwise - failure
521 
522   @ref Developer
523 **/
524 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
525   Ceed ceed;
526   bool is_composite;
527   CeedCall(CeedOperatorGetCeed(op, &ceed));
528   CeedCall(CeedOperatorIsComposite(op, &is_composite));
529 
530   if (is_composite) {
531     // LCOV_EXCL_START
532     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
533     // LCOV_EXCL_STOP
534   }
535 
536   // Early exit for empty operator
537   {
538     CeedInt num_elem = 0;
539 
540     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
541     if (num_elem == 0) return CEED_ERROR_SUCCESS;
542   }
543 
544   if (op->LinearAssembleSingle) {
545     // Backend version
546     CeedCall(op->LinearAssembleSingle(op, offset, values));
547     return CEED_ERROR_SUCCESS;
548   } else {
549     // Operator fallback
550     CeedOperator op_fallback;
551 
552     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
553     if (op_fallback) {
554       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
555       return CEED_ERROR_SUCCESS;
556     }
557   }
558 
559   // Assemble QFunction
560   CeedQFunction qf;
561   CeedCall(CeedOperatorGetQFunction(op, &qf));
562   CeedVector          assembled_qf;
563   CeedElemRestriction rstr_q;
564   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
565   CeedSize qf_length;
566   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
567 
568   CeedInt            num_input_fields, num_output_fields;
569   CeedOperatorField *input_fields;
570   CeedOperatorField *output_fields;
571   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
572 
573   // Get assembly data
574   CeedOperatorAssemblyData data;
575   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
576   const CeedEvalMode **eval_modes_in, **eval_modes_out;
577   CeedInt             *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
578   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out,
579                                                 &eval_modes_out, NULL, NULL));
580   CeedBasis *bases;
581   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL));
582   CeedBasis basis_in = bases[0];
583 
584   if (num_active_bases > 1) {
585     // LCOV_EXCL_START
586     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases");
587     // LCOV_EXCL_STOP
588   }
589   if (num_eval_modes_in[0] == 0 || num_eval_modes_out[0] == 0) {
590     // LCOV_EXCL_START
591     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
592     // LCOV_EXCL_STOP
593   }
594 
595   CeedElemRestriction active_rstr;
596   CeedInt             num_elem, elem_size, num_qpts, num_comp;
597   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
598   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
599   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
600   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
601   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
602 
603   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
604 
605   // loop over elements and put in data structure
606   const CeedScalar *interp_in, *grad_in;
607   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
608   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
609 
610   const CeedScalar *assembled_qf_array;
611   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
612 
613   CeedInt layout_qf[3];
614   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
615   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
616 
617   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
618   const CeedScalar **B_mats_in, **B_mats_out;
619   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out));
620   const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0];
621   CeedScalar        BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]];
622   CeedScalar        elem_mat[elem_size * elem_size];
623   CeedInt           count = 0;
624   CeedScalar       *vals;
625   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
626   for (CeedInt e = 0; e < num_elem; e++) {
627     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
628       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
629         // Compute B^T*D
630         for (CeedInt n = 0; n < elem_size; n++) {
631           for (CeedInt q = 0; q < num_qpts; q++) {
632             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
633               const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in);
634               CeedScalar    sum       = 0.0;
635               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
636                 const CeedInt b_out_index     = (num_eval_modes_out[0] * q + e_out) * elem_size + n;
637                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out;
638                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
639                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
640               }
641               BTD_mat[btd_index] = sum;
642             }
643           }
644         }
645         // form element matrix itself (for each block component)
646         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0]));
647 
648         // put element matrix in coordinate data structure
649         for (CeedInt i = 0; i < elem_size; i++) {
650           for (CeedInt j = 0; j < elem_size; j++) {
651             vals[offset + count] = elem_mat[i * elem_size + j];
652             count++;
653           }
654         }
655       }
656     }
657   }
658   if (count != local_num_entries) {
659     // LCOV_EXCL_START
660     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
661     // LCOV_EXCL_STOP
662   }
663   CeedCall(CeedVectorRestoreArray(values, &vals));
664 
665   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
666   CeedCall(CeedVectorDestroy(&assembled_qf));
667 
668   return CEED_ERROR_SUCCESS;
669 }
670 
671 /**
672   @brief Count number of entries for assembled CeedOperator
673 
674   @param[in]  op          CeedOperator to assemble
675   @param[out] num_entries Number of entries in assembled representation
676 
677   @return An error code: 0 - success, otherwise - failure
678 
679   @ref Utility
680 **/
681 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
682   bool                is_composite;
683   CeedElemRestriction rstr;
684   CeedInt             num_elem, elem_size, num_comp;
685 
686   CeedCall(CeedOperatorIsComposite(op, &is_composite));
687   if (is_composite) {
688     // LCOV_EXCL_START
689     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
690     // LCOV_EXCL_STOP
691   }
692   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
693   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
694   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
695   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
696   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
697 
698   return CEED_ERROR_SUCCESS;
699 }
700 
701 /**
702   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
703 
704   @param[in]  op_fine      Fine grid operator
705   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
706   @param[in]  rstr_coarse  Coarse grid restriction
707   @param[in]  basis_coarse Coarse grid active vector basis
708   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
709   @param[out] op_coarse    Coarse grid operator
710   @param[out] op_prolong   Coarse to fine operator, or NULL
711   @param[out] op_restrict  Fine to coarse operator, or NULL
712 
713   @return An error code: 0 - success, otherwise - failure
714 
715   @ref Developer
716 **/
717 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
718                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
719   Ceed       ceed;
720   CeedVector mult_vec = NULL;
721   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
722 
723   // Check for composite operator
724   bool is_composite;
725   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
726   if (is_composite) {
727     // LCOV_EXCL_START
728     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
729     // LCOV_EXCL_STOP
730   }
731 
732   // Coarse Grid
733   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
734   CeedElemRestriction rstr_fine = NULL;
735   // -- Clone input fields
736   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
737     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
738       rstr_fine = op_fine->input_fields[i]->elem_rstr;
739       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
740     } else {
741       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
742                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
743     }
744   }
745   // -- Clone output fields
746   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
747     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
748       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
749     } else {
750       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
751                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
752     }
753   }
754   // -- Clone QFunctionAssemblyData
755   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
756 
757   // Multiplicity vector
758   if (op_restrict || op_prolong) {
759     CeedVector mult_e_vec;
760 
761     if (!p_mult_fine) {
762       // LCOV_EXCL_START
763       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
764       // LCOV_EXCL_STOP
765     }
766     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
767     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
768     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
769     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
770     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
771     CeedCall(CeedVectorDestroy(&mult_e_vec));
772     CeedCall(CeedVectorReciprocal(mult_vec));
773   }
774 
775   // Clone name
776   bool   has_name = op_fine->name;
777   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
778   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
779 
780   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
781   if ((op_restrict || op_prolong) && !basis_c_to_f) {
782     // LCOV_EXCL_START
783     return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine basis");
784     // LCOV_EXCL_STOP
785   }
786 
787   // Restriction/Prolongation Operators
788   CeedInt num_comp;
789   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
790 
791   // Restriction
792   if (op_restrict) {
793     CeedInt             *num_comp_r_data;
794     CeedQFunction        qf_restrict;
795     CeedQFunctionContext ctx_r;
796 
797     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
798     CeedCall(CeedCalloc(1, &num_comp_r_data));
799     num_comp_r_data[0] = num_comp;
800     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
801     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
802     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
803     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
804     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
805     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
806     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
807     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
808 
809     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
810     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
811     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
812     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
813 
814     // Set name
815     char *restriction_name;
816     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
817     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
818     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
819     CeedCall(CeedFree(&restriction_name));
820 
821     // Check
822     CeedCall(CeedOperatorCheckReady(*op_restrict));
823 
824     // Cleanup
825     CeedCall(CeedQFunctionDestroy(&qf_restrict));
826   }
827 
828   // Prolongation
829   if (op_prolong) {
830     CeedInt             *num_comp_p_data;
831     CeedQFunction        qf_prolong;
832     CeedQFunctionContext ctx_p;
833 
834     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
835     CeedCall(CeedCalloc(1, &num_comp_p_data));
836     num_comp_p_data[0] = num_comp;
837     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
838     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
839     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
840     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
841     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
842     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
843     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
844     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
845 
846     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
847     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
848     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
849     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
850 
851     // Set name
852     char *prolongation_name;
853     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
854     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
855     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
856     CeedCall(CeedFree(&prolongation_name));
857 
858     // Check
859     CeedCall(CeedOperatorCheckReady(*op_prolong));
860 
861     // Cleanup
862     CeedCall(CeedQFunctionDestroy(&qf_prolong));
863   }
864 
865   // Check
866   CeedCall(CeedOperatorCheckReady(*op_coarse));
867 
868   // Cleanup
869   CeedCall(CeedVectorDestroy(&mult_vec));
870   CeedCall(CeedBasisDestroy(&basis_c_to_f));
871 
872   return CEED_ERROR_SUCCESS;
873 }
874 
875 /**
876   @brief Build 1D mass matrix and Laplacian with perturbation
877 
878   @param[in]  interp_1d   Interpolation matrix in one dimension
879   @param[in]  grad_1d     Gradient matrix in one dimension
880   @param[in]  q_weight_1d Quadrature weights in one dimension
881   @param[in]  P_1d        Number of basis nodes in one dimension
882   @param[in]  Q_1d        Number of quadrature points in one dimension
883   @param[in]  dim         Dimension of basis
884   @param[out] mass        Assembled mass matrix in one dimension
885   @param[out] laplace     Assembled perturbed Laplacian in one dimension
886 
887   @return An error code: 0 - success, otherwise - failure
888 
889   @ref Developer
890 **/
891 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d,
892                                                       CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
893   for (CeedInt i = 0; i < P_1d; i++) {
894     for (CeedInt j = 0; j < P_1d; j++) {
895       CeedScalar sum = 0.0;
896       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
897       mass[i + j * P_1d] = sum;
898     }
899   }
900   // -- Laplacian
901   for (CeedInt i = 0; i < P_1d; i++) {
902     for (CeedInt j = 0; j < P_1d; j++) {
903       CeedScalar sum = 0.0;
904       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
905       laplace[i + j * P_1d] = sum;
906     }
907   }
908   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
909   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
910   return CEED_ERROR_SUCCESS;
911 }
912 CeedPragmaOptimizeOn;
913 
914 /// @}
915 
916 /// ----------------------------------------------------------------------------
917 /// CeedOperator Backend API
918 /// ----------------------------------------------------------------------------
919 /// @addtogroup CeedOperatorBackend
920 /// @{
921 
922 /**
923   @brief Create object holding CeedQFunction assembly data for CeedOperator
924 
925   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
926   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
927 
928   @return An error code: 0 - success, otherwise - failure
929 
930   @ref Backend
931 **/
932 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
933   CeedCall(CeedCalloc(1, data));
934   (*data)->ref_count = 1;
935   (*data)->ceed      = ceed;
936   CeedCall(CeedReference(ceed));
937 
938   return CEED_ERROR_SUCCESS;
939 }
940 
941 /**
942   @brief Increment the reference counter for a CeedQFunctionAssemblyData
943 
944   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
945 
946   @return An error code: 0 - success, otherwise - failure
947 
948   @ref Backend
949 **/
950 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
951   data->ref_count++;
952   return CEED_ERROR_SUCCESS;
953 }
954 
955 /**
956   @brief Set re-use of CeedQFunctionAssemblyData
957 
958   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
959   @param[in]     reuse_data Boolean flag indicating data re-use
960 
961   @return An error code: 0 - success, otherwise - failure
962 
963   @ref Backend
964 **/
965 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
966   data->reuse_data        = reuse_data;
967   data->needs_data_update = true;
968   return CEED_ERROR_SUCCESS;
969 }
970 
971 /**
972   @brief Mark QFunctionAssemblyData as stale
973 
974   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
975   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
976 
977   @return An error code: 0 - success, otherwise - failure
978 
979   @ref Backend
980 **/
981 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
982   data->needs_data_update = needs_data_update;
983   return CEED_ERROR_SUCCESS;
984 }
985 
986 /**
987   @brief Determine if QFunctionAssemblyData needs update
988 
989   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
990   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
991 
992   @return An error code: 0 - success, otherwise - failure
993 
994   @ref Backend
995 **/
996 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
997   *is_update_needed = !data->reuse_data || data->needs_data_update;
998   return CEED_ERROR_SUCCESS;
999 }
1000 
1001 /**
1002   @brief Copy the pointer to a CeedQFunctionAssemblyData.
1003            Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
1004 
1005            Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
1006              CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
1007              CeedQFunctionAssemblyData.
1008 
1009   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
1010   @param[in,out] data_copy Variable to store copied reference
1011 
1012   @return An error code: 0 - success, otherwise - failure
1013 
1014   @ref Backend
1015 **/
1016 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1017   CeedCall(CeedQFunctionAssemblyDataReference(data));
1018   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1019   *data_copy = data;
1020   return CEED_ERROR_SUCCESS;
1021 }
1022 
1023 /**
1024   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1025 
1026   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1027   @param[out] is_setup Boolean flag for setup status
1028 
1029   @return An error code: 0 - success, otherwise - failure
1030 
1031   @ref Backend
1032 **/
1033 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1034   *is_setup = data->is_setup;
1035   return CEED_ERROR_SUCCESS;
1036 }
1037 
1038 /**
1039   @brief Set internal objects for CeedQFunctionAssemblyData
1040 
1041   @param[in,out] data CeedQFunctionAssemblyData to set objects
1042   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1043   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1044 
1045   @return An error code: 0 - success, otherwise - failure
1046 
1047   @ref Backend
1048 **/
1049 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1050   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1051   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1052 
1053   data->is_setup = true;
1054   return CEED_ERROR_SUCCESS;
1055 }
1056 
1057 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1058   if (!data->is_setup) {
1059     // LCOV_EXCL_START
1060     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1061     // LCOV_EXCL_STOP
1062   }
1063 
1064   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1065   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1066 
1067   return CEED_ERROR_SUCCESS;
1068 }
1069 
1070 /**
1071   @brief Destroy CeedQFunctionAssemblyData
1072 
1073   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1074 
1075   @return An error code: 0 - success, otherwise - failure
1076 
1077   @ref Backend
1078 **/
1079 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1080   if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;
1081 
1082   CeedCall(CeedDestroy(&(*data)->ceed));
1083   CeedCall(CeedVectorDestroy(&(*data)->vec));
1084   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1085 
1086   CeedCall(CeedFree(data));
1087   return CEED_ERROR_SUCCESS;
1088 }
1089 
1090 /**
1091   @brief Get CeedOperatorAssemblyData
1092 
1093   @param[in]  op   CeedOperator to assemble
1094   @param[out] data CeedQFunctionAssemblyData
1095 
1096   @return An error code: 0 - success, otherwise - failure
1097 
1098   @ref Backend
1099 **/
1100 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1101   if (!op->op_assembled) {
1102     CeedOperatorAssemblyData data;
1103 
1104     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1105     op->op_assembled = data;
1106   }
1107   *data = op->op_assembled;
1108 
1109   return CEED_ERROR_SUCCESS;
1110 }
1111 
1112 /**
1113   @brief Create object holding CeedOperator assembly data.
1114 
1115     The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1116     An array with references to the corresponding active CeedElemRestrictions is also stored.
1117     For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1118     The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1119       CeedEvalMode.
1120     The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1121     Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1122 
1123   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1124   @param[in]  op   CeedOperator to be assembled
1125   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1126 
1127   @return An error code: 0 - success, otherwise - failure
1128 
1129   @ref Backend
1130 **/
1131 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1132   CeedInt num_active_bases = 0;
1133 
1134   // Allocate
1135   CeedCall(CeedCalloc(1, data));
1136   (*data)->ceed = ceed;
1137   CeedCall(CeedReference(ceed));
1138 
1139   // Build OperatorAssembly data
1140   CeedQFunction       qf;
1141   CeedQFunctionField *qf_fields;
1142   CeedOperatorField  *op_fields;
1143   CeedInt             num_input_fields;
1144   CeedCall(CeedOperatorGetQFunction(op, &qf));
1145   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1146   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1147 
1148   // Determine active input basis
1149   CeedInt       *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0;
1150   CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL;
1151   CeedSize     **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1152 
1153   for (CeedInt i = 0; i < num_input_fields; i++) {
1154     CeedVector vec;
1155 
1156     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1157     if (vec == CEED_VECTOR_ACTIVE) {
1158       CeedInt      index = -1, dim = 1, num_components;
1159       CeedBasis    basis_in = NULL;
1160       CeedEvalMode eval_mode;
1161 
1162       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1163       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1164       CeedCall(CeedBasisGetNumComponents(basis_in, &num_components));
1165       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1166       for (CeedInt i = 0; i < num_active_bases; i++) {
1167         if ((*data)->active_bases[i] == basis_in) index = i;
1168       }
1169       if (index == -1) {
1170         CeedElemRestriction elem_rstr_in;
1171 
1172         index = num_active_bases;
1173         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1174         (*data)->active_bases[num_active_bases] = NULL;
1175         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases]));
1176         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1177         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1178         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1179         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases]));
1180         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1181         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1182         num_eval_modes_in[index]  = 0;
1183         num_eval_modes_out[index] = 0;
1184         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1185         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1186         eval_modes_in[index]  = NULL;
1187         eval_modes_out[index] = NULL;
1188         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1189         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1190         eval_mode_offsets_in[index]  = NULL;
1191         eval_mode_offsets_out[index] = NULL;
1192         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1193         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1194         (*data)->assembled_bases_in[index]  = NULL;
1195         (*data)->assembled_bases_out[index] = NULL;
1196         num_active_bases++;
1197       }
1198       switch (eval_mode) {
1199         case CEED_EVAL_NONE:
1200         case CEED_EVAL_INTERP:
1201           CeedCall(CeedRealloc(num_eval_modes_in[index] + 1, &eval_modes_in[index]));
1202           CeedCall(CeedRealloc(num_eval_modes_in[index] + 1, &eval_mode_offsets_in[index]));
1203           eval_modes_in[index][num_eval_modes_in[index]]        = eval_mode;
1204           eval_mode_offsets_in[index][num_eval_modes_in[index]] = offset;
1205           offset += num_components;
1206           num_eval_modes_in[index] += 1;
1207           break;
1208         case CEED_EVAL_GRAD:
1209           CeedCall(CeedRealloc(num_eval_modes_in[index] + dim, &eval_modes_in[index]));
1210           CeedCall(CeedRealloc(num_eval_modes_in[index] + dim, &eval_mode_offsets_in[index]));
1211           for (CeedInt d = 0; d < dim; d++) {
1212             eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1213             eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1214             offset += num_components;
1215           }
1216           num_eval_modes_in[index] += dim;
1217           break;
1218         case CEED_EVAL_WEIGHT:
1219         case CEED_EVAL_DIV:
1220         case CEED_EVAL_CURL:
1221           break;  // Caught by QF Assembly
1222       }
1223     }
1224   }
1225   (*data)->num_eval_modes_in    = num_eval_modes_in;
1226   (*data)->eval_modes_in        = eval_modes_in;
1227   (*data)->eval_mode_offsets_in = eval_mode_offsets_in;
1228 
1229   // Determine active output basis
1230   CeedInt num_output_fields;
1231 
1232   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1233   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1234   offset = 0;
1235   for (CeedInt i = 0; i < num_output_fields; i++) {
1236     CeedVector vec;
1237 
1238     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1239     if (vec == CEED_VECTOR_ACTIVE) {
1240       CeedInt      index = -1, dim = 1, num_components;
1241       CeedBasis    basis_out = NULL;
1242       CeedEvalMode eval_mode;
1243 
1244       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1245       CeedCall(CeedBasisGetDimension(basis_out, &dim));
1246       CeedCall(CeedBasisGetNumComponents(basis_out, &num_components));
1247       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1248       for (CeedInt i = 0; i < num_active_bases; i++) {
1249         if ((*data)->active_bases[i] == basis_out) index = i;
1250       }
1251       if (index == -1) {
1252         CeedElemRestriction elem_rstr_out;
1253 
1254         index = num_active_bases;
1255         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1256         (*data)->active_bases[num_active_bases] = NULL;
1257         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases]));
1258         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1259         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1260         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1261         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases]));
1262         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1263         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1264         num_eval_modes_in[index]  = 0;
1265         num_eval_modes_out[index] = 0;
1266         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1267         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1268         eval_modes_in[index]  = NULL;
1269         eval_modes_out[index] = NULL;
1270         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1271         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1272         eval_mode_offsets_in[index]  = NULL;
1273         eval_mode_offsets_out[index] = NULL;
1274         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1275         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1276         (*data)->assembled_bases_in[index]  = NULL;
1277         (*data)->assembled_bases_out[index] = NULL;
1278         num_active_bases++;
1279       }
1280       switch (eval_mode) {
1281         case CEED_EVAL_NONE:
1282         case CEED_EVAL_INTERP:
1283           CeedCall(CeedRealloc(num_eval_modes_out[index] + 1, &eval_modes_out[index]));
1284           CeedCall(CeedRealloc(num_eval_modes_out[index] + 1, &eval_mode_offsets_out[index]));
1285           eval_modes_out[index][num_eval_modes_out[index]]        = eval_mode;
1286           eval_mode_offsets_out[index][num_eval_modes_out[index]] = offset;
1287           offset += num_components;
1288           num_eval_modes_out[index] += 1;
1289           break;
1290         case CEED_EVAL_GRAD:
1291           CeedCall(CeedRealloc(num_eval_modes_out[index] + dim, &eval_modes_out[index]));
1292           CeedCall(CeedRealloc(num_eval_modes_out[index] + dim, &eval_mode_offsets_out[index]));
1293           for (CeedInt d = 0; d < dim; d++) {
1294             eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1295             eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1296             offset += num_components;
1297           }
1298           num_eval_modes_out[index] += dim;
1299           break;
1300         case CEED_EVAL_WEIGHT:
1301         case CEED_EVAL_DIV:
1302         case CEED_EVAL_CURL:
1303           break;  // Caught by QF Assembly
1304       }
1305     }
1306   }
1307   (*data)->num_output_components = offset;
1308   (*data)->num_eval_modes_out    = num_eval_modes_out;
1309   (*data)->eval_modes_out        = eval_modes_out;
1310   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1311   (*data)->num_active_bases      = num_active_bases;
1312 
1313   return CEED_ERROR_SUCCESS;
1314 }
1315 
1316 /**
1317   @brief Get CeedOperator CeedEvalModes for assembly.
1318 
1319     Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1320 
1321   @param[in]  data                  CeedOperatorAssemblyData
1322   @param[out] num_active_bases      Total number of active bases
1323   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1324                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1325   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1326   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1327   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1328   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1329   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1330   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1331                                       including contributions of all active bases
1332 
1333   @return An error code: 0 - success, otherwise - failure
1334 
1335 
1336   @ref Backend
1337 **/
1338 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in,
1339                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out,
1340                                          const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) {
1341   if (num_active_bases) *num_active_bases = data->num_active_bases;
1342   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1343   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1344   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1345   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1346   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1347   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1348   if (num_output_components) *num_output_components = data->num_output_components;
1349 
1350   return CEED_ERROR_SUCCESS;
1351 }
1352 
1353 /**
1354   @brief Get CeedOperator CeedBasis data for assembly.
1355 
1356     Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1357 
1358   @param[in]  data                CeedOperatorAssemblyData
1359   @param[out] num_active_bases    Number of active bases, or NULL
1360   @param[out] active_bases        Pointer to hold active CeedBasis, or NULL
1361   @param[out] assembled_bases_in  Pointer to hold assembled active input B, or NULL
1362   @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL
1363 
1364   @return An error code: 0 - success, otherwise - failure
1365 
1366   @ref Backend
1367 **/
1368 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases,
1369                                      const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) {
1370   // Assemble B_in, B_out if needed
1371   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1372     CeedInt num_qpts;
1373 
1374     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1375     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1376       CeedInt           elem_size;
1377       CeedScalar       *B_in = NULL, *identity = NULL;
1378       const CeedScalar *interp_in, *grad_in;
1379       bool              has_eval_none = false;
1380 
1381       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &elem_size));
1382       CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_modes_in[b], &B_in));
1383 
1384       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1385         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1386       }
1387       if (has_eval_none) {
1388         CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1389         for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1390           identity[i * elem_size + i] = 1.0;
1391         }
1392       }
1393       CeedCall(CeedBasisGetInterp(data->active_bases[b], &interp_in));
1394       CeedCall(CeedBasisGetGrad(data->active_bases[b], &grad_in));
1395 
1396       for (CeedInt q = 0; q < num_qpts; q++) {
1397         for (CeedInt n = 0; n < elem_size; n++) {
1398           CeedInt d_in = -1;
1399           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1400             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1401             const CeedScalar *B  = NULL;
1402 
1403             if (data->eval_modes_in[b][e_in] == CEED_EVAL_GRAD) d_in++;
1404             CeedOperatorGetBasisPointer(data->eval_modes_in[b][e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &B);
1405             B_in[(qq + e_in) * elem_size + n] = B[q * elem_size + n];
1406           }
1407         }
1408       }
1409       if (identity) CeedCall(CeedFree(identity));
1410       data->assembled_bases_in[b] = B_in;
1411     }
1412   }
1413 
1414   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1415     CeedInt num_qpts;
1416 
1417     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1418     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1419       CeedInt           elem_size;
1420       const CeedScalar *interp_out, *grad_out;
1421       bool              has_eval_none = false;
1422       CeedScalar       *B_out = NULL, *identity = NULL;
1423 
1424       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &elem_size));
1425       CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_modes_out[b], &B_out));
1426 
1427       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1428         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1429       }
1430       if (has_eval_none) {
1431         CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1432         for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1433           identity[i * elem_size + i] = 1.0;
1434         }
1435       }
1436       CeedCall(CeedBasisGetInterp(data->active_bases[b], &interp_out));
1437       CeedCall(CeedBasisGetGrad(data->active_bases[b], &grad_out));
1438 
1439       for (CeedInt q = 0; q < num_qpts; q++) {
1440         for (CeedInt n = 0; n < elem_size; n++) {
1441           CeedInt d_out = -1;
1442           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1443             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1444             const CeedScalar *B  = NULL;
1445 
1446             if (data->eval_modes_out[b][e_out] == CEED_EVAL_GRAD) d_out++;
1447             CeedOperatorGetBasisPointer(data->eval_modes_out[b][e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &B);
1448             B_out[(qq + e_out) * elem_size + n] = B[q * elem_size + n];
1449           }
1450         }
1451       }
1452       if (identity) CeedCall(CeedFree(identity));
1453       data->assembled_bases_out[b] = B_out;
1454     }
1455   }
1456 
1457   // Pass out assembled data
1458   if (active_bases) *active_bases = data->active_bases;
1459   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1460   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1461 
1462   return CEED_ERROR_SUCCESS;
1463 }
1464 
1465 /**
1466   @brief Get CeedOperator CeedBasis data for assembly.
1467 
1468   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1469 
1470   @param[in]  data                  CeedOperatorAssemblyData
1471   @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL
1472   @param[out] active_elem_rstrs     Pointer to hold active CeedElemRestrictions, or NULL
1473 
1474   @return An error code: 0 - success, otherwise - failure
1475 
1476   @ref Backend
1477 **/
1478 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs,
1479                                                 CeedElemRestriction **active_elem_rstrs) {
1480   if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases;
1481   if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs;
1482 
1483   return CEED_ERROR_SUCCESS;
1484 }
1485 
1486 /**
1487   @brief Destroy CeedOperatorAssemblyData
1488 
1489   @param[in,out] data CeedOperatorAssemblyData to destroy
1490 
1491   @return An error code: 0 - success, otherwise - failure
1492 
1493   @ref Backend
1494 **/
1495 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1496   if (!*data) return CEED_ERROR_SUCCESS;
1497 
1498   CeedCall(CeedDestroy(&(*data)->ceed));
1499   for (CeedInt b = 0; b < (*data)->num_active_bases; b++) {
1500     CeedCall(CeedBasisDestroy(&(*data)->active_bases[b]));
1501     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b]));
1502     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1503     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1504     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1505     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1506     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1507     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1508   }
1509   CeedCall(CeedFree(&(*data)->active_bases));
1510   CeedCall(CeedFree(&(*data)->active_elem_rstrs));
1511   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1512   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1513   CeedCall(CeedFree(&(*data)->eval_modes_in));
1514   CeedCall(CeedFree(&(*data)->eval_modes_out));
1515   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1516   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1517   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1518   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1519 
1520   CeedCall(CeedFree(data));
1521   return CEED_ERROR_SUCCESS;
1522 }
1523 
1524 /// @}
1525 
1526 /// ----------------------------------------------------------------------------
1527 /// CeedOperator Public API
1528 /// ----------------------------------------------------------------------------
1529 /// @addtogroup CeedOperatorUser
1530 /// @{
1531 
1532 /**
1533   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1534 
1535   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1536     The vector 'assembled' is of shape [num_elements, num_input_fields, num_output_fields, num_quad_points] and contains column-major matrices
1537 representing the action of the CeedQFunction for a corresponding quadrature point on an element. Inputs and outputs are in the order provided by the
1538 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order,
1539 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1]
1540 and producing the output [dv_0, dv_1, v].
1541 
1542   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1543 
1544   @param[in]  op        CeedOperator to assemble CeedQFunction
1545   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1546   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1547   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1548 
1549   @return An error code: 0 - success, otherwise - failure
1550 
1551   @ref User
1552 **/
1553 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1554   CeedCall(CeedOperatorCheckReady(op));
1555 
1556   if (op->LinearAssembleQFunction) {
1557     // Backend version
1558     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1559   } else {
1560     // Operator fallback
1561     CeedOperator op_fallback;
1562 
1563     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1564     if (op_fallback) {
1565       CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1566     } else {
1567       // LCOV_EXCL_START
1568       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1569       // LCOV_EXCL_STOP
1570     }
1571   }
1572   return CEED_ERROR_SUCCESS;
1573 }
1574 
1575 /**
1576   @brief Assemble CeedQFunction and store result internally.
1577            Return copied references of stored data to the caller.
1578            Caller is responsible for ownership and destruction of the copied references.
1579            See also @ref CeedOperatorLinearAssembleQFunction
1580 
1581   @param[in]  op        CeedOperator to assemble CeedQFunction
1582   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1583   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1584   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1585 
1586   @return An error code: 0 - success, otherwise - failure
1587 
1588   @ref User
1589 **/
1590 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1591   CeedCall(CeedOperatorCheckReady(op));
1592 
1593   if (op->LinearAssembleQFunctionUpdate) {
1594     // Backend version
1595     bool                qf_assembled_is_setup;
1596     CeedVector          assembled_vec  = NULL;
1597     CeedElemRestriction assembled_rstr = NULL;
1598 
1599     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1600     if (qf_assembled_is_setup) {
1601       bool update_needed;
1602 
1603       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1604       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1605       if (update_needed) {
1606         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1607       }
1608     } else {
1609       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1610       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1611     }
1612     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1613 
1614     // Copy reference from internally held copy
1615     *assembled = NULL;
1616     *rstr      = NULL;
1617     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1618     CeedCall(CeedVectorDestroy(&assembled_vec));
1619     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1620     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1621   } else {
1622     // Operator fallback
1623     CeedOperator op_fallback;
1624 
1625     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1626     if (op_fallback) {
1627       CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1628     } else {
1629       // LCOV_EXCL_START
1630       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1631       // LCOV_EXCL_STOP
1632     }
1633   }
1634 
1635   return CEED_ERROR_SUCCESS;
1636 }
1637 
1638 /**
1639   @brief Assemble the diagonal of a square linear CeedOperator
1640 
1641   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1642 
1643   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1644 
1645   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1646 
1647   @param[in]  op        CeedOperator to assemble CeedQFunction
1648   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1649   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1650 
1651   @return An error code: 0 - success, otherwise - failure
1652 
1653   @ref User
1654 **/
1655 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1656   bool is_composite;
1657   CeedCall(CeedOperatorCheckReady(op));
1658   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1659 
1660   CeedSize input_size = 0, output_size = 0;
1661   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1662   if (input_size != output_size) {
1663     // LCOV_EXCL_START
1664     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1665     // LCOV_EXCL_STOP
1666   }
1667 
1668   // Early exit for empty operator
1669   if (!is_composite) {
1670     CeedInt num_elem = 0;
1671 
1672     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1673     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1674   }
1675 
1676   if (op->LinearAssembleDiagonal) {
1677     // Backend version
1678     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1679     return CEED_ERROR_SUCCESS;
1680   } else if (op->LinearAssembleAddDiagonal) {
1681     // Backend version with zeroing first
1682     CeedCall(CeedVectorSetValue(assembled, 0.0));
1683     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1684     return CEED_ERROR_SUCCESS;
1685   } else {
1686     // Operator fallback
1687     CeedOperator op_fallback;
1688 
1689     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1690     if (op_fallback) {
1691       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1692       return CEED_ERROR_SUCCESS;
1693     }
1694   }
1695   // Default interface implementation
1696   CeedCall(CeedVectorSetValue(assembled, 0.0));
1697   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1698 
1699   return CEED_ERROR_SUCCESS;
1700 }
1701 
1702 /**
1703   @brief Assemble the diagonal of a square linear CeedOperator
1704 
1705   This sums into a CeedVector the diagonal of a linear CeedOperator.
1706 
1707   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1708 
1709   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1710 
1711   @param[in]  op        CeedOperator to assemble CeedQFunction
1712   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1713   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1714 
1715   @return An error code: 0 - success, otherwise - failure
1716 
1717   @ref User
1718 **/
1719 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1720   bool is_composite;
1721   CeedCall(CeedOperatorCheckReady(op));
1722   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1723 
1724   CeedSize input_size = 0, output_size = 0;
1725   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1726   if (input_size != output_size) {
1727     // LCOV_EXCL_START
1728     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1729     // LCOV_EXCL_STOP
1730   }
1731 
1732   // Early exit for empty operator
1733   if (!is_composite) {
1734     CeedInt num_elem = 0;
1735 
1736     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1737     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1738   }
1739 
1740   if (op->LinearAssembleAddDiagonal) {
1741     // Backend version
1742     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1743     return CEED_ERROR_SUCCESS;
1744   } else {
1745     // Operator fallback
1746     CeedOperator op_fallback;
1747 
1748     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1749     if (op_fallback) {
1750       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1751       return CEED_ERROR_SUCCESS;
1752     }
1753   }
1754   // Default interface implementation
1755   if (is_composite) {
1756     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1757   } else {
1758     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1759   }
1760 
1761   return CEED_ERROR_SUCCESS;
1762 }
1763 
1764 /**
1765   @brief Assemble the point block diagonal of a square linear CeedOperator
1766 
1767   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1768 
1769   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1770 
1771   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1772 
1773   @param[in]  op        CeedOperator to assemble CeedQFunction
1774   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1775 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1776 component in].
1777   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1778 
1779   @return An error code: 0 - success, otherwise - failure
1780 
1781   @ref User
1782 **/
1783 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1784   bool is_composite;
1785   CeedCall(CeedOperatorCheckReady(op));
1786   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1787 
1788   CeedSize input_size = 0, output_size = 0;
1789   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1790   if (input_size != output_size) {
1791     // LCOV_EXCL_START
1792     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1793     // LCOV_EXCL_STOP
1794   }
1795 
1796   // Early exit for empty operator
1797   if (!is_composite) {
1798     CeedInt num_elem = 0;
1799 
1800     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1801     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1802   }
1803 
1804   if (op->LinearAssemblePointBlockDiagonal) {
1805     // Backend version
1806     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1807     return CEED_ERROR_SUCCESS;
1808   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1809     // Backend version with zeroing first
1810     CeedCall(CeedVectorSetValue(assembled, 0.0));
1811     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1812     return CEED_ERROR_SUCCESS;
1813   } else {
1814     // Operator fallback
1815     CeedOperator op_fallback;
1816 
1817     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1818     if (op_fallback) {
1819       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1820       return CEED_ERROR_SUCCESS;
1821     }
1822   }
1823   // Default interface implementation
1824   CeedCall(CeedVectorSetValue(assembled, 0.0));
1825   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1826 
1827   return CEED_ERROR_SUCCESS;
1828 }
1829 
1830 /**
1831   @brief Assemble the point block diagonal of a square linear CeedOperator
1832 
1833   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1834 
1835   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1836 
1837   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1838 
1839   @param[in]  op        CeedOperator to assemble CeedQFunction
1840   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1841 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1842 component in].
1843   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1844 
1845   @return An error code: 0 - success, otherwise - failure
1846 
1847   @ref User
1848 **/
1849 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1850   bool is_composite;
1851   CeedCall(CeedOperatorCheckReady(op));
1852   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1853 
1854   CeedSize input_size = 0, output_size = 0;
1855   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1856   if (input_size != output_size) {
1857     // LCOV_EXCL_START
1858     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1859     // LCOV_EXCL_STOP
1860   }
1861 
1862   // Early exit for empty operator
1863   if (!is_composite) {
1864     CeedInt num_elem = 0;
1865 
1866     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1867     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1868   }
1869 
1870   if (op->LinearAssembleAddPointBlockDiagonal) {
1871     // Backend version
1872     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1873     return CEED_ERROR_SUCCESS;
1874   } else {
1875     // Operator fallback
1876     CeedOperator op_fallback;
1877 
1878     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1879     if (op_fallback) {
1880       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1881       return CEED_ERROR_SUCCESS;
1882     }
1883   }
1884   // Default interface implementation
1885   if (is_composite) {
1886     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1887   } else {
1888     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1889   }
1890 
1891   return CEED_ERROR_SUCCESS;
1892 }
1893 
1894 /**
1895    @brief Fully assemble the nonzero pattern of a linear operator.
1896 
1897    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1898 
1899    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1900 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j)
1901 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1902 
1903    This will generally be slow unless your operator is low-order.
1904 
1905    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1906 
1907    @param[in]  op          CeedOperator to assemble
1908    @param[out] num_entries Number of entries in coordinate nonzero pattern
1909    @param[out] rows        Row number for each entry
1910    @param[out] cols        Column number for each entry
1911 
1912    @ref User
1913 **/
1914 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1915   CeedInt       num_suboperators, single_entries;
1916   CeedOperator *sub_operators;
1917   bool          is_composite;
1918   CeedCall(CeedOperatorCheckReady(op));
1919   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1920 
1921   if (op->LinearAssembleSymbolic) {
1922     // Backend version
1923     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1924     return CEED_ERROR_SUCCESS;
1925   } else {
1926     // Operator fallback
1927     CeedOperator op_fallback;
1928 
1929     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1930     if (op_fallback) {
1931       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1932       return CEED_ERROR_SUCCESS;
1933     }
1934   }
1935 
1936   // Default interface implementation
1937 
1938   // count entries and allocate rows, cols arrays
1939   *num_entries = 0;
1940   if (is_composite) {
1941     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1942     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1943     for (CeedInt k = 0; k < num_suboperators; ++k) {
1944       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1945       *num_entries += single_entries;
1946     }
1947   } else {
1948     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1949     *num_entries += single_entries;
1950   }
1951   CeedCall(CeedCalloc(*num_entries, rows));
1952   CeedCall(CeedCalloc(*num_entries, cols));
1953 
1954   // assemble nonzero locations
1955   CeedInt offset = 0;
1956   if (is_composite) {
1957     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1958     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1959     for (CeedInt k = 0; k < num_suboperators; ++k) {
1960       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1961       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1962       offset += single_entries;
1963     }
1964   } else {
1965     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1966   }
1967 
1968   return CEED_ERROR_SUCCESS;
1969 }
1970 
1971 /**
1972    @brief Fully assemble the nonzero entries of a linear operator.
1973 
1974    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1975 
1976    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1977 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added,
1978 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1979 
1980    This will generally be slow unless your operator is low-order.
1981 
1982    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1983 
1984    @param[in]  op     CeedOperator to assemble
1985    @param[out] values Values to assemble into matrix
1986 
1987    @ref User
1988 **/
1989 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1990   CeedInt       num_suboperators, single_entries = 0;
1991   CeedOperator *sub_operators;
1992   bool          is_composite;
1993   CeedCall(CeedOperatorCheckReady(op));
1994   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1995 
1996   // Early exit for empty operator
1997   if (!is_composite) {
1998     CeedInt num_elem = 0;
1999 
2000     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2001     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2002   }
2003 
2004   if (op->LinearAssemble) {
2005     // Backend version
2006     CeedCall(op->LinearAssemble(op, values));
2007     return CEED_ERROR_SUCCESS;
2008   } else {
2009     // Operator fallback
2010     CeedOperator op_fallback;
2011 
2012     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2013     if (op_fallback) {
2014       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2015       return CEED_ERROR_SUCCESS;
2016     }
2017   }
2018 
2019   // Default interface implementation
2020   CeedInt offset = 0;
2021   CeedCall(CeedVectorSetValue(values, 0.0));
2022   if (is_composite) {
2023     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2024     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2025     for (CeedInt k = 0; k < num_suboperators; k++) {
2026       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2027       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2028       offset += single_entries;
2029     }
2030   } else {
2031     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2032   }
2033 
2034   return CEED_ERROR_SUCCESS;
2035 }
2036 
2037 /**
2038   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
2039 
2040   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2041 
2042   @param[in]  op               Composite CeedOperator
2043   @param[in]  num_skip_indices Number of suboperators to skip
2044   @param[in]  skip_indices     Array of indices of suboperators to skip
2045   @param[out] mult             Vector to store multiplicity (of size l_size)
2046 
2047   @return An error code: 0 - success, otherwise - failure
2048 
2049   @ref User
2050 **/
2051 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2052   CeedCall(CeedOperatorCheckReady(op));
2053 
2054   Ceed                ceed;
2055   CeedInt             num_suboperators;
2056   CeedSize            l_vec_len;
2057   CeedScalar         *mult_array;
2058   CeedVector          ones_l_vec;
2059   CeedElemRestriction elem_rstr;
2060   CeedOperator       *sub_operators;
2061 
2062   CeedCall(CeedOperatorGetCeed(op, &ceed));
2063 
2064   // Zero mult vector
2065   CeedCall(CeedVectorSetValue(mult, 0.0));
2066 
2067   // Get suboperators
2068   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2069   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2070   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2071 
2072   // Work vector
2073   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2074   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2075   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2076   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2077 
2078   // Compute multiplicity across suboperators
2079   for (CeedInt i = 0; i < num_suboperators; i++) {
2080     const CeedScalar *sub_mult_array;
2081     CeedVector        sub_mult_l_vec, ones_e_vec;
2082 
2083     // -- Check for suboperator to skip
2084     for (CeedInt j = 0; j < num_skip_indices; j++) {
2085       if (skip_indices[j] == i) continue;
2086     }
2087 
2088     // -- Sub operator multiplicity
2089     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2090     CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2091     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2092     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2093     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2094     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2095     // ---- Flag every node present in the current suboperator
2096     for (CeedInt j = 0; j < l_vec_len; j++) {
2097       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2098     }
2099     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2100     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2101     CeedCall(CeedVectorDestroy(&ones_e_vec));
2102   }
2103   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2104   CeedCall(CeedVectorDestroy(&ones_l_vec));
2105 
2106   return CEED_ERROR_SUCCESS;
2107 }
2108 
2109 /**
2110   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2111 grid interpolation
2112 
2113   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2114 
2115   @param[in]  op_fine      Fine grid operator
2116   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2117   @param[in]  rstr_coarse  Coarse grid restriction
2118   @param[in]  basis_coarse Coarse grid active vector basis
2119   @param[out] op_coarse    Coarse grid operator
2120   @param[out] op_prolong   Coarse to fine operator, or NULL
2121   @param[out] op_restrict  Fine to coarse operator, or NULL
2122 
2123   @return An error code: 0 - success, otherwise - failure
2124 
2125   @ref User
2126 **/
2127 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2128                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2129   CeedCall(CeedOperatorCheckReady(op_fine));
2130 
2131   // Build prolongation matrix, if required
2132   CeedBasis basis_c_to_f = NULL;
2133   if (op_prolong || op_restrict) {
2134     CeedBasis basis_fine;
2135     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2136     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2137   }
2138 
2139   // Core code
2140   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2141 
2142   return CEED_ERROR_SUCCESS;
2143 }
2144 
2145 /**
2146   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2147 
2148   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2149 
2150   @param[in]  op_fine       Fine grid operator
2151   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2152   @param[in]  rstr_coarse   Coarse grid restriction
2153   @param[in]  basis_coarse  Coarse grid active vector basis
2154   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2155   @param[out] op_coarse     Coarse grid operator
2156   @param[out] op_prolong    Coarse to fine operator, or NULL
2157   @param[out] op_restrict   Fine to coarse operator, or NULL
2158 
2159   @return An error code: 0 - success, otherwise - failure
2160 
2161   @ref User
2162 **/
2163 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2164                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2165                                              CeedOperator *op_restrict) {
2166   CeedCall(CeedOperatorCheckReady(op_fine));
2167   Ceed ceed;
2168   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2169 
2170   // Check for compatible quadrature spaces
2171   CeedBasis basis_fine;
2172   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2173   CeedInt Q_f, Q_c;
2174   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2175   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2176   if (Q_f != Q_c) {
2177     // LCOV_EXCL_START
2178     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2179     // LCOV_EXCL_STOP
2180   }
2181 
2182   // Create coarse to fine basis, if required
2183   CeedBasis basis_c_to_f = NULL;
2184   if (op_prolong || op_restrict) {
2185     // Check if interpolation matrix is provided
2186     if (!interp_c_to_f) {
2187       // LCOV_EXCL_START
2188       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2189       // LCOV_EXCL_STOP
2190     }
2191     CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2192     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2193     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2194     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2195     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2196     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2197     CeedScalar *q_ref, *q_weight, *grad;
2198     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2199     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2200     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2201     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2202     CeedCall(CeedFree(&q_ref));
2203     CeedCall(CeedFree(&q_weight));
2204     CeedCall(CeedFree(&grad));
2205   }
2206 
2207   // Core code
2208   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2209   return CEED_ERROR_SUCCESS;
2210 }
2211 
2212 /**
2213   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2214 
2215   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2216 
2217   @param[in]  op_fine       Fine grid operator
2218   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2219   @param[in]  rstr_coarse   Coarse grid restriction
2220   @param[in]  basis_coarse  Coarse grid active vector basis
2221   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2222   @param[out] op_coarse     Coarse grid operator
2223   @param[out] op_prolong    Coarse to fine operator, or NULL
2224   @param[out] op_restrict   Fine to coarse operator, or NULL
2225 
2226   @return An error code: 0 - success, otherwise - failure
2227 
2228   @ref User
2229 **/
2230 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2231                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2232                                        CeedOperator *op_restrict) {
2233   CeedCall(CeedOperatorCheckReady(op_fine));
2234   Ceed ceed;
2235   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2236 
2237   // Check for compatible quadrature spaces
2238   CeedBasis basis_fine;
2239   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2240   CeedInt Q_f, Q_c;
2241   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2242   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2243   if (Q_f != Q_c) {
2244     // LCOV_EXCL_START
2245     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2246     // LCOV_EXCL_STOP
2247   }
2248 
2249   // Coarse to fine basis
2250   CeedBasis basis_c_to_f = NULL;
2251   if (op_prolong || op_restrict) {
2252     // Check if interpolation matrix is provided
2253     if (!interp_c_to_f) {
2254       // LCOV_EXCL_START
2255       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2256       // LCOV_EXCL_STOP
2257     }
2258     CeedElemTopology topo;
2259     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2260     CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2261     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2262     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2263     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2264     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2265     CeedScalar *q_ref, *q_weight, *grad;
2266     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2267     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2268     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2269     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2270     CeedCall(CeedFree(&q_ref));
2271     CeedCall(CeedFree(&q_weight));
2272     CeedCall(CeedFree(&grad));
2273   }
2274 
2275   // Core code
2276   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2277   return CEED_ERROR_SUCCESS;
2278 }
2279 
2280 /**
2281   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2282 
2283   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2284     This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, M = V^T V, K = V^T S V.
2285     The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form V^T
2286 S^hat V. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear.
2287 
2288   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2289 
2290   @param[in]  op      CeedOperator to create element inverses
2291   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2292   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2293 
2294   @return An error code: 0 - success, otherwise - failure
2295 
2296   @ref User
2297 **/
2298 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2299   CeedCall(CeedOperatorCheckReady(op));
2300 
2301   if (op->CreateFDMElementInverse) {
2302     // Backend version
2303     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2304     return CEED_ERROR_SUCCESS;
2305   } else {
2306     // Operator fallback
2307     CeedOperator op_fallback;
2308 
2309     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2310     if (op_fallback) {
2311       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2312       return CEED_ERROR_SUCCESS;
2313     }
2314   }
2315 
2316   // Default interface implementation
2317   Ceed ceed, ceed_parent;
2318   CeedCall(CeedOperatorGetCeed(op, &ceed));
2319   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2320   ceed_parent = ceed_parent ? ceed_parent : ceed;
2321   CeedQFunction qf;
2322   CeedCall(CeedOperatorGetQFunction(op, &qf));
2323 
2324   // Determine active input basis
2325   bool                interp = false, grad = false;
2326   CeedBasis           basis = NULL;
2327   CeedElemRestriction rstr  = NULL;
2328   CeedOperatorField  *op_fields;
2329   CeedQFunctionField *qf_fields;
2330   CeedInt             num_input_fields;
2331   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2332   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2333   for (CeedInt i = 0; i < num_input_fields; i++) {
2334     CeedVector vec;
2335     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2336     if (vec == CEED_VECTOR_ACTIVE) {
2337       CeedEvalMode eval_mode;
2338       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2339       interp = interp || eval_mode == CEED_EVAL_INTERP;
2340       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2341       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2342       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2343     }
2344   }
2345   if (!basis) {
2346     // LCOV_EXCL_START
2347     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2348     // LCOV_EXCL_STOP
2349   }
2350   CeedSize l_size = 1;
2351   CeedInt  P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
2352   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2353   CeedCall(CeedBasisGetNumNodes(basis, &elem_size));
2354   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2355   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2356   CeedCall(CeedBasisGetDimension(basis, &dim));
2357   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2358   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2359   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2360 
2361   // Build and diagonalize 1D Mass and Laplacian
2362   bool tensor_basis;
2363   CeedCall(CeedBasisIsTensor(basis, &tensor_basis));
2364   if (!tensor_basis) {
2365     // LCOV_EXCL_START
2366     return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2367     // LCOV_EXCL_STOP
2368   }
2369   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2370   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2371   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2372   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2373   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2374   CeedCall(CeedCalloc(P_1d, &lambda));
2375   // -- Build matrices
2376   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2377   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2378   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2379   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2380   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2381 
2382   // -- Diagonalize
2383   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2384   CeedCall(CeedFree(&mass));
2385   CeedCall(CeedFree(&laplace));
2386   for (CeedInt i = 0; i < P_1d; i++) {
2387     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2388   }
2389   CeedCall(CeedFree(&x));
2390 
2391   // Assemble QFunction
2392   CeedVector          assembled;
2393   CeedElemRestriction rstr_qf;
2394   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2395   CeedInt layout[3];
2396   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2397   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2398   CeedScalar max_norm = 0;
2399   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2400 
2401   // Calculate element averages
2402   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2403   CeedScalar       *elem_avg;
2404   const CeedScalar *assembled_array, *q_weight_array;
2405   CeedVector        q_weight;
2406   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2407   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2408   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2409   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2410   CeedCall(CeedCalloc(num_elem, &elem_avg));
2411   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2412   for (CeedInt e = 0; e < num_elem; e++) {
2413     CeedInt count = 0;
2414     for (CeedInt q = 0; q < num_qpts; q++) {
2415       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2416         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2417           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2418           count++;
2419         }
2420       }
2421     }
2422     if (count) {
2423       elem_avg[e] /= count;
2424     } else {
2425       elem_avg[e] = 1.0;
2426     }
2427   }
2428   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2429   CeedCall(CeedVectorDestroy(&assembled));
2430   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2431   CeedCall(CeedVectorDestroy(&q_weight));
2432 
2433   // Build FDM diagonal
2434   CeedVector  q_data;
2435   CeedScalar *q_data_array, *fdm_diagonal;
2436   CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal));
2437   const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON;
2438   for (CeedInt c = 0; c < num_comp; c++) {
2439     for (CeedInt n = 0; n < elem_size; n++) {
2440       if (interp) fdm_diagonal[c * elem_size + n] = 1.0;
2441       if (grad) {
2442         for (CeedInt d = 0; d < dim; d++) {
2443           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2444           fdm_diagonal[c * elem_size + n] += lambda[i];
2445         }
2446       }
2447       if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound;
2448     }
2449   }
2450   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data));
2451   CeedCall(CeedVectorSetValue(q_data, 0.0));
2452   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2453   for (CeedInt e = 0; e < num_elem; e++) {
2454     for (CeedInt c = 0; c < num_comp; c++) {
2455       for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]);
2456     }
2457   }
2458   CeedCall(CeedFree(&elem_avg));
2459   CeedCall(CeedFree(&fdm_diagonal));
2460   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2461 
2462   // Setup FDM operator
2463   // -- Basis
2464   CeedBasis   fdm_basis;
2465   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2466   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2467   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2468   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2469   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2470   CeedCall(CeedFree(&fdm_interp));
2471   CeedCall(CeedFree(&grad_dummy));
2472   CeedCall(CeedFree(&q_ref_dummy));
2473   CeedCall(CeedFree(&q_weight_dummy));
2474   CeedCall(CeedFree(&lambda));
2475 
2476   // -- Restriction
2477   CeedElemRestriction rstr_qd_i;
2478   CeedInt             strides[3] = {1, elem_size, elem_size * num_comp};
2479   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i));
2480   // -- QFunction
2481   CeedQFunction qf_fdm;
2482   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2483   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2484   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2485   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2486   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2487   // -- QFunction context
2488   CeedInt *num_comp_data;
2489   CeedCall(CeedCalloc(1, &num_comp_data));
2490   num_comp_data[0] = num_comp;
2491   CeedQFunctionContext ctx_fdm;
2492   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2493   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2494   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2495   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2496   // -- Operator
2497   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2498   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2499   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2500   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2501 
2502   // Cleanup
2503   CeedCall(CeedVectorDestroy(&q_data));
2504   CeedCall(CeedBasisDestroy(&fdm_basis));
2505   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2506   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2507 
2508   return CEED_ERROR_SUCCESS;
2509 }
2510 
2511 /// @}
2512