xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision 916593bb45df27de2596a307978bc45ad7db29a9)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <assert.h>
9 #include <ceed-impl.h>
10 #include <ceed/backend.h>
11 #include <ceed/ceed.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) {
128       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     }
130     // Cleanup
131     CeedCall(CeedQFunctionDestroy(&qf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
133     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
134   }
135   CeedCall(CeedOperatorSetName(op_fallback, op->name));
136   CeedCall(CeedOperatorCheckReady(op_fallback));
137   op->op_fallback = op_fallback;
138 
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
144 
145   @param[in]  op          CeedOperator to retrieve fallback for
146   @param[out] op_fallback Fallback CeedOperator
147 
148   @return An error code: 0 - success, otherwise - failure
149 
150   @ref Developer
151 **/
152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
153   // Create if needed
154   if (!op->op_fallback) {
155     CeedCall(CeedOperatorCreateFallback(op));
156   }
157   if (op->op_fallback) {
158     bool is_debug;
159 
160     CeedCall(CeedIsDebug(op->ceed, &is_debug));
161     if (is_debug) {
162       Ceed        ceed, ceed_fallback;
163       const char *resource, *resource_fallback;
164 
165       CeedCall(CeedOperatorGetCeed(op, &ceed));
166       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
167       CeedCall(CeedGetResource(ceed, &resource));
168       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
169 
170       CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n");
171       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
172                 op->op_fallback);
173     }
174   }
175   *op_fallback = op->op_fallback;
176 
177   return CEED_ERROR_SUCCESS;
178 }
179 
180 /**
181   @brief Select correct basis matrix pointer based on CeedEvalMode
182 
183   @param[in]  eval_mode Current basis evaluation mode
184   @param[in]  identity  Pointer to identity matrix
185   @param[in]  interp    Pointer to interpolation matrix
186   @param[in]  grad      Pointer to gradient matrix
187   @param[out] basis_ptr Basis pointer to set
188 
189   @ref Developer
190 **/
191 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad,
192                                                const CeedScalar **basis_ptr) {
193   switch (eval_mode) {
194     case CEED_EVAL_NONE:
195       *basis_ptr = identity;
196       break;
197     case CEED_EVAL_INTERP:
198       *basis_ptr = interp;
199       break;
200     case CEED_EVAL_GRAD:
201       *basis_ptr = grad;
202       break;
203     case CEED_EVAL_WEIGHT:
204     case CEED_EVAL_DIV:
205     case CEED_EVAL_CURL:
206       break;  // Caught by QF Assembly
207   }
208   assert(*basis_ptr != NULL);
209 }
210 
211 /**
212   @brief Create point block restriction for active operator field
213 
214   @param[in]  rstr            Original CeedElemRestriction for active field
215   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
216 
217   @return An error code: 0 - success, otherwise - failure
218 
219   @ref Developer
220 **/
221 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
222   Ceed ceed;
223   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
224   const CeedInt *offsets;
225   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
226 
227   // Expand offsets
228   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
229   CeedSize l_size;
230   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
231   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
232   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
233   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
234   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
235   CeedInt shift = num_comp;
236   if (comp_stride != 1) shift *= num_comp;
237   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
238   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
239     pointblock_offsets[i] = offsets[i] * shift;
240   }
241 
242   // Create new restriction
243   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
244                                      pointblock_offsets, pointblock_rstr));
245 
246   // Cleanup
247   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
248 
249   return CEED_ERROR_SUCCESS;
250 }
251 
252 /**
253   @brief Core logic for assembling operator diagonal or point block diagonal
254 
255   @param[in]  op            CeedOperator to assemble point block diagonal
256   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
257   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
258   @param[out] assembled     CeedVector to store assembled diagonal
259 
260   @return An error code: 0 - success, otherwise - failure
261 
262   @ref Developer
263 **/
264 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
265   Ceed ceed;
266   CeedCall(CeedOperatorGetCeed(op, &ceed));
267 
268   // Assemble QFunction
269   CeedQFunction       qf;
270   const CeedScalar   *assembled_qf_array;
271   CeedVector          assembled_qf;
272   CeedElemRestriction assembled_elem_rstr;
273   CeedInt             num_input_fields, num_output_fields;
274   CeedInt             layout[3];
275 
276   CeedCall(CeedOperatorGetQFunction(op, &qf));
277   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
278   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
279   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout));
280   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
281   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
282 
283   // Get assembly data
284   CeedOperatorAssemblyData data;
285   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
286   CeedInt                 *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
287   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
288   CeedBasis               *active_bases;
289   CeedElemRestriction     *active_elem_rstrs;
290 
291   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
292   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
293                                                 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components));
294   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL));
295   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs));
296 
297   // Loop over all active bases
298   for (CeedInt b = 0; b < num_active_bases; b++) {
299     // Assemble point block diagonal restriction, if needed
300     CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b];
301 
302     if (is_pointblock) {
303       CeedElemRestriction point_block_elem_rstr;
304 
305       CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr));
306       diag_elem_rstr = point_block_elem_rstr;
307     }
308 
309     // Create diagonal vector
310     CeedVector elem_diag;
311     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
312 
313     // Assemble element operator diagonals
314     CeedScalar *elem_diag_array;
315     CeedInt     num_elem, num_nodes, num_qpts, num_components;
316 
317     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
318     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
319     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
320     CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes));
321     CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components));
322     CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts));
323 
324     // Basis matrices
325     const CeedScalar *interp, *grad;
326     CeedScalar       *identity      = NULL;
327     bool              has_eval_none = false;
328     for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) {
329       has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE);
330     }
331     for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) {
332       has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE);
333     }
334     if (has_eval_none) {
335       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
336       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
337     }
338     CeedCall(CeedBasisGetInterp(active_bases[b], &interp));
339     CeedCall(CeedBasisGetGrad(active_bases[b], &grad));
340     // Compute the diagonal of B^T D B
341     // Each element
342     for (CeedInt e = 0; e < num_elem; e++) {
343       CeedInt d_out = -1;
344       // Each basis eval mode pair
345       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) {
346         const CeedScalar *B_t = NULL;
347         if (eval_modes_out[b][e_out] == CEED_EVAL_GRAD) d_out += 1;
348         CeedOperatorGetBasisPointer(eval_modes_out[b][e_out], identity, interp, &grad[d_out * num_qpts * num_nodes], &B_t);
349         CeedInt d_in = -1;
350         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) {
351           const CeedScalar *B = NULL;
352           if (eval_modes_in[b][e_in] == CEED_EVAL_GRAD) d_in += 1;
353           CeedOperatorGetBasisPointer(eval_modes_in[b][e_in], identity, interp, &grad[d_in * num_qpts * num_nodes], &B);
354           // Each component
355           for (CeedInt c_out = 0; c_out < num_components; c_out++) {
356             // Each qpt/node pair
357             for (CeedInt q = 0; q < num_qpts; q++) {
358               if (is_pointblock) {
359                 // Point Block Diagonal
360                 for (CeedInt c_in = 0; c_in < num_components; c_in++) {
361                   const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
362                   const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
363                   for (CeedInt n = 0; n < num_nodes; n++) {
364                     elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] +=
365                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
366                   }
367                 }
368               } else {
369                 // Diagonal Only
370                 const CeedInt    c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
371                 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
372                 for (CeedInt n = 0; n < num_nodes; n++) {
373                   elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
374                 }
375               }
376             }
377           }
378         }
379       }
380     }
381     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
382 
383     // Assemble local operator diagonal
384     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
385 
386     // Cleanup
387     if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
388     CeedCall(CeedVectorDestroy(&elem_diag));
389     CeedCall(CeedFree(&identity));
390   }
391   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
392   CeedCall(CeedVectorDestroy(&assembled_qf));
393 
394   return CEED_ERROR_SUCCESS;
395 }
396 
397 /**
398   @brief Core logic for assembling composite operator diagonal
399 
400   @param[in]  op            CeedOperator to assemble point block diagonal
401   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
402   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
403   @param[out] assembled     CeedVector to store assembled diagonal
404 
405   @return An error code: 0 - success, otherwise - failure
406 
407   @ref Developer
408 **/
409 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
410                                                                  CeedVector assembled) {
411   CeedInt       num_sub;
412   CeedOperator *suboperators;
413   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
414   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
415   for (CeedInt i = 0; i < num_sub; i++) {
416     if (is_pointblock) {
417       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
418     } else {
419       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
420     }
421   }
422   return CEED_ERROR_SUCCESS;
423 }
424 
425 /**
426   @brief Build nonzero pattern for non-composite operator
427 
428   Users should generally use CeedOperatorLinearAssembleSymbolic()
429 
430   @param[in]  op     CeedOperator to assemble nonzero pattern
431   @param[in]  offset Offset for number of entries
432   @param[out] rows   Row number for each entry
433   @param[out] cols   Column number for each entry
434 
435   @return An error code: 0 - success, otherwise - failure
436 
437   @ref Developer
438 **/
439 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
440   Ceed ceed;
441   bool is_composite;
442   CeedCall(CeedOperatorGetCeed(op, &ceed));
443   CeedCall(CeedOperatorIsComposite(op, &is_composite));
444 
445   if (is_composite) {
446     // LCOV_EXCL_START
447     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
448     // LCOV_EXCL_STOP
449   }
450 
451   CeedSize num_nodes;
452   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
453   CeedElemRestriction rstr_in;
454   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
455   CeedInt num_elem, elem_size, num_comp;
456   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
457   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
458   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
459   CeedInt layout_er[3];
460   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
461 
462   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
463 
464   // Determine elem_dof relation
465   CeedVector index_vec;
466   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
467   CeedScalar *array;
468   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
469   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
470   CeedCall(CeedVectorRestoreArray(index_vec, &array));
471   CeedVector elem_dof;
472   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
473   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
474   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
475   const CeedScalar *elem_dof_a;
476   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
477   CeedCall(CeedVectorDestroy(&index_vec));
478 
479   // Determine i, j locations for element matrices
480   CeedInt count = 0;
481   for (CeedInt e = 0; e < num_elem; e++) {
482     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
483       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
484         for (CeedInt i = 0; i < elem_size; i++) {
485           for (CeedInt j = 0; j < elem_size; j++) {
486             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
487             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
488 
489             const CeedInt row = elem_dof_a[elem_dof_index_row];
490             const CeedInt col = elem_dof_a[elem_dof_index_col];
491 
492             rows[offset + count] = row;
493             cols[offset + count] = col;
494             count++;
495           }
496         }
497       }
498     }
499   }
500   if (count != local_num_entries) {
501     // LCOV_EXCL_START
502     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
503     // LCOV_EXCL_STOP
504   }
505   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
506   CeedCall(CeedVectorDestroy(&elem_dof));
507 
508   return CEED_ERROR_SUCCESS;
509 }
510 
511 /**
512   @brief Assemble nonzero entries for non-composite operator
513 
514   Users should generally use CeedOperatorLinearAssemble()
515 
516   @param[in]  op     CeedOperator to assemble
517   @param[in]  offset Offset for number of entries
518   @param[out] values Values to assemble into matrix
519 
520   @return An error code: 0 - success, otherwise - failure
521 
522   @ref Developer
523 **/
524 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
525   Ceed ceed;
526   bool is_composite;
527   CeedCall(CeedOperatorGetCeed(op, &ceed));
528   CeedCall(CeedOperatorIsComposite(op, &is_composite));
529 
530   if (is_composite) {
531     // LCOV_EXCL_START
532     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
533     // LCOV_EXCL_STOP
534   }
535 
536   // Early exit for empty operator
537   {
538     CeedInt num_elem = 0;
539 
540     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
541     if (num_elem == 0) return CEED_ERROR_SUCCESS;
542   }
543 
544   if (op->LinearAssembleSingle) {
545     // Backend version
546     CeedCall(op->LinearAssembleSingle(op, offset, values));
547     return CEED_ERROR_SUCCESS;
548   } else {
549     // Operator fallback
550     CeedOperator op_fallback;
551 
552     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
553     if (op_fallback) {
554       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
555       return CEED_ERROR_SUCCESS;
556     }
557   }
558 
559   // Assemble QFunction
560   CeedQFunction qf;
561   CeedCall(CeedOperatorGetQFunction(op, &qf));
562   CeedVector          assembled_qf;
563   CeedElemRestriction rstr_q;
564   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
565   CeedSize qf_length;
566   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
567 
568   CeedInt            num_input_fields, num_output_fields;
569   CeedOperatorField *input_fields;
570   CeedOperatorField *output_fields;
571   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
572 
573   // Get assembly data
574   CeedOperatorAssemblyData data;
575   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
576   const CeedEvalMode **eval_modes_in, **eval_modes_out;
577   CeedInt             *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
578   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out,
579                                                 &eval_modes_out, NULL, NULL));
580   CeedBasis *bases;
581   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL));
582   CeedBasis basis_in = bases[0];
583 
584   if (num_active_bases > 1) {
585     // LCOV_EXCL_START
586     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases");
587     // LCOV_EXCL_STOP
588   }
589   if (num_eval_modes_in[0] == 0 || num_eval_modes_out[0] == 0) {
590     // LCOV_EXCL_START
591     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
592     // LCOV_EXCL_STOP
593   }
594 
595   CeedElemRestriction active_rstr;
596   CeedInt             num_elem, elem_size, num_qpts, num_comp;
597   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
598   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
599   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
600   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
601   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
602 
603   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
604 
605   // loop over elements and put in data structure
606   const CeedScalar *interp_in, *grad_in;
607   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
608   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
609 
610   const CeedScalar *assembled_qf_array;
611   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
612 
613   CeedInt layout_qf[3];
614   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
615   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
616 
617   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
618   const CeedScalar **B_mats_in, **B_mats_out;
619   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out));
620   const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0];
621   CeedScalar        BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]];
622   CeedScalar        elem_mat[elem_size * elem_size];
623   CeedInt           count = 0;
624   CeedScalar       *vals;
625   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
626   for (CeedInt e = 0; e < num_elem; e++) {
627     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
628       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
629         // Compute B^T*D
630         for (CeedInt n = 0; n < elem_size; n++) {
631           for (CeedInt q = 0; q < num_qpts; q++) {
632             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
633               const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in);
634               CeedScalar    sum       = 0.0;
635               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
636                 const CeedInt b_out_index     = (num_eval_modes_out[0] * q + e_out) * elem_size + n;
637                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out;
638                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
639                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
640               }
641               BTD_mat[btd_index] = sum;
642             }
643           }
644         }
645         // form element matrix itself (for each block component)
646         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0]));
647 
648         // put element matrix in coordinate data structure
649         for (CeedInt i = 0; i < elem_size; i++) {
650           for (CeedInt j = 0; j < elem_size; j++) {
651             vals[offset + count] = elem_mat[i * elem_size + j];
652             count++;
653           }
654         }
655       }
656     }
657   }
658   if (count != local_num_entries) {
659     // LCOV_EXCL_START
660     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
661     // LCOV_EXCL_STOP
662   }
663   CeedCall(CeedVectorRestoreArray(values, &vals));
664 
665   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
666   CeedCall(CeedVectorDestroy(&assembled_qf));
667 
668   return CEED_ERROR_SUCCESS;
669 }
670 
671 /**
672   @brief Count number of entries for assembled CeedOperator
673 
674   @param[in]  op          CeedOperator to assemble
675   @param[out] num_entries Number of entries in assembled representation
676 
677   @return An error code: 0 - success, otherwise - failure
678 
679   @ref Utility
680 **/
681 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
682   bool                is_composite;
683   CeedElemRestriction rstr;
684   CeedInt             num_elem, elem_size, num_comp;
685 
686   CeedCall(CeedOperatorIsComposite(op, &is_composite));
687   if (is_composite) {
688     // LCOV_EXCL_START
689     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
690     // LCOV_EXCL_STOP
691   }
692   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
693   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
694   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
695   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
696   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
697 
698   return CEED_ERROR_SUCCESS;
699 }
700 
701 /**
702   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
703 
704   @param[in]  op_fine      Fine grid operator
705   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
706   @param[in]  rstr_coarse  Coarse grid restriction
707   @param[in]  basis_coarse Coarse grid active vector basis
708   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
709   @param[out] op_coarse    Coarse grid operator
710   @param[out] op_prolong   Coarse to fine operator, or NULL
711   @param[out] op_restrict  Fine to coarse operator, or NULL
712 
713   @return An error code: 0 - success, otherwise - failure
714 
715   @ref Developer
716 **/
717 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
718                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
719   Ceed       ceed;
720   CeedVector mult_vec = NULL;
721   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
722 
723   // Check for composite operator
724   bool is_composite;
725   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
726   if (is_composite) {
727     // LCOV_EXCL_START
728     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
729     // LCOV_EXCL_STOP
730   }
731 
732   // Coarse Grid
733   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
734   CeedElemRestriction rstr_fine = NULL;
735   // -- Clone input fields
736   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
737     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
738       rstr_fine = op_fine->input_fields[i]->elem_rstr;
739       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
740     } else {
741       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
742                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
743     }
744   }
745   // -- Clone output fields
746   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
747     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
748       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
749     } else {
750       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
751                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
752     }
753   }
754   // -- Clone QFunctionAssemblyData
755   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
756 
757   // Multiplicity vector
758   if (op_restrict || op_prolong) {
759     CeedVector mult_e_vec;
760 
761     if (!p_mult_fine) {
762       // LCOV_EXCL_START
763       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
764       // LCOV_EXCL_STOP
765     }
766     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
767     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
768     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
769     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
770     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
771     CeedCall(CeedVectorDestroy(&mult_e_vec));
772     CeedCall(CeedVectorReciprocal(mult_vec));
773   }
774 
775   // Clone name
776   bool   has_name = op_fine->name;
777   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
778   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
779 
780   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
781   if ((op_restrict || op_prolong) && !basis_c_to_f) {
782     // LCOV_EXCL_START
783     return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine basis");
784     // LCOV_EXCL_STOP
785   }
786 
787   // Restriction/Prolongation Operators
788   CeedInt num_comp;
789   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
790 
791   // Restriction
792   if (op_restrict) {
793     CeedInt             *num_comp_r_data;
794     CeedQFunction        qf_restrict;
795     CeedQFunctionContext ctx_r;
796 
797     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
798     CeedCall(CeedCalloc(1, &num_comp_r_data));
799     num_comp_r_data[0] = num_comp;
800     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
801     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
802     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
803     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
804     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
805     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
806     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
807     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
808 
809     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
810     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
811     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
812     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
813 
814     // Set name
815     char *restriction_name;
816     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
817     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
818     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
819     CeedCall(CeedFree(&restriction_name));
820 
821     // Check
822     CeedCall(CeedOperatorCheckReady(*op_restrict));
823 
824     // Cleanup
825     CeedCall(CeedQFunctionDestroy(&qf_restrict));
826   }
827 
828   // Prolongation
829   if (op_prolong) {
830     CeedInt             *num_comp_p_data;
831     CeedQFunction        qf_prolong;
832     CeedQFunctionContext ctx_p;
833 
834     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
835     CeedCall(CeedCalloc(1, &num_comp_p_data));
836     num_comp_p_data[0] = num_comp;
837     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
838     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
839     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
840     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
841     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
842     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
843     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
844     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
845 
846     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
847     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
848     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
849     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
850 
851     // Set name
852     char *prolongation_name;
853     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
854     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
855     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
856     CeedCall(CeedFree(&prolongation_name));
857 
858     // Check
859     CeedCall(CeedOperatorCheckReady(*op_prolong));
860 
861     // Cleanup
862     CeedCall(CeedQFunctionDestroy(&qf_prolong));
863   }
864 
865   // Check
866   CeedCall(CeedOperatorCheckReady(*op_coarse));
867 
868   // Cleanup
869   CeedCall(CeedVectorDestroy(&mult_vec));
870   CeedCall(CeedBasisDestroy(&basis_c_to_f));
871 
872   return CEED_ERROR_SUCCESS;
873 }
874 
875 /**
876   @brief Build 1D mass matrix and Laplacian with perturbation
877 
878   @param[in]  interp_1d   Interpolation matrix in one dimension
879   @param[in]  grad_1d     Gradient matrix in one dimension
880   @param[in]  q_weight_1d Quadrature weights in one dimension
881   @param[in]  P_1d        Number of basis nodes in one dimension
882   @param[in]  Q_1d        Number of quadrature points in one dimension
883   @param[in]  dim         Dimension of basis
884   @param[out] mass        Assembled mass matrix in one dimension
885   @param[out] laplace     Assembled perturbed Laplacian in one dimension
886 
887   @return An error code: 0 - success, otherwise - failure
888 
889   @ref Developer
890 **/
891 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d,
892                                                       CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
893   for (CeedInt i = 0; i < P_1d; i++) {
894     for (CeedInt j = 0; j < P_1d; j++) {
895       CeedScalar sum = 0.0;
896       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
897       mass[i + j * P_1d] = sum;
898     }
899   }
900   // -- Laplacian
901   for (CeedInt i = 0; i < P_1d; i++) {
902     for (CeedInt j = 0; j < P_1d; j++) {
903       CeedScalar sum = 0.0;
904       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
905       laplace[i + j * P_1d] = sum;
906     }
907   }
908   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
909   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
910   return CEED_ERROR_SUCCESS;
911 }
912 CeedPragmaOptimizeOn;
913 
914 /// @}
915 
916 /// ----------------------------------------------------------------------------
917 /// CeedOperator Backend API
918 /// ----------------------------------------------------------------------------
919 /// @addtogroup CeedOperatorBackend
920 /// @{
921 
922 /**
923   @brief Create object holding CeedQFunction assembly data for CeedOperator
924 
925   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
926   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
927 
928   @return An error code: 0 - success, otherwise - failure
929 
930   @ref Backend
931 **/
932 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
933   CeedCall(CeedCalloc(1, data));
934   (*data)->ref_count = 1;
935   (*data)->ceed      = ceed;
936   CeedCall(CeedReference(ceed));
937 
938   return CEED_ERROR_SUCCESS;
939 }
940 
941 /**
942   @brief Increment the reference counter for a CeedQFunctionAssemblyData
943 
944   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
945 
946   @return An error code: 0 - success, otherwise - failure
947 
948   @ref Backend
949 **/
950 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
951   data->ref_count++;
952   return CEED_ERROR_SUCCESS;
953 }
954 
955 /**
956   @brief Set re-use of CeedQFunctionAssemblyData
957 
958   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
959   @param[in]     reuse_data Boolean flag indicating data re-use
960 
961   @return An error code: 0 - success, otherwise - failure
962 
963   @ref Backend
964 **/
965 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
966   data->reuse_data        = reuse_data;
967   data->needs_data_update = true;
968   return CEED_ERROR_SUCCESS;
969 }
970 
971 /**
972   @brief Mark QFunctionAssemblyData as stale
973 
974   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
975   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
976 
977   @return An error code: 0 - success, otherwise - failure
978 
979   @ref Backend
980 **/
981 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
982   data->needs_data_update = needs_data_update;
983   return CEED_ERROR_SUCCESS;
984 }
985 
986 /**
987   @brief Determine if QFunctionAssemblyData needs update
988 
989   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
990   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
991 
992   @return An error code: 0 - success, otherwise - failure
993 
994   @ref Backend
995 **/
996 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
997   *is_update_needed = !data->reuse_data || data->needs_data_update;
998   return CEED_ERROR_SUCCESS;
999 }
1000 
1001 /**
1002   @brief Copy the pointer to a CeedQFunctionAssemblyData.
1003            Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
1004            Note: If `*data_copy` is non-NULL, then it is assumed that `*data_copy` is a pointer to a CeedQFunctionAssemblyData.
1005              This CeedQFunctionAssemblyData will be destroyed if `*data_copy` is the only reference to this CeedQFunctionAssemblyData.
1006 
1007   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
1008   @param[in,out] data_copy Variable to store copied reference
1009 
1010   @return An error code: 0 - success, otherwise - failure
1011 
1012   @ref Backend
1013 **/
1014 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1015   CeedCall(CeedQFunctionAssemblyDataReference(data));
1016   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1017   *data_copy = data;
1018   return CEED_ERROR_SUCCESS;
1019 }
1020 
1021 /**
1022   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1023 
1024   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1025   @param[out] is_setup Boolean flag for setup status
1026 
1027   @return An error code: 0 - success, otherwise - failure
1028 
1029   @ref Backend
1030 **/
1031 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1032   *is_setup = data->is_setup;
1033   return CEED_ERROR_SUCCESS;
1034 }
1035 
1036 /**
1037   @brief Set internal objects for CeedQFunctionAssemblyData
1038 
1039   @param[in,out] data CeedQFunctionAssemblyData to set objects
1040   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1041   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1042 
1043   @return An error code: 0 - success, otherwise - failure
1044 
1045   @ref Backend
1046 **/
1047 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1048   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1049   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1050 
1051   data->is_setup = true;
1052   return CEED_ERROR_SUCCESS;
1053 }
1054 
1055 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1056   if (!data->is_setup) {
1057     // LCOV_EXCL_START
1058     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1059     // LCOV_EXCL_STOP
1060   }
1061 
1062   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1063   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1064 
1065   return CEED_ERROR_SUCCESS;
1066 }
1067 
1068 /**
1069   @brief Destroy CeedQFunctionAssemblyData
1070 
1071   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1072 
1073   @return An error code: 0 - success, otherwise - failure
1074 
1075   @ref Backend
1076 **/
1077 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1078   if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;
1079 
1080   CeedCall(CeedDestroy(&(*data)->ceed));
1081   CeedCall(CeedVectorDestroy(&(*data)->vec));
1082   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1083 
1084   CeedCall(CeedFree(data));
1085   return CEED_ERROR_SUCCESS;
1086 }
1087 
1088 /**
1089   @brief Get CeedOperatorAssemblyData
1090 
1091   @param[in]  op   CeedOperator to assemble
1092   @param[out] data CeedQFunctionAssemblyData
1093 
1094   @return An error code: 0 - success, otherwise - failure
1095 
1096   @ref Backend
1097 **/
1098 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1099   if (!op->op_assembled) {
1100     CeedOperatorAssemblyData data;
1101 
1102     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1103     op->op_assembled = data;
1104   }
1105   *data = op->op_assembled;
1106 
1107   return CEED_ERROR_SUCCESS;
1108 }
1109 
1110 /**
1111   @brief Create object holding CeedOperator assembly data.
1112 
1113     The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1114     An array with references to the corresponding active CeedElemRestrictions is also stored.
1115     For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1116     The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1117       CeedEvalMode.
1118     The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1119     Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1120 
1121   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1122   @param[in]  op   CeedOperator to be assembled
1123   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1124 
1125   @return An error code: 0 - success, otherwise - failure
1126 
1127   @ref Backend
1128 **/
1129 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1130   CeedInt num_active_bases = 0;
1131 
1132   // Allocate
1133   CeedCall(CeedCalloc(1, data));
1134   (*data)->ceed = ceed;
1135   CeedCall(CeedReference(ceed));
1136 
1137   // Build OperatorAssembly data
1138   CeedQFunction       qf;
1139   CeedQFunctionField *qf_fields;
1140   CeedOperatorField  *op_fields;
1141   CeedInt             num_input_fields;
1142   CeedCall(CeedOperatorGetQFunction(op, &qf));
1143   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1144   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1145 
1146   // Determine active input basis
1147   CeedInt       *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0;
1148   CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL;
1149   CeedSize     **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1150 
1151   for (CeedInt i = 0; i < num_input_fields; i++) {
1152     CeedVector vec;
1153 
1154     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1155     if (vec == CEED_VECTOR_ACTIVE) {
1156       CeedInt      index = -1, dim = 1, num_components;
1157       CeedBasis    basis_in = NULL;
1158       CeedEvalMode eval_mode;
1159 
1160       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1161       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1162       CeedCall(CeedBasisGetNumComponents(basis_in, &num_components));
1163       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1164       for (CeedInt i = 0; i < num_active_bases; i++) {
1165         if ((*data)->active_bases[i] == basis_in) index = i;
1166       }
1167       if (index == -1) {
1168         CeedElemRestriction elem_rstr_in;
1169 
1170         index = num_active_bases;
1171         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1172         (*data)->active_bases[num_active_bases] = NULL;
1173         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases]));
1174         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1175         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1176         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1177         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases]));
1178         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1179         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1180         num_eval_modes_in[index]  = 0;
1181         num_eval_modes_out[index] = 0;
1182         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1183         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1184         eval_modes_in[index]  = NULL;
1185         eval_modes_out[index] = NULL;
1186         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1187         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1188         eval_mode_offsets_in[index]  = NULL;
1189         eval_mode_offsets_out[index] = NULL;
1190         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1191         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1192         (*data)->assembled_bases_in[index]  = NULL;
1193         (*data)->assembled_bases_out[index] = NULL;
1194         num_active_bases++;
1195       }
1196       switch (eval_mode) {
1197         case CEED_EVAL_NONE:
1198         case CEED_EVAL_INTERP:
1199           CeedCall(CeedRealloc(num_eval_modes_in[index] + 1, &eval_modes_in[index]));
1200           CeedCall(CeedRealloc(num_eval_modes_in[index] + 1, &eval_mode_offsets_in[index]));
1201           eval_modes_in[index][num_eval_modes_in[index]]        = eval_mode;
1202           eval_mode_offsets_in[index][num_eval_modes_in[index]] = offset;
1203           offset += num_components;
1204           num_eval_modes_in[index] += 1;
1205           break;
1206         case CEED_EVAL_GRAD:
1207           CeedCall(CeedRealloc(num_eval_modes_in[index] + dim, &eval_modes_in[index]));
1208           CeedCall(CeedRealloc(num_eval_modes_in[index] + dim, &eval_mode_offsets_in[index]));
1209           for (CeedInt d = 0; d < dim; d++) {
1210             eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1211             eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1212             offset += num_components;
1213           }
1214           num_eval_modes_in[index] += dim;
1215           break;
1216         case CEED_EVAL_WEIGHT:
1217         case CEED_EVAL_DIV:
1218         case CEED_EVAL_CURL:
1219           break;  // Caught by QF Assembly
1220       }
1221     }
1222   }
1223   (*data)->num_eval_modes_in    = num_eval_modes_in;
1224   (*data)->eval_modes_in        = eval_modes_in;
1225   (*data)->eval_mode_offsets_in = eval_mode_offsets_in;
1226 
1227   // Determine active output basis
1228   CeedInt num_output_fields;
1229 
1230   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1231   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1232   offset = 0;
1233   for (CeedInt i = 0; i < num_output_fields; i++) {
1234     CeedVector vec;
1235 
1236     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1237     if (vec == CEED_VECTOR_ACTIVE) {
1238       CeedInt      index = -1, dim = 1, num_components;
1239       CeedBasis    basis_out = NULL;
1240       CeedEvalMode eval_mode;
1241 
1242       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1243       CeedCall(CeedBasisGetDimension(basis_out, &dim));
1244       CeedCall(CeedBasisGetNumComponents(basis_out, &num_components));
1245       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1246       for (CeedInt i = 0; i < num_active_bases; i++) {
1247         if ((*data)->active_bases[i] == basis_out) index = i;
1248       }
1249       if (index == -1) {
1250         CeedElemRestriction elem_rstr_out;
1251 
1252         index = num_active_bases;
1253         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1254         (*data)->active_bases[num_active_bases] = NULL;
1255         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases]));
1256         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1257         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1258         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1259         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases]));
1260         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1261         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1262         num_eval_modes_in[index]  = 0;
1263         num_eval_modes_out[index] = 0;
1264         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1265         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1266         eval_modes_in[index]  = NULL;
1267         eval_modes_out[index] = NULL;
1268         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1269         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1270         eval_mode_offsets_in[index]  = NULL;
1271         eval_mode_offsets_out[index] = NULL;
1272         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1273         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1274         (*data)->assembled_bases_in[index]  = NULL;
1275         (*data)->assembled_bases_out[index] = NULL;
1276         num_active_bases++;
1277       }
1278       switch (eval_mode) {
1279         case CEED_EVAL_NONE:
1280         case CEED_EVAL_INTERP:
1281           CeedCall(CeedRealloc(num_eval_modes_out[index] + 1, &eval_modes_out[index]));
1282           CeedCall(CeedRealloc(num_eval_modes_out[index] + 1, &eval_mode_offsets_out[index]));
1283           eval_modes_out[index][num_eval_modes_out[index]]        = eval_mode;
1284           eval_mode_offsets_out[index][num_eval_modes_out[index]] = offset;
1285           offset += num_components;
1286           num_eval_modes_out[index] += 1;
1287           break;
1288         case CEED_EVAL_GRAD:
1289           CeedCall(CeedRealloc(num_eval_modes_out[index] + dim, &eval_modes_out[index]));
1290           CeedCall(CeedRealloc(num_eval_modes_out[index] + dim, &eval_mode_offsets_out[index]));
1291           for (CeedInt d = 0; d < dim; d++) {
1292             eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1293             eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1294             offset += num_components;
1295           }
1296           num_eval_modes_out[index] += dim;
1297           break;
1298         case CEED_EVAL_WEIGHT:
1299         case CEED_EVAL_DIV:
1300         case CEED_EVAL_CURL:
1301           break;  // Caught by QF Assembly
1302       }
1303     }
1304   }
1305   (*data)->num_output_components = offset;
1306   (*data)->num_eval_modes_out    = num_eval_modes_out;
1307   (*data)->eval_modes_out        = eval_modes_out;
1308   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1309   (*data)->num_active_bases      = num_active_bases;
1310 
1311   return CEED_ERROR_SUCCESS;
1312 }
1313 
1314 /**
1315   @brief Get CeedOperator CeedEvalModes for assembly.
1316 
1317     Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1318 
1319   @param[in]  data                  CeedOperatorAssemblyData
1320   @param[out] num_active_bases      Total number of active bases
1321   @param[out] num_eval_mode_in      Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1322                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1323   @param[out] eval_mode_in          Pointer to hold arrays of input CeedEvalModes, or NULL.
1324   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1325   @param[out] num_eval_mode_out     Pointer to hold array of numbers of output CeedEvalModes, or NULL
1326   @param[out] eval_mode_out         Pointer to hold arrays of output CeedEvalModes, or NULL.
1327   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1328   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1329                                       including contributions of all active bases
1330 
1331   @return An error code: 0 - success, otherwise - failure
1332 
1333   @ref Backend
1334 **/
1335 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in,
1336                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out,
1337                                          const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) {
1338   if (num_active_bases) *num_active_bases = data->num_active_bases;
1339   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1340   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1341   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1342   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1343   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1344   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1345   if (num_output_components) *num_output_components = data->num_output_components;
1346 
1347   return CEED_ERROR_SUCCESS;
1348 }
1349 
1350 /**
1351   @brief Get CeedOperator CeedBasis data for assembly.
1352 
1353     Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1354 
1355   @param[in]  data                CeedOperatorAssemblyData
1356   @param[out] num_active_bases    Number of active bases, or NULL
1357   @param[out] active_bases        Pointer to hold active CeedBasis, or NULL
1358   @param[out] assembled_bases_in  Pointer to hold assembled active input B, or NULL
1359   @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL
1360 
1361   @return An error code: 0 - success, otherwise - failure
1362 
1363   @ref Backend
1364 **/
1365 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases,
1366                                      const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) {
1367   // Assemble B_in, B_out if needed
1368   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1369     CeedInt num_qpts;
1370 
1371     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1372     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1373       CeedInt           elem_size;
1374       CeedScalar       *B_in = NULL, *identity = NULL;
1375       const CeedScalar *interp_in, *grad_in;
1376       bool              has_eval_none = false;
1377 
1378       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &elem_size));
1379       CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_modes_in[b], &B_in));
1380 
1381       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1382         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1383       }
1384       if (has_eval_none) {
1385         CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1386         for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1387           identity[i * elem_size + i] = 1.0;
1388         }
1389       }
1390       CeedCall(CeedBasisGetInterp(data->active_bases[b], &interp_in));
1391       CeedCall(CeedBasisGetGrad(data->active_bases[b], &grad_in));
1392 
1393       for (CeedInt q = 0; q < num_qpts; q++) {
1394         for (CeedInt n = 0; n < elem_size; n++) {
1395           CeedInt d_in = -1;
1396           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1397             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1398             const CeedScalar *B  = NULL;
1399 
1400             if (data->eval_modes_in[b][e_in] == CEED_EVAL_GRAD) d_in++;
1401             CeedOperatorGetBasisPointer(data->eval_modes_in[b][e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &B);
1402             B_in[(qq + e_in) * elem_size + n] = B[q * elem_size + n];
1403           }
1404         }
1405       }
1406       if (identity) CeedCall(CeedFree(identity));
1407       data->assembled_bases_in[b] = B_in;
1408     }
1409   }
1410 
1411   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1412     CeedInt num_qpts;
1413 
1414     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1415     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1416       CeedInt           elem_size;
1417       const CeedScalar *interp_out, *grad_out;
1418       bool              has_eval_none = false;
1419       CeedScalar       *B_out = NULL, *identity = NULL;
1420 
1421       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &elem_size));
1422       CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_modes_out[b], &B_out));
1423 
1424       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1425         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1426       }
1427       if (has_eval_none) {
1428         CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1429         for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1430           identity[i * elem_size + i] = 1.0;
1431         }
1432       }
1433       CeedCall(CeedBasisGetInterp(data->active_bases[b], &interp_out));
1434       CeedCall(CeedBasisGetGrad(data->active_bases[b], &grad_out));
1435 
1436       for (CeedInt q = 0; q < num_qpts; q++) {
1437         for (CeedInt n = 0; n < elem_size; n++) {
1438           CeedInt d_out = -1;
1439           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1440             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1441             const CeedScalar *B  = NULL;
1442 
1443             if (data->eval_modes_out[b][e_out] == CEED_EVAL_GRAD) d_out++;
1444             CeedOperatorGetBasisPointer(data->eval_modes_out[b][e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &B);
1445             B_out[(qq + e_out) * elem_size + n] = B[q * elem_size + n];
1446           }
1447         }
1448       }
1449       if (identity) CeedCall(CeedFree(identity));
1450       data->assembled_bases_out[b] = B_out;
1451     }
1452   }
1453 
1454   // Pass out assembled data
1455   if (active_bases) *active_bases = data->active_bases;
1456   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1457   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1458 
1459   return CEED_ERROR_SUCCESS;
1460 }
1461 
1462 /**
1463   @brief Get CeedOperator CeedBasis data for assembly.
1464 
1465   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1466 
1467   @param[in]  data                  CeedOperatorAssemblyData
1468   @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL
1469   @param[out] active_elem_rstrs     Pointer to hold active CeedElemRestrictions, or NULL
1470 
1471   @return An error code: 0 - success, otherwise - failure
1472 
1473   @ref Backend
1474 **/
1475 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs,
1476                                                 CeedElemRestriction **active_elem_rstrs) {
1477   if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases;
1478   if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs;
1479 
1480   return CEED_ERROR_SUCCESS;
1481 }
1482 
1483 /**
1484   @brief Destroy CeedOperatorAssemblyData
1485 
1486   @param[in,out] data CeedOperatorAssemblyData to destroy
1487 
1488   @return An error code: 0 - success, otherwise - failure
1489 
1490   @ref Backend
1491 **/
1492 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1493   if (!*data) return CEED_ERROR_SUCCESS;
1494 
1495   CeedCall(CeedDestroy(&(*data)->ceed));
1496   for (CeedInt b = 0; b < (*data)->num_active_bases; b++) {
1497     CeedCall(CeedBasisDestroy(&(*data)->active_bases[b]));
1498     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b]));
1499     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1500     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1501     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1502     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1503     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1504     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1505   }
1506   CeedCall(CeedFree(&(*data)->active_bases));
1507   CeedCall(CeedFree(&(*data)->active_elem_rstrs));
1508   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1509   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1510   CeedCall(CeedFree(&(*data)->eval_modes_in));
1511   CeedCall(CeedFree(&(*data)->eval_modes_out));
1512   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1513   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1514   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1515   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1516 
1517   CeedCall(CeedFree(data));
1518   return CEED_ERROR_SUCCESS;
1519 }
1520 
1521 /// @}
1522 
1523 /// ----------------------------------------------------------------------------
1524 /// CeedOperator Public API
1525 /// ----------------------------------------------------------------------------
1526 /// @addtogroup CeedOperatorUser
1527 /// @{
1528 
1529 /**
1530   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1531 
1532   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1533     The vector 'assembled' is of shape [num_elements, num_input_fields, num_output_fields, num_quad_points] and contains column-major matrices
1534 representing the action of the CeedQFunction for a corresponding quadrature point on an element. Inputs and outputs are in the order provided by the
1535 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order,
1536 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1]
1537 and producing the output [dv_0, dv_1, v].
1538 
1539   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1540 
1541   @param[in]  op        CeedOperator to assemble CeedQFunction
1542   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1543   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1544   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1545 
1546   @return An error code: 0 - success, otherwise - failure
1547 
1548   @ref User
1549 **/
1550 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1551   CeedCall(CeedOperatorCheckReady(op));
1552 
1553   if (op->LinearAssembleQFunction) {
1554     // Backend version
1555     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1556   } else {
1557     // Operator fallback
1558     CeedOperator op_fallback;
1559 
1560     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1561     if (op_fallback) {
1562       CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1563     } else {
1564       // LCOV_EXCL_START
1565       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1566       // LCOV_EXCL_STOP
1567     }
1568   }
1569   return CEED_ERROR_SUCCESS;
1570 }
1571 
1572 /**
1573   @brief Assemble CeedQFunction and store result internally.
1574            Return copied references of stored data to the caller.
1575            Caller is responsible for ownership and destruction of the copied references.
1576            See also @ref CeedOperatorLinearAssembleQFunction
1577 
1578   @param[in]  op        CeedOperator to assemble CeedQFunction
1579   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1580   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1581   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1582 
1583   @return An error code: 0 - success, otherwise - failure
1584 
1585   @ref User
1586 **/
1587 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1588   CeedCall(CeedOperatorCheckReady(op));
1589 
1590   if (op->LinearAssembleQFunctionUpdate) {
1591     // Backend version
1592     bool                qf_assembled_is_setup;
1593     CeedVector          assembled_vec  = NULL;
1594     CeedElemRestriction assembled_rstr = NULL;
1595 
1596     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1597     if (qf_assembled_is_setup) {
1598       bool update_needed;
1599 
1600       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1601       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1602       if (update_needed) {
1603         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1604       }
1605     } else {
1606       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1607       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1608     }
1609     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1610 
1611     // Copy reference from internally held copy
1612     *assembled = NULL;
1613     *rstr      = NULL;
1614     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1615     CeedCall(CeedVectorDestroy(&assembled_vec));
1616     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1617     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1618   } else {
1619     // Operator fallback
1620     CeedOperator op_fallback;
1621 
1622     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1623     if (op_fallback) {
1624       CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1625     } else {
1626       // LCOV_EXCL_START
1627       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1628       // LCOV_EXCL_STOP
1629     }
1630   }
1631 
1632   return CEED_ERROR_SUCCESS;
1633 }
1634 
1635 /**
1636   @brief Assemble the diagonal of a square linear CeedOperator
1637 
1638   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1639 
1640   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1641 
1642   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1643 
1644   @param[in]  op        CeedOperator to assemble CeedQFunction
1645   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1646   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1647 
1648   @return An error code: 0 - success, otherwise - failure
1649 
1650   @ref User
1651 **/
1652 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1653   bool is_composite;
1654   CeedCall(CeedOperatorCheckReady(op));
1655   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1656 
1657   CeedSize input_size = 0, output_size = 0;
1658   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1659   if (input_size != output_size) {
1660     // LCOV_EXCL_START
1661     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1662     // LCOV_EXCL_STOP
1663   }
1664 
1665   // Early exit for empty operator
1666   if (!is_composite) {
1667     CeedInt num_elem = 0;
1668 
1669     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1670     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1671   }
1672 
1673   if (op->LinearAssembleDiagonal) {
1674     // Backend version
1675     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1676     return CEED_ERROR_SUCCESS;
1677   } else if (op->LinearAssembleAddDiagonal) {
1678     // Backend version with zeroing first
1679     CeedCall(CeedVectorSetValue(assembled, 0.0));
1680     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1681     return CEED_ERROR_SUCCESS;
1682   } else {
1683     // Operator fallback
1684     CeedOperator op_fallback;
1685 
1686     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1687     if (op_fallback) {
1688       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1689       return CEED_ERROR_SUCCESS;
1690     }
1691   }
1692   // Default interface implementation
1693   CeedCall(CeedVectorSetValue(assembled, 0.0));
1694   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1695 
1696   return CEED_ERROR_SUCCESS;
1697 }
1698 
1699 /**
1700   @brief Assemble the diagonal of a square linear CeedOperator
1701 
1702   This sums into a CeedVector the diagonal of a linear CeedOperator.
1703 
1704   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1705 
1706   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1707 
1708   @param[in]  op        CeedOperator to assemble CeedQFunction
1709   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1710   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1711 
1712   @return An error code: 0 - success, otherwise - failure
1713 
1714   @ref User
1715 **/
1716 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1717   bool is_composite;
1718   CeedCall(CeedOperatorCheckReady(op));
1719   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1720 
1721   CeedSize input_size = 0, output_size = 0;
1722   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1723   if (input_size != output_size) {
1724     // LCOV_EXCL_START
1725     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1726     // LCOV_EXCL_STOP
1727   }
1728 
1729   // Early exit for empty operator
1730   if (!is_composite) {
1731     CeedInt num_elem = 0;
1732 
1733     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1734     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1735   }
1736 
1737   if (op->LinearAssembleAddDiagonal) {
1738     // Backend version
1739     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1740     return CEED_ERROR_SUCCESS;
1741   } else {
1742     // Operator fallback
1743     CeedOperator op_fallback;
1744 
1745     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1746     if (op_fallback) {
1747       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1748       return CEED_ERROR_SUCCESS;
1749     }
1750   }
1751   // Default interface implementation
1752   if (is_composite) {
1753     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1754   } else {
1755     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1756   }
1757 
1758   return CEED_ERROR_SUCCESS;
1759 }
1760 
1761 /**
1762   @brief Assemble the point block diagonal of a square linear CeedOperator
1763 
1764   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1765 
1766   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1767 
1768   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1769 
1770   @param[in]  op        CeedOperator to assemble CeedQFunction
1771   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1772 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1773 component in].
1774   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1775 
1776   @return An error code: 0 - success, otherwise - failure
1777 
1778   @ref User
1779 **/
1780 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1781   bool is_composite;
1782   CeedCall(CeedOperatorCheckReady(op));
1783   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1784 
1785   CeedSize input_size = 0, output_size = 0;
1786   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1787   if (input_size != output_size) {
1788     // LCOV_EXCL_START
1789     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1790     // LCOV_EXCL_STOP
1791   }
1792 
1793   // Early exit for empty operator
1794   if (!is_composite) {
1795     CeedInt num_elem = 0;
1796 
1797     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1798     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1799   }
1800 
1801   if (op->LinearAssemblePointBlockDiagonal) {
1802     // Backend version
1803     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1804     return CEED_ERROR_SUCCESS;
1805   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1806     // Backend version with zeroing first
1807     CeedCall(CeedVectorSetValue(assembled, 0.0));
1808     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1809     return CEED_ERROR_SUCCESS;
1810   } else {
1811     // Operator fallback
1812     CeedOperator op_fallback;
1813 
1814     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1815     if (op_fallback) {
1816       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1817       return CEED_ERROR_SUCCESS;
1818     }
1819   }
1820   // Default interface implementation
1821   CeedCall(CeedVectorSetValue(assembled, 0.0));
1822   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1823 
1824   return CEED_ERROR_SUCCESS;
1825 }
1826 
1827 /**
1828   @brief Assemble the point block diagonal of a square linear CeedOperator
1829 
1830   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1831 
1832   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1833 
1834   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1835 
1836   @param[in]  op        CeedOperator to assemble CeedQFunction
1837   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1838 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1839 component in].
1840   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1841 
1842   @return An error code: 0 - success, otherwise - failure
1843 
1844   @ref User
1845 **/
1846 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1847   bool is_composite;
1848   CeedCall(CeedOperatorCheckReady(op));
1849   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1850 
1851   CeedSize input_size = 0, output_size = 0;
1852   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1853   if (input_size != output_size) {
1854     // LCOV_EXCL_START
1855     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1856     // LCOV_EXCL_STOP
1857   }
1858 
1859   // Early exit for empty operator
1860   if (!is_composite) {
1861     CeedInt num_elem = 0;
1862 
1863     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1864     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1865   }
1866 
1867   if (op->LinearAssembleAddPointBlockDiagonal) {
1868     // Backend version
1869     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1870     return CEED_ERROR_SUCCESS;
1871   } else {
1872     // Operator fallback
1873     CeedOperator op_fallback;
1874 
1875     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1876     if (op_fallback) {
1877       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1878       return CEED_ERROR_SUCCESS;
1879     }
1880   }
1881   // Default interface implementation
1882   if (is_composite) {
1883     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1884   } else {
1885     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1886   }
1887 
1888   return CEED_ERROR_SUCCESS;
1889 }
1890 
1891 /**
1892    @brief Fully assemble the nonzero pattern of a linear operator.
1893 
1894    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1895 
1896    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1897 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j)
1898 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1899 
1900    This will generally be slow unless your operator is low-order.
1901 
1902    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1903 
1904    @param[in]  op          CeedOperator to assemble
1905    @param[out] num_entries Number of entries in coordinate nonzero pattern
1906    @param[out] rows        Row number for each entry
1907    @param[out] cols        Column number for each entry
1908 
1909    @ref User
1910 **/
1911 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1912   CeedInt       num_suboperators, single_entries;
1913   CeedOperator *sub_operators;
1914   bool          is_composite;
1915   CeedCall(CeedOperatorCheckReady(op));
1916   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1917 
1918   if (op->LinearAssembleSymbolic) {
1919     // Backend version
1920     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1921     return CEED_ERROR_SUCCESS;
1922   } else {
1923     // Operator fallback
1924     CeedOperator op_fallback;
1925 
1926     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1927     if (op_fallback) {
1928       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1929       return CEED_ERROR_SUCCESS;
1930     }
1931   }
1932 
1933   // Default interface implementation
1934 
1935   // count entries and allocate rows, cols arrays
1936   *num_entries = 0;
1937   if (is_composite) {
1938     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1939     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1940     for (CeedInt k = 0; k < num_suboperators; ++k) {
1941       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1942       *num_entries += single_entries;
1943     }
1944   } else {
1945     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1946     *num_entries += single_entries;
1947   }
1948   CeedCall(CeedCalloc(*num_entries, rows));
1949   CeedCall(CeedCalloc(*num_entries, cols));
1950 
1951   // assemble nonzero locations
1952   CeedInt offset = 0;
1953   if (is_composite) {
1954     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1955     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1956     for (CeedInt k = 0; k < num_suboperators; ++k) {
1957       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1958       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1959       offset += single_entries;
1960     }
1961   } else {
1962     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1963   }
1964 
1965   return CEED_ERROR_SUCCESS;
1966 }
1967 
1968 /**
1969    @brief Fully assemble the nonzero entries of a linear operator.
1970 
1971    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1972 
1973    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1974 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added,
1975 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1976 
1977    This will generally be slow unless your operator is low-order.
1978 
1979    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1980 
1981    @param[in]  op     CeedOperator to assemble
1982    @param[out] values Values to assemble into matrix
1983 
1984    @ref User
1985 **/
1986 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1987   CeedInt       num_suboperators, single_entries = 0;
1988   CeedOperator *sub_operators;
1989   bool          is_composite;
1990   CeedCall(CeedOperatorCheckReady(op));
1991   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1992 
1993   // Early exit for empty operator
1994   if (!is_composite) {
1995     CeedInt num_elem = 0;
1996 
1997     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1998     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1999   }
2000 
2001   if (op->LinearAssemble) {
2002     // Backend version
2003     CeedCall(op->LinearAssemble(op, values));
2004     return CEED_ERROR_SUCCESS;
2005   } else {
2006     // Operator fallback
2007     CeedOperator op_fallback;
2008 
2009     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2010     if (op_fallback) {
2011       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2012       return CEED_ERROR_SUCCESS;
2013     }
2014   }
2015 
2016   // Default interface implementation
2017   CeedInt offset = 0;
2018   CeedCall(CeedVectorSetValue(values, 0.0));
2019   if (is_composite) {
2020     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2021     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2022     for (CeedInt k = 0; k < num_suboperators; k++) {
2023       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2024       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2025       offset += single_entries;
2026     }
2027   } else {
2028     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2029   }
2030 
2031   return CEED_ERROR_SUCCESS;
2032 }
2033 
2034 /**
2035   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
2036 
2037   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2038 
2039   @param[in]  op               Composite CeedOperator
2040   @param[in]  num_skip_indices Number of suboperators to skip
2041   @param[in]  skip_indices     Array of indices of suboperators to skip
2042   @param[out] mult             Vector to store multiplicity (of size l_size)
2043 
2044   @return An error code: 0 - success, otherwise - failure
2045 
2046   @ref User
2047 **/
2048 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2049   CeedCall(CeedOperatorCheckReady(op));
2050 
2051   Ceed                ceed;
2052   CeedInt             num_suboperators;
2053   CeedSize            l_vec_len;
2054   CeedScalar         *mult_array;
2055   CeedVector          ones_l_vec;
2056   CeedElemRestriction elem_rstr;
2057   CeedOperator       *sub_operators;
2058 
2059   CeedCall(CeedOperatorGetCeed(op, &ceed));
2060 
2061   // Zero mult vector
2062   CeedCall(CeedVectorSetValue(mult, 0.0));
2063 
2064   // Get suboperators
2065   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2066   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2067   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2068 
2069   // Work vector
2070   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2071   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2072   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2073   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2074 
2075   // Compute multiplicity across suboperators
2076   for (CeedInt i = 0; i < num_suboperators; i++) {
2077     const CeedScalar *sub_mult_array;
2078     CeedVector        sub_mult_l_vec, ones_e_vec;
2079 
2080     // -- Check for suboperator to skip
2081     for (CeedInt j = 0; j < num_skip_indices; j++) {
2082       if (skip_indices[j] == i) continue;
2083     }
2084 
2085     // -- Sub operator multiplicity
2086     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2087     CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2088     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2089     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2090     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2091     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2092     // ---- Flag every node present in the current suboperator
2093     for (CeedInt j = 0; j < l_vec_len; j++) {
2094       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2095     }
2096     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2097     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2098     CeedCall(CeedVectorDestroy(&ones_e_vec));
2099   }
2100   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2101   CeedCall(CeedVectorDestroy(&ones_l_vec));
2102 
2103   return CEED_ERROR_SUCCESS;
2104 }
2105 
2106 /**
2107   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2108 grid interpolation
2109 
2110   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2111 
2112   @param[in]  op_fine      Fine grid operator
2113   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2114   @param[in]  rstr_coarse  Coarse grid restriction
2115   @param[in]  basis_coarse Coarse grid active vector basis
2116   @param[out] op_coarse    Coarse grid operator
2117   @param[out] op_prolong   Coarse to fine operator, or NULL
2118   @param[out] op_restrict  Fine to coarse operator, or NULL
2119 
2120   @return An error code: 0 - success, otherwise - failure
2121 
2122   @ref User
2123 **/
2124 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2125                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2126   CeedCall(CeedOperatorCheckReady(op_fine));
2127 
2128   // Build prolongation matrix, if required
2129   CeedBasis basis_c_to_f = NULL;
2130   if (op_prolong || op_restrict) {
2131     CeedBasis basis_fine;
2132     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2133     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2134   }
2135 
2136   // Core code
2137   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2138 
2139   return CEED_ERROR_SUCCESS;
2140 }
2141 
2142 /**
2143   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2144 
2145   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2146 
2147   @param[in]  op_fine       Fine grid operator
2148   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2149   @param[in]  rstr_coarse   Coarse grid restriction
2150   @param[in]  basis_coarse  Coarse grid active vector basis
2151   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2152   @param[out] op_coarse     Coarse grid operator
2153   @param[out] op_prolong    Coarse to fine operator, or NULL
2154   @param[out] op_restrict   Fine to coarse operator, or NULL
2155 
2156   @return An error code: 0 - success, otherwise - failure
2157 
2158   @ref User
2159 **/
2160 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2161                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2162                                              CeedOperator *op_restrict) {
2163   CeedCall(CeedOperatorCheckReady(op_fine));
2164   Ceed ceed;
2165   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2166 
2167   // Check for compatible quadrature spaces
2168   CeedBasis basis_fine;
2169   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2170   CeedInt Q_f, Q_c;
2171   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2172   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2173   if (Q_f != Q_c) {
2174     // LCOV_EXCL_START
2175     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2176     // LCOV_EXCL_STOP
2177   }
2178 
2179   // Create coarse to fine basis, if required
2180   CeedBasis basis_c_to_f = NULL;
2181   if (op_prolong || op_restrict) {
2182     // Check if interpolation matrix is provided
2183     if (!interp_c_to_f) {
2184       // LCOV_EXCL_START
2185       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2186       // LCOV_EXCL_STOP
2187     }
2188     CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2189     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2190     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2191     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2192     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2193     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2194     CeedScalar *q_ref, *q_weight, *grad;
2195     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2196     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2197     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2198     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2199     CeedCall(CeedFree(&q_ref));
2200     CeedCall(CeedFree(&q_weight));
2201     CeedCall(CeedFree(&grad));
2202   }
2203 
2204   // Core code
2205   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2206   return CEED_ERROR_SUCCESS;
2207 }
2208 
2209 /**
2210   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2211 
2212   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2213 
2214   @param[in]  op_fine       Fine grid operator
2215   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2216   @param[in]  rstr_coarse   Coarse grid restriction
2217   @param[in]  basis_coarse  Coarse grid active vector basis
2218   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2219   @param[out] op_coarse     Coarse grid operator
2220   @param[out] op_prolong    Coarse to fine operator, or NULL
2221   @param[out] op_restrict   Fine to coarse operator, or NULL
2222 
2223   @return An error code: 0 - success, otherwise - failure
2224 
2225   @ref User
2226 **/
2227 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2228                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2229                                        CeedOperator *op_restrict) {
2230   CeedCall(CeedOperatorCheckReady(op_fine));
2231   Ceed ceed;
2232   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2233 
2234   // Check for compatible quadrature spaces
2235   CeedBasis basis_fine;
2236   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2237   CeedInt Q_f, Q_c;
2238   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2239   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2240   if (Q_f != Q_c) {
2241     // LCOV_EXCL_START
2242     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2243     // LCOV_EXCL_STOP
2244   }
2245 
2246   // Coarse to fine basis
2247   CeedBasis basis_c_to_f = NULL;
2248   if (op_prolong || op_restrict) {
2249     // Check if interpolation matrix is provided
2250     if (!interp_c_to_f) {
2251       // LCOV_EXCL_START
2252       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2253       // LCOV_EXCL_STOP
2254     }
2255     CeedElemTopology topo;
2256     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2257     CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2258     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2259     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2260     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2261     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2262     CeedScalar *q_ref, *q_weight, *grad;
2263     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2264     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2265     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2266     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2267     CeedCall(CeedFree(&q_ref));
2268     CeedCall(CeedFree(&q_weight));
2269     CeedCall(CeedFree(&grad));
2270   }
2271 
2272   // Core code
2273   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2274   return CEED_ERROR_SUCCESS;
2275 }
2276 
2277 /**
2278   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2279 
2280   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2281     This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, M = V^T V, K = V^T S V.
2282     The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form V^T
2283 S^hat V. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear.
2284 
2285   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2286 
2287   @param[in]  op      CeedOperator to create element inverses
2288   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2289   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2290 
2291   @return An error code: 0 - success, otherwise - failure
2292 
2293   @ref User
2294 **/
2295 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2296   CeedCall(CeedOperatorCheckReady(op));
2297 
2298   if (op->CreateFDMElementInverse) {
2299     // Backend version
2300     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2301     return CEED_ERROR_SUCCESS;
2302   } else {
2303     // Operator fallback
2304     CeedOperator op_fallback;
2305 
2306     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2307     if (op_fallback) {
2308       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2309       return CEED_ERROR_SUCCESS;
2310     }
2311   }
2312 
2313   // Default interface implementation
2314   Ceed ceed, ceed_parent;
2315   CeedCall(CeedOperatorGetCeed(op, &ceed));
2316   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2317   ceed_parent = ceed_parent ? ceed_parent : ceed;
2318   CeedQFunction qf;
2319   CeedCall(CeedOperatorGetQFunction(op, &qf));
2320 
2321   // Determine active input basis
2322   bool                interp = false, grad = false;
2323   CeedBasis           basis = NULL;
2324   CeedElemRestriction rstr  = NULL;
2325   CeedOperatorField  *op_fields;
2326   CeedQFunctionField *qf_fields;
2327   CeedInt             num_input_fields;
2328   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2329   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2330   for (CeedInt i = 0; i < num_input_fields; i++) {
2331     CeedVector vec;
2332     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2333     if (vec == CEED_VECTOR_ACTIVE) {
2334       CeedEvalMode eval_mode;
2335       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2336       interp = interp || eval_mode == CEED_EVAL_INTERP;
2337       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2338       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2339       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2340     }
2341   }
2342   if (!basis) {
2343     // LCOV_EXCL_START
2344     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2345     // LCOV_EXCL_STOP
2346   }
2347   CeedSize l_size = 1;
2348   CeedInt  P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
2349   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2350   CeedCall(CeedBasisGetNumNodes(basis, &elem_size));
2351   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2352   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2353   CeedCall(CeedBasisGetDimension(basis, &dim));
2354   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2355   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2356   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2357 
2358   // Build and diagonalize 1D Mass and Laplacian
2359   bool tensor_basis;
2360   CeedCall(CeedBasisIsTensor(basis, &tensor_basis));
2361   if (!tensor_basis) {
2362     // LCOV_EXCL_START
2363     return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2364     // LCOV_EXCL_STOP
2365   }
2366   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2367   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2368   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2369   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2370   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2371   CeedCall(CeedCalloc(P_1d, &lambda));
2372   // -- Build matrices
2373   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2374   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2375   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2376   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2377   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2378 
2379   // -- Diagonalize
2380   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2381   CeedCall(CeedFree(&mass));
2382   CeedCall(CeedFree(&laplace));
2383   for (CeedInt i = 0; i < P_1d; i++) {
2384     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2385   }
2386   CeedCall(CeedFree(&x));
2387 
2388   // Assemble QFunction
2389   CeedVector          assembled;
2390   CeedElemRestriction rstr_qf;
2391   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2392   CeedInt layout[3];
2393   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2394   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2395   CeedScalar max_norm = 0;
2396   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2397 
2398   // Calculate element averages
2399   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2400   CeedScalar       *elem_avg;
2401   const CeedScalar *assembled_array, *q_weight_array;
2402   CeedVector        q_weight;
2403   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2404   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2405   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2406   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2407   CeedCall(CeedCalloc(num_elem, &elem_avg));
2408   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2409   for (CeedInt e = 0; e < num_elem; e++) {
2410     CeedInt count = 0;
2411     for (CeedInt q = 0; q < num_qpts; q++) {
2412       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2413         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2414           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2415           count++;
2416         }
2417       }
2418     }
2419     if (count) {
2420       elem_avg[e] /= count;
2421     } else {
2422       elem_avg[e] = 1.0;
2423     }
2424   }
2425   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2426   CeedCall(CeedVectorDestroy(&assembled));
2427   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2428   CeedCall(CeedVectorDestroy(&q_weight));
2429 
2430   // Build FDM diagonal
2431   CeedVector  q_data;
2432   CeedScalar *q_data_array, *fdm_diagonal;
2433   CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal));
2434   const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON;
2435   for (CeedInt c = 0; c < num_comp; c++) {
2436     for (CeedInt n = 0; n < elem_size; n++) {
2437       if (interp) fdm_diagonal[c * elem_size + n] = 1.0;
2438       if (grad) {
2439         for (CeedInt d = 0; d < dim; d++) {
2440           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2441           fdm_diagonal[c * elem_size + n] += lambda[i];
2442         }
2443       }
2444       if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound;
2445     }
2446   }
2447   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data));
2448   CeedCall(CeedVectorSetValue(q_data, 0.0));
2449   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2450   for (CeedInt e = 0; e < num_elem; e++) {
2451     for (CeedInt c = 0; c < num_comp; c++) {
2452       for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]);
2453     }
2454   }
2455   CeedCall(CeedFree(&elem_avg));
2456   CeedCall(CeedFree(&fdm_diagonal));
2457   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2458 
2459   // Setup FDM operator
2460   // -- Basis
2461   CeedBasis   fdm_basis;
2462   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2463   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2464   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2465   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2466   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2467   CeedCall(CeedFree(&fdm_interp));
2468   CeedCall(CeedFree(&grad_dummy));
2469   CeedCall(CeedFree(&q_ref_dummy));
2470   CeedCall(CeedFree(&q_weight_dummy));
2471   CeedCall(CeedFree(&lambda));
2472 
2473   // -- Restriction
2474   CeedElemRestriction rstr_qd_i;
2475   CeedInt             strides[3] = {1, elem_size, elem_size * num_comp};
2476   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i));
2477   // -- QFunction
2478   CeedQFunction qf_fdm;
2479   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2480   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2481   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2482   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2483   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2484   // -- QFunction context
2485   CeedInt *num_comp_data;
2486   CeedCall(CeedCalloc(1, &num_comp_data));
2487   num_comp_data[0] = num_comp;
2488   CeedQFunctionContext ctx_fdm;
2489   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2490   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2491   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2492   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2493   // -- Operator
2494   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2495   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2496   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2497   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2498 
2499   // Cleanup
2500   CeedCall(CeedVectorDestroy(&q_data));
2501   CeedCall(CeedBasisDestroy(&fdm_basis));
2502   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2503   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2504 
2505   return CEED_ERROR_SUCCESS;
2506 }
2507 
2508 /// @}
2509