xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision c85e864000754611328ed43fe87e29f99787d593)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) {
128       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     }
130     // Cleanup
131     CeedCall(CeedQFunctionDestroy(&qf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
133     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
134   }
135   CeedCall(CeedOperatorSetName(op_fallback, op->name));
136   CeedCall(CeedOperatorCheckReady(op_fallback));
137   op->op_fallback = op_fallback;
138 
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
144 
145   @param[in]  op          CeedOperator to retrieve fallback for
146   @param[out] op_fallback Fallback CeedOperator
147 
148   @return An error code: 0 - success, otherwise - failure
149 
150   @ref Developer
151 **/
152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
153   // Create if needed
154   if (!op->op_fallback) {
155     CeedCall(CeedOperatorCreateFallback(op));
156   }
157   if (op->op_fallback) {
158     bool is_debug;
159 
160     CeedCall(CeedIsDebug(op->ceed, &is_debug));
161     if (is_debug) {
162       Ceed        ceed, ceed_fallback;
163       const char *resource, *resource_fallback;
164 
165       CeedCall(CeedOperatorGetCeed(op, &ceed));
166       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
167       CeedCall(CeedGetResource(ceed, &resource));
168       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
169 
170       CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n");
171       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
172                 op->op_fallback);
173     }
174   }
175   *op_fallback = op->op_fallback;
176 
177   return CEED_ERROR_SUCCESS;
178 }
179 
180 /**
181   @brief Select correct basis matrix pointer based on CeedEvalMode
182 
183   @param[in]  basis     CeedBasis from which to get the basis matrix
184   @param[in]  eval_mode Current basis evaluation mode
185   @param[in]  identity  Pointer to identity matrix
186   @param[out] basis_ptr Basis pointer to set
187 
188   @ref Developer
189 **/
190 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
191   switch (eval_mode) {
192     case CEED_EVAL_NONE:
193       *basis_ptr = identity;
194       break;
195     case CEED_EVAL_INTERP:
196       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
197       break;
198     case CEED_EVAL_GRAD:
199       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
200       break;
201     case CEED_EVAL_DIV:
202       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
203       break;
204     case CEED_EVAL_CURL:
205       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
206       break;
207     case CEED_EVAL_WEIGHT:
208       break;  // Caught by QF Assembly
209   }
210   assert(*basis_ptr != NULL);
211 
212   return CEED_ERROR_SUCCESS;
213 }
214 
215 /**
216   @brief Create point block restriction for active operator field
217 
218   @param[in]  rstr            Original CeedElemRestriction for active field
219   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
220 
221   @return An error code: 0 - success, otherwise - failure
222 
223   @ref Developer
224 **/
225 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
226   Ceed ceed;
227   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
228   const CeedInt *offsets;
229   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
230 
231   // Expand offsets
232   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
233   CeedSize l_size;
234   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
235   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
236   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
237   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
238   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
239   CeedInt shift = num_comp;
240   if (comp_stride != 1) shift *= num_comp;
241   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
242   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
243     pointblock_offsets[i] = offsets[i] * shift;
244   }
245 
246   // Create new restriction
247   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
248                                      pointblock_offsets, pointblock_rstr));
249 
250   // Cleanup
251   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
252 
253   return CEED_ERROR_SUCCESS;
254 }
255 
256 /**
257   @brief Core logic for assembling operator diagonal or point block diagonal
258 
259   @param[in]  op            CeedOperator to assemble point block diagonal
260   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
261   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
262   @param[out] assembled     CeedVector to store assembled diagonal
263 
264   @return An error code: 0 - success, otherwise - failure
265 
266   @ref Developer
267 **/
268 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
269   Ceed ceed;
270   CeedCall(CeedOperatorGetCeed(op, &ceed));
271 
272   // Assemble QFunction
273   CeedQFunction       qf;
274   const CeedScalar   *assembled_qf_array;
275   CeedVector          assembled_qf;
276   CeedElemRestriction assembled_elem_rstr;
277   CeedInt             num_input_fields, num_output_fields;
278   CeedInt             layout[3];
279 
280   CeedCall(CeedOperatorGetQFunction(op, &qf));
281   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
282   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
283   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout));
284   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
285   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
286 
287   // Get assembly data
288   CeedOperatorAssemblyData data;
289   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
290   CeedInt                 *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
291   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
292   CeedBasis               *active_bases;
293   CeedElemRestriction     *active_elem_rstrs;
294   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
295   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
296                                                 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components));
297   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL));
298   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs));
299 
300   // Loop over all active bases
301   for (CeedInt b = 0; b < num_active_bases; b++) {
302     // Assemble point block diagonal restriction, if needed
303     CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b];
304 
305     if (is_pointblock) {
306       CeedElemRestriction point_block_elem_rstr;
307 
308       CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr));
309       diag_elem_rstr = point_block_elem_rstr;
310     }
311 
312     // Create diagonal vector
313     CeedVector elem_diag;
314     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
315 
316     // Assemble element operator diagonals
317     CeedScalar *elem_diag_array;
318     CeedInt     num_elem, num_nodes, num_qpts, num_components;
319 
320     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
321     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
322     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
323     CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes));
324     CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components));
325     CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts));
326 
327     // Construct identity matrix for basis if required
328     bool        has_eval_none = false;
329     CeedScalar *identity      = NULL;
330     for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) {
331       has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE);
332     }
333     for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) {
334       has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE);
335     }
336     if (has_eval_none) {
337       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
338       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
339     }
340 
341     // Compute the diagonal of B^T D B
342     // Each element
343     for (CeedInt e = 0; e < num_elem; e++) {
344       // Each basis eval mode pair
345       CeedInt      d_out              = 0, q_comp_out;
346       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
347       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) {
348         const CeedScalar *B_t = NULL;
349         CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t);
350         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out));
351         if (q_comp_out > 1) {
352           if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
353           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
354         }
355         eval_mode_out_prev = eval_modes_out[b][e_out];
356 
357         CeedInt      d_in              = 0, q_comp_in;
358         CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
359         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) {
360           const CeedScalar *B = NULL;
361           CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B);
362           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in));
363           if (q_comp_in > 1) {
364             if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
365             else B = &B[(++d_in) * num_qpts * num_nodes];
366           }
367           eval_mode_in_prev = eval_modes_in[b][e_in];
368 
369           // Each component
370           for (CeedInt c_out = 0; c_out < num_components; c_out++) {
371             // Each qpt/node pair
372             for (CeedInt q = 0; q < num_qpts; q++) {
373               if (is_pointblock) {
374                 // Point Block Diagonal
375                 for (CeedInt c_in = 0; c_in < num_components; c_in++) {
376                   const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
377                   const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
378                   for (CeedInt n = 0; n < num_nodes; n++) {
379                     elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] +=
380                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
381                   }
382                 }
383               } else {
384                 // Diagonal Only
385                 const CeedInt    c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
386                 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
387                 for (CeedInt n = 0; n < num_nodes; n++) {
388                   elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
389                 }
390               }
391             }
392           }
393         }
394       }
395     }
396     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
397 
398     // Assemble local operator diagonal
399     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
400 
401     // Cleanup
402     if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
403     CeedCall(CeedVectorDestroy(&elem_diag));
404     CeedCall(CeedFree(&identity));
405   }
406   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
407   CeedCall(CeedVectorDestroy(&assembled_qf));
408 
409   return CEED_ERROR_SUCCESS;
410 }
411 
412 /**
413   @brief Core logic for assembling composite operator diagonal
414 
415   @param[in]  op            CeedOperator to assemble point block diagonal
416   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
417   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
418   @param[out] assembled     CeedVector to store assembled diagonal
419 
420   @return An error code: 0 - success, otherwise - failure
421 
422   @ref Developer
423 **/
424 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
425                                                                  CeedVector assembled) {
426   CeedInt       num_sub;
427   CeedOperator *suboperators;
428   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
429   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
430   for (CeedInt i = 0; i < num_sub; i++) {
431     if (is_pointblock) {
432       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
433     } else {
434       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
435     }
436   }
437   return CEED_ERROR_SUCCESS;
438 }
439 
440 /**
441   @brief Build nonzero pattern for non-composite operator
442 
443   Users should generally use CeedOperatorLinearAssembleSymbolic()
444 
445   @param[in]  op     CeedOperator to assemble nonzero pattern
446   @param[in]  offset Offset for number of entries
447   @param[out] rows   Row number for each entry
448   @param[out] cols   Column number for each entry
449 
450   @return An error code: 0 - success, otherwise - failure
451 
452   @ref Developer
453 **/
454 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
455   Ceed ceed;
456   bool is_composite;
457   CeedCall(CeedOperatorGetCeed(op, &ceed));
458   CeedCall(CeedOperatorIsComposite(op, &is_composite));
459 
460   if (is_composite) {
461     // LCOV_EXCL_START
462     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
463     // LCOV_EXCL_STOP
464   }
465 
466   CeedSize num_nodes;
467   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
468   CeedElemRestriction rstr_in;
469   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
470   CeedInt num_elem, elem_size, num_comp;
471   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
472   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
473   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
474   CeedInt layout_er[3];
475   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
476 
477   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
478 
479   // Determine elem_dof relation
480   CeedVector index_vec;
481   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
482   CeedScalar *array;
483   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
484   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
485   CeedCall(CeedVectorRestoreArray(index_vec, &array));
486   CeedVector elem_dof;
487   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
488   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
489   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
490   const CeedScalar *elem_dof_a;
491   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
492   CeedCall(CeedVectorDestroy(&index_vec));
493 
494   // Determine i, j locations for element matrices
495   CeedInt count = 0;
496   for (CeedInt e = 0; e < num_elem; e++) {
497     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
498       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
499         for (CeedInt i = 0; i < elem_size; i++) {
500           for (CeedInt j = 0; j < elem_size; j++) {
501             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
502             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
503 
504             const CeedInt row = elem_dof_a[elem_dof_index_row];
505             const CeedInt col = elem_dof_a[elem_dof_index_col];
506 
507             rows[offset + count] = row;
508             cols[offset + count] = col;
509             count++;
510           }
511         }
512       }
513     }
514   }
515   if (count != local_num_entries) {
516     // LCOV_EXCL_START
517     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
518     // LCOV_EXCL_STOP
519   }
520   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
521   CeedCall(CeedVectorDestroy(&elem_dof));
522 
523   return CEED_ERROR_SUCCESS;
524 }
525 
526 /**
527   @brief Assemble nonzero entries for non-composite operator
528 
529   Users should generally use CeedOperatorLinearAssemble()
530 
531   @param[in]  op     CeedOperator to assemble
532   @param[in]  offset Offset for number of entries
533   @param[out] values Values to assemble into matrix
534 
535   @return An error code: 0 - success, otherwise - failure
536 
537   @ref Developer
538 **/
539 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
540   Ceed ceed;
541   bool is_composite;
542   CeedCall(CeedOperatorGetCeed(op, &ceed));
543   CeedCall(CeedOperatorIsComposite(op, &is_composite));
544 
545   if (is_composite) {
546     // LCOV_EXCL_START
547     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
548     // LCOV_EXCL_STOP
549   }
550 
551   // Early exit for empty operator
552   {
553     CeedInt num_elem = 0;
554 
555     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
556     if (num_elem == 0) return CEED_ERROR_SUCCESS;
557   }
558 
559   if (op->LinearAssembleSingle) {
560     // Backend version
561     CeedCall(op->LinearAssembleSingle(op, offset, values));
562     return CEED_ERROR_SUCCESS;
563   } else {
564     // Operator fallback
565     CeedOperator op_fallback;
566 
567     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
568     if (op_fallback) {
569       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
570       return CEED_ERROR_SUCCESS;
571     }
572   }
573 
574   // Assemble QFunction
575   CeedQFunction qf;
576   CeedCall(CeedOperatorGetQFunction(op, &qf));
577   CeedVector          assembled_qf;
578   CeedElemRestriction rstr_q;
579   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
580   CeedSize qf_length;
581   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
582 
583   CeedInt            num_input_fields, num_output_fields;
584   CeedOperatorField *input_fields;
585   CeedOperatorField *output_fields;
586   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
587 
588   // Get assembly data
589   CeedOperatorAssemblyData data;
590   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
591   const CeedEvalMode **eval_modes_in, **eval_modes_out;
592   CeedInt             *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
593   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out,
594                                                 &eval_modes_out, NULL, NULL));
595   CeedBasis *bases;
596   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL));
597   CeedBasis basis_in = bases[0];
598 
599   if (num_active_bases > 1) {
600     // LCOV_EXCL_START
601     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases");
602     // LCOV_EXCL_STOP
603   }
604   if (num_eval_modes_in[0] == 0 || num_eval_modes_out[0] == 0) {
605     // LCOV_EXCL_START
606     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
607     // LCOV_EXCL_STOP
608   }
609 
610   CeedElemRestriction active_rstr;
611   CeedInt             num_elem, elem_size, num_qpts, num_comp;
612   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
613   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
614   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
615   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
616   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
617 
618   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
619 
620   // loop over elements and put in data structure
621   const CeedScalar *assembled_qf_array;
622   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
623 
624   CeedInt layout_qf[3];
625   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
626   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
627 
628   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
629   const CeedScalar **B_mats_in, **B_mats_out;
630   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out));
631   const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0];
632   CeedScalar        BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]];
633   CeedScalar        elem_mat[elem_size * elem_size];
634   CeedInt           count = 0;
635   CeedScalar       *vals;
636   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
637   for (CeedInt e = 0; e < num_elem; e++) {
638     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
639       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
640         // Compute B^T*D
641         for (CeedInt n = 0; n < elem_size; n++) {
642           for (CeedInt q = 0; q < num_qpts; q++) {
643             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
644               const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in);
645               CeedScalar    sum       = 0.0;
646               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
647                 const CeedInt b_out_index     = (num_eval_modes_out[0] * q + e_out) * elem_size + n;
648                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out;
649                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
650                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
651               }
652               BTD_mat[btd_index] = sum;
653             }
654           }
655         }
656         // form element matrix itself (for each block component)
657         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0]));
658 
659         // put element matrix in coordinate data structure
660         for (CeedInt i = 0; i < elem_size; i++) {
661           for (CeedInt j = 0; j < elem_size; j++) {
662             vals[offset + count] = elem_mat[i * elem_size + j];
663             count++;
664           }
665         }
666       }
667     }
668   }
669   if (count != local_num_entries) {
670     // LCOV_EXCL_START
671     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
672     // LCOV_EXCL_STOP
673   }
674   CeedCall(CeedVectorRestoreArray(values, &vals));
675 
676   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
677   CeedCall(CeedVectorDestroy(&assembled_qf));
678 
679   return CEED_ERROR_SUCCESS;
680 }
681 
682 /**
683   @brief Count number of entries for assembled CeedOperator
684 
685   @param[in]  op          CeedOperator to assemble
686   @param[out] num_entries Number of entries in assembled representation
687 
688   @return An error code: 0 - success, otherwise - failure
689 
690   @ref Utility
691 **/
692 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
693   bool                is_composite;
694   CeedElemRestriction rstr;
695   CeedInt             num_elem, elem_size, num_comp;
696 
697   CeedCall(CeedOperatorIsComposite(op, &is_composite));
698   if (is_composite) {
699     // LCOV_EXCL_START
700     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
701     // LCOV_EXCL_STOP
702   }
703   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
704   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
705   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
706   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
707   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
708 
709   return CEED_ERROR_SUCCESS;
710 }
711 
712 /**
713   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
714 
715   @param[in]  op_fine      Fine grid operator
716   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
717   @param[in]  rstr_coarse  Coarse grid restriction
718   @param[in]  basis_coarse Coarse grid active vector basis
719   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
720   @param[out] op_coarse    Coarse grid operator
721   @param[out] op_prolong   Coarse to fine operator, or NULL
722   @param[out] op_restrict  Fine to coarse operator, or NULL
723 
724   @return An error code: 0 - success, otherwise - failure
725 
726   @ref Developer
727 **/
728 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
729                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
730   Ceed       ceed;
731   CeedVector mult_vec = NULL;
732   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
733 
734   // Check for composite operator
735   bool is_composite;
736   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
737   if (is_composite) {
738     // LCOV_EXCL_START
739     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
740     // LCOV_EXCL_STOP
741   }
742 
743   // Coarse Grid
744   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
745   CeedElemRestriction rstr_fine = NULL;
746   // -- Clone input fields
747   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
748     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
749       rstr_fine = op_fine->input_fields[i]->elem_rstr;
750       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
751     } else {
752       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
753                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
754     }
755   }
756   // -- Clone output fields
757   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
758     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
759       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
760     } else {
761       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
762                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
763     }
764   }
765   // -- Clone QFunctionAssemblyData
766   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
767 
768   // Multiplicity vector
769   if (op_restrict || op_prolong) {
770     CeedVector mult_e_vec;
771 
772     if (!p_mult_fine) {
773       // LCOV_EXCL_START
774       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
775       // LCOV_EXCL_STOP
776     }
777     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
778     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
779     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
780     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
781     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
782     CeedCall(CeedVectorDestroy(&mult_e_vec));
783     CeedCall(CeedVectorReciprocal(mult_vec));
784   }
785 
786   // Clone name
787   bool   has_name = op_fine->name;
788   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
789   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
790 
791   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
792   if ((op_restrict || op_prolong) && !basis_c_to_f) {
793     // LCOV_EXCL_START
794     return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine basis");
795     // LCOV_EXCL_STOP
796   }
797 
798   // Restriction/Prolongation Operators
799   CeedInt num_comp;
800   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
801 
802   // Restriction
803   if (op_restrict) {
804     CeedInt             *num_comp_r_data;
805     CeedQFunction        qf_restrict;
806     CeedQFunctionContext ctx_r;
807 
808     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
809     CeedCall(CeedCalloc(1, &num_comp_r_data));
810     num_comp_r_data[0] = num_comp;
811     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
812     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
813     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
814     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
815     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
816     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
817     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
818     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
819 
820     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
821     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
822     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
823     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
824 
825     // Set name
826     char *restriction_name;
827     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
828     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
829     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
830     CeedCall(CeedFree(&restriction_name));
831 
832     // Check
833     CeedCall(CeedOperatorCheckReady(*op_restrict));
834 
835     // Cleanup
836     CeedCall(CeedQFunctionDestroy(&qf_restrict));
837   }
838 
839   // Prolongation
840   if (op_prolong) {
841     CeedInt             *num_comp_p_data;
842     CeedQFunction        qf_prolong;
843     CeedQFunctionContext ctx_p;
844 
845     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
846     CeedCall(CeedCalloc(1, &num_comp_p_data));
847     num_comp_p_data[0] = num_comp;
848     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
849     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
850     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
851     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
852     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
853     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
854     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
855     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
856 
857     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
858     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
859     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
860     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
861 
862     // Set name
863     char *prolongation_name;
864     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
865     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
866     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
867     CeedCall(CeedFree(&prolongation_name));
868 
869     // Check
870     CeedCall(CeedOperatorCheckReady(*op_prolong));
871 
872     // Cleanup
873     CeedCall(CeedQFunctionDestroy(&qf_prolong));
874   }
875 
876   // Check
877   CeedCall(CeedOperatorCheckReady(*op_coarse));
878 
879   // Cleanup
880   CeedCall(CeedVectorDestroy(&mult_vec));
881   CeedCall(CeedBasisDestroy(&basis_c_to_f));
882 
883   return CEED_ERROR_SUCCESS;
884 }
885 
886 /**
887   @brief Build 1D mass matrix and Laplacian with perturbation
888 
889   @param[in]  interp_1d   Interpolation matrix in one dimension
890   @param[in]  grad_1d     Gradient matrix in one dimension
891   @param[in]  q_weight_1d Quadrature weights in one dimension
892   @param[in]  P_1d        Number of basis nodes in one dimension
893   @param[in]  Q_1d        Number of quadrature points in one dimension
894   @param[in]  dim         Dimension of basis
895   @param[out] mass        Assembled mass matrix in one dimension
896   @param[out] laplace     Assembled perturbed Laplacian in one dimension
897 
898   @return An error code: 0 - success, otherwise - failure
899 
900   @ref Developer
901 **/
902 CeedPragmaOptimizeOff
903 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
904                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
905   for (CeedInt i = 0; i < P_1d; i++) {
906     for (CeedInt j = 0; j < P_1d; j++) {
907       CeedScalar sum = 0.0;
908       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
909       mass[i + j * P_1d] = sum;
910     }
911   }
912   // -- Laplacian
913   for (CeedInt i = 0; i < P_1d; i++) {
914     for (CeedInt j = 0; j < P_1d; j++) {
915       CeedScalar sum = 0.0;
916       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
917       laplace[i + j * P_1d] = sum;
918     }
919   }
920   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
921   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
922   return CEED_ERROR_SUCCESS;
923 }
924 CeedPragmaOptimizeOn
925 
926 /// @}
927 
928 /// ----------------------------------------------------------------------------
929 /// CeedOperator Backend API
930 /// ----------------------------------------------------------------------------
931 /// @addtogroup CeedOperatorBackend
932 /// @{
933 
934 /**
935   @brief Create object holding CeedQFunction assembly data for CeedOperator
936 
937   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
938   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
939 
940   @return An error code: 0 - success, otherwise - failure
941 
942   @ref Backend
943 **/
944 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
945   CeedCall(CeedCalloc(1, data));
946   (*data)->ref_count = 1;
947   (*data)->ceed      = ceed;
948   CeedCall(CeedReference(ceed));
949 
950   return CEED_ERROR_SUCCESS;
951 }
952 
953 /**
954   @brief Increment the reference counter for a CeedQFunctionAssemblyData
955 
956   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
957 
958   @return An error code: 0 - success, otherwise - failure
959 
960   @ref Backend
961 **/
962 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
963   data->ref_count++;
964   return CEED_ERROR_SUCCESS;
965 }
966 
967 /**
968   @brief Set re-use of CeedQFunctionAssemblyData
969 
970   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
971   @param[in]     reuse_data Boolean flag indicating data re-use
972 
973   @return An error code: 0 - success, otherwise - failure
974 
975   @ref Backend
976 **/
977 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
978   data->reuse_data        = reuse_data;
979   data->needs_data_update = true;
980   return CEED_ERROR_SUCCESS;
981 }
982 
983 /**
984   @brief Mark QFunctionAssemblyData as stale
985 
986   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
987   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
988 
989   @return An error code: 0 - success, otherwise - failure
990 
991   @ref Backend
992 **/
993 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
994   data->needs_data_update = needs_data_update;
995   return CEED_ERROR_SUCCESS;
996 }
997 
998 /**
999   @brief Determine if QFunctionAssemblyData needs update
1000 
1001   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
1002   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1003 
1004   @return An error code: 0 - success, otherwise - failure
1005 
1006   @ref Backend
1007 **/
1008 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
1009   *is_update_needed = !data->reuse_data || data->needs_data_update;
1010   return CEED_ERROR_SUCCESS;
1011 }
1012 
1013 /**
1014   @brief Copy the pointer to a CeedQFunctionAssemblyData.
1015 
1016   Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
1017 
1018   Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
1019         CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
1020         CeedQFunctionAssemblyData.
1021 
1022   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
1023   @param[in,out] data_copy Variable to store copied reference
1024 
1025   @return An error code: 0 - success, otherwise - failure
1026 
1027   @ref Backend
1028 **/
1029 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1030   CeedCall(CeedQFunctionAssemblyDataReference(data));
1031   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1032   *data_copy = data;
1033   return CEED_ERROR_SUCCESS;
1034 }
1035 
1036 /**
1037   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1038 
1039   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1040   @param[out] is_setup Boolean flag for setup status
1041 
1042   @return An error code: 0 - success, otherwise - failure
1043 
1044   @ref Backend
1045 **/
1046 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1047   *is_setup = data->is_setup;
1048   return CEED_ERROR_SUCCESS;
1049 }
1050 
1051 /**
1052   @brief Set internal objects for CeedQFunctionAssemblyData
1053 
1054   @param[in,out] data CeedQFunctionAssemblyData to set objects
1055   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1056   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1057 
1058   @return An error code: 0 - success, otherwise - failure
1059 
1060   @ref Backend
1061 **/
1062 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1063   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1064   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1065 
1066   data->is_setup = true;
1067   return CEED_ERROR_SUCCESS;
1068 }
1069 
1070 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1071   if (!data->is_setup) {
1072     // LCOV_EXCL_START
1073     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1074     // LCOV_EXCL_STOP
1075   }
1076 
1077   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1078   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1079 
1080   return CEED_ERROR_SUCCESS;
1081 }
1082 
1083 /**
1084   @brief Destroy CeedQFunctionAssemblyData
1085 
1086   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1087 
1088   @return An error code: 0 - success, otherwise - failure
1089 
1090   @ref Backend
1091 **/
1092 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1093   if (!*data || --(*data)->ref_count > 0) {
1094     *data = NULL;
1095     return CEED_ERROR_SUCCESS;
1096   }
1097   CeedCall(CeedDestroy(&(*data)->ceed));
1098   CeedCall(CeedVectorDestroy(&(*data)->vec));
1099   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1100 
1101   CeedCall(CeedFree(data));
1102   return CEED_ERROR_SUCCESS;
1103 }
1104 
1105 /**
1106   @brief Get CeedOperatorAssemblyData
1107 
1108   @param[in]  op   CeedOperator to assemble
1109   @param[out] data CeedQFunctionAssemblyData
1110 
1111   @return An error code: 0 - success, otherwise - failure
1112 
1113   @ref Backend
1114 **/
1115 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1116   if (!op->op_assembled) {
1117     CeedOperatorAssemblyData data;
1118 
1119     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1120     op->op_assembled = data;
1121   }
1122   *data = op->op_assembled;
1123 
1124   return CEED_ERROR_SUCCESS;
1125 }
1126 
1127 /**
1128   @brief Create object holding CeedOperator assembly data.
1129 
1130   The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1131   An array with references to the corresponding active CeedElemRestrictions is also stored.
1132   For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1133   The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1134 CeedEvalMode. The number of input columns across all active bases for the assembled CeedQFunction is also stored. Lastly, the CeedOperatorAssembly
1135 data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1136 
1137   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1138   @param[in]  op   CeedOperator to be assembled
1139   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1140 
1141   @return An error code: 0 - success, otherwise - failure
1142 
1143   @ref Backend
1144 **/
1145 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1146   CeedInt num_active_bases = 0;
1147 
1148   // Allocate
1149   CeedCall(CeedCalloc(1, data));
1150   (*data)->ceed = ceed;
1151   CeedCall(CeedReference(ceed));
1152 
1153   // Build OperatorAssembly data
1154   CeedQFunction       qf;
1155   CeedQFunctionField *qf_fields;
1156   CeedOperatorField  *op_fields;
1157   CeedInt             num_input_fields;
1158   CeedCall(CeedOperatorGetQFunction(op, &qf));
1159   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1160   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1161 
1162   // Determine active input basis
1163   CeedInt       *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0;
1164   CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL;
1165   CeedSize     **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1166   for (CeedInt i = 0; i < num_input_fields; i++) {
1167     CeedVector vec;
1168     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1169     if (vec == CEED_VECTOR_ACTIVE) {
1170       CeedBasis    basis_in = NULL;
1171       CeedEvalMode eval_mode;
1172       CeedInt      index = -1, dim, num_comp, q_comp;
1173       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1174       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1175       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1176       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1177       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1178       for (CeedInt i = 0; i < num_active_bases; i++) {
1179         if ((*data)->active_bases[i] == basis_in) index = i;
1180       }
1181       if (index == -1) {
1182         CeedElemRestriction elem_rstr_in;
1183         index = num_active_bases;
1184         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1185         (*data)->active_bases[num_active_bases] = NULL;
1186         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases]));
1187         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1188         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1189         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1190         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases]));
1191         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1192         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1193         num_eval_modes_in[index]  = 0;
1194         num_eval_modes_out[index] = 0;
1195         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1196         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1197         eval_modes_in[index]  = NULL;
1198         eval_modes_out[index] = NULL;
1199         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1200         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1201         eval_mode_offsets_in[index]  = NULL;
1202         eval_mode_offsets_out[index] = NULL;
1203         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1204         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1205         (*data)->assembled_bases_in[index]  = NULL;
1206         (*data)->assembled_bases_out[index] = NULL;
1207         num_active_bases++;
1208       }
1209       if (eval_mode != CEED_EVAL_WEIGHT) {
1210         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1211         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1212         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1213         for (CeedInt d = 0; d < q_comp; d++) {
1214           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1215           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1216           offset += num_comp;
1217         }
1218         num_eval_modes_in[index] += q_comp;
1219       }
1220     }
1221   }
1222   (*data)->num_eval_modes_in    = num_eval_modes_in;
1223   (*data)->eval_modes_in        = eval_modes_in;
1224   (*data)->eval_mode_offsets_in = eval_mode_offsets_in;
1225 
1226   // Determine active output basis
1227   CeedInt num_output_fields;
1228   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1229   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1230   offset = 0;
1231   for (CeedInt i = 0; i < num_output_fields; i++) {
1232     CeedVector vec;
1233     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1234     if (vec == CEED_VECTOR_ACTIVE) {
1235       CeedBasis    basis_out = NULL;
1236       CeedEvalMode eval_mode;
1237       CeedInt      index = -1, dim, num_comp, q_comp;
1238       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1239       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1240       CeedCall(CeedBasisGetDimension(basis_out, &dim));
1241       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1242       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1243       for (CeedInt i = 0; i < num_active_bases; i++) {
1244         if ((*data)->active_bases[i] == basis_out) index = i;
1245       }
1246       if (index == -1) {
1247         CeedElemRestriction elem_rstr_out;
1248 
1249         index = num_active_bases;
1250         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1251         (*data)->active_bases[num_active_bases] = NULL;
1252         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases]));
1253         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1254         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1255         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1256         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases]));
1257         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1258         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1259         num_eval_modes_in[index]  = 0;
1260         num_eval_modes_out[index] = 0;
1261         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1262         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1263         eval_modes_in[index]  = NULL;
1264         eval_modes_out[index] = NULL;
1265         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1266         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1267         eval_mode_offsets_in[index]  = NULL;
1268         eval_mode_offsets_out[index] = NULL;
1269         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1270         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1271         (*data)->assembled_bases_in[index]  = NULL;
1272         (*data)->assembled_bases_out[index] = NULL;
1273         num_active_bases++;
1274       }
1275       if (eval_mode != CEED_EVAL_WEIGHT) {
1276         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1277         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1278         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1279         for (CeedInt d = 0; d < q_comp; d++) {
1280           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1281           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1282           offset += num_comp;
1283         }
1284         num_eval_modes_out[index] += q_comp;
1285       }
1286     }
1287   }
1288   (*data)->num_output_components = offset;
1289   (*data)->num_eval_modes_out    = num_eval_modes_out;
1290   (*data)->eval_modes_out        = eval_modes_out;
1291   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1292   (*data)->num_active_bases      = num_active_bases;
1293 
1294   return CEED_ERROR_SUCCESS;
1295 }
1296 
1297 /**
1298   @brief Get CeedOperator CeedEvalModes for assembly.
1299 
1300   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1301 
1302   @param[in]  data                  CeedOperatorAssemblyData
1303   @param[out] num_active_bases      Total number of active bases
1304   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1305                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1306   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1307   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1308   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1309   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1310   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1311   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1312                                       including contributions of all active bases
1313 
1314   @return An error code: 0 - success, otherwise - failure
1315 
1316 
1317   @ref Backend
1318 **/
1319 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in,
1320                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out,
1321                                          const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) {
1322   if (num_active_bases) *num_active_bases = data->num_active_bases;
1323   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1324   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1325   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1326   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1327   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1328   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1329   if (num_output_components) *num_output_components = data->num_output_components;
1330 
1331   return CEED_ERROR_SUCCESS;
1332 }
1333 
1334 /**
1335   @brief Get CeedOperator CeedBasis data for assembly.
1336 
1337   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1338 
1339   @param[in]  data                CeedOperatorAssemblyData
1340   @param[out] num_active_bases    Number of active bases, or NULL
1341   @param[out] active_bases        Pointer to hold active CeedBasis, or NULL
1342   @param[out] assembled_bases_in  Pointer to hold assembled active input B, or NULL
1343   @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL
1344 
1345   @return An error code: 0 - success, otherwise - failure
1346 
1347   @ref Backend
1348 **/
1349 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases,
1350                                      const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) {
1351   // Assemble B_in, B_out if needed
1352   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1353     CeedInt num_qpts;
1354 
1355     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1356     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1357       CeedInt     num_nodes;
1358       CeedScalar *B_in = NULL, *identity = NULL;
1359       bool        has_eval_none = false;
1360 
1361       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1362       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1363 
1364       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1365         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1366       }
1367       if (has_eval_none) {
1368         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1369         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1370           identity[i * num_nodes + i] = 1.0;
1371         }
1372       }
1373 
1374       for (CeedInt q = 0; q < num_qpts; q++) {
1375         for (CeedInt n = 0; n < num_nodes; n++) {
1376           CeedInt      d_in              = 0, q_comp_in;
1377           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1378           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1379             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1380             const CeedScalar *B  = NULL;
1381             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B);
1382             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in));
1383             if (q_comp_in > 1) {
1384               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1385               else B = &B[(++d_in) * num_qpts * num_nodes];
1386             }
1387             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1388             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1389           }
1390         }
1391       }
1392       if (identity) CeedCall(CeedFree(identity));
1393       data->assembled_bases_in[b] = B_in;
1394     }
1395   }
1396 
1397   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1398     CeedInt num_qpts;
1399 
1400     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1401     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1402       CeedInt     num_nodes;
1403       bool        has_eval_none = false;
1404       CeedScalar *B_out = NULL, *identity = NULL;
1405 
1406       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1407       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1408 
1409       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1410         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1411       }
1412       if (has_eval_none) {
1413         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1414         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1415           identity[i * num_nodes + i] = 1.0;
1416         }
1417       }
1418 
1419       for (CeedInt q = 0; q < num_qpts; q++) {
1420         for (CeedInt n = 0; n < num_nodes; n++) {
1421           CeedInt      d_out              = 0, q_comp_out;
1422           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1423           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1424             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1425             const CeedScalar *B  = NULL;
1426             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B);
1427             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out));
1428             if (q_comp_out > 1) {
1429               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1430               else B = &B[(++d_out) * num_qpts * num_nodes];
1431             }
1432             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1433             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1434           }
1435         }
1436       }
1437       if (identity) CeedCall(CeedFree(identity));
1438       data->assembled_bases_out[b] = B_out;
1439     }
1440   }
1441 
1442   // Pass out assembled data
1443   if (active_bases) *active_bases = data->active_bases;
1444   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1445   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1446 
1447   return CEED_ERROR_SUCCESS;
1448 }
1449 
1450 /**
1451   @brief Get CeedOperator CeedBasis data for assembly.
1452 
1453   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1454 
1455   @param[in]  data                  CeedOperatorAssemblyData
1456   @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL
1457   @param[out] active_elem_rstrs     Pointer to hold active CeedElemRestrictions, or NULL
1458 
1459   @return An error code: 0 - success, otherwise - failure
1460 
1461   @ref Backend
1462 **/
1463 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs,
1464                                                 CeedElemRestriction **active_elem_rstrs) {
1465   if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases;
1466   if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs;
1467 
1468   return CEED_ERROR_SUCCESS;
1469 }
1470 
1471 /**
1472   @brief Destroy CeedOperatorAssemblyData
1473 
1474   @param[in,out] data CeedOperatorAssemblyData to destroy
1475 
1476   @return An error code: 0 - success, otherwise - failure
1477 
1478   @ref Backend
1479 **/
1480 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1481   if (!*data) {
1482     *data = NULL;
1483     return CEED_ERROR_SUCCESS;
1484   }
1485   CeedCall(CeedDestroy(&(*data)->ceed));
1486   for (CeedInt b = 0; b < (*data)->num_active_bases; b++) {
1487     CeedCall(CeedBasisDestroy(&(*data)->active_bases[b]));
1488     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b]));
1489     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1490     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1491     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1492     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1493     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1494     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1495   }
1496   CeedCall(CeedFree(&(*data)->active_bases));
1497   CeedCall(CeedFree(&(*data)->active_elem_rstrs));
1498   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1499   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1500   CeedCall(CeedFree(&(*data)->eval_modes_in));
1501   CeedCall(CeedFree(&(*data)->eval_modes_out));
1502   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1503   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1504   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1505   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1506 
1507   CeedCall(CeedFree(data));
1508   return CEED_ERROR_SUCCESS;
1509 }
1510 
1511 /// @}
1512 
1513 /// ----------------------------------------------------------------------------
1514 /// CeedOperator Public API
1515 /// ----------------------------------------------------------------------------
1516 /// @addtogroup CeedOperatorUser
1517 /// @{
1518 
1519 /**
1520   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1521 
1522   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1523     The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices
1524 representing the action of the CeedQFunction for a corresponding quadrature point on an element.
1525 
1526   Inputs and outputs are in the order provided by the
1527 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order,
1528 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1]
1529 and producing the output [dv_0, dv_1, v].
1530 
1531   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1532 
1533   @param[in]  op        CeedOperator to assemble CeedQFunction
1534   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1535   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1536   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1537 
1538   @return An error code: 0 - success, otherwise - failure
1539 
1540   @ref User
1541 **/
1542 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1543   CeedCall(CeedOperatorCheckReady(op));
1544 
1545   if (op->LinearAssembleQFunction) {
1546     // Backend version
1547     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1548   } else {
1549     // Operator fallback
1550     CeedOperator op_fallback;
1551 
1552     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1553     if (op_fallback) {
1554       CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1555     } else {
1556       // LCOV_EXCL_START
1557       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1558       // LCOV_EXCL_STOP
1559     }
1560   }
1561   return CEED_ERROR_SUCCESS;
1562 }
1563 
1564 /**
1565   @brief Assemble CeedQFunction and store result internally.
1566 
1567   Return copied references of stored data to the caller.
1568   Caller is responsible for ownership and destruction of the copied references.
1569   See also @ref CeedOperatorLinearAssembleQFunction
1570 
1571   @param[in]  op        CeedOperator to assemble CeedQFunction
1572   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1573   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1574   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1575 
1576   @return An error code: 0 - success, otherwise - failure
1577 
1578   @ref User
1579 **/
1580 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1581   CeedCall(CeedOperatorCheckReady(op));
1582 
1583   if (op->LinearAssembleQFunctionUpdate) {
1584     // Backend version
1585     bool                qf_assembled_is_setup;
1586     CeedVector          assembled_vec  = NULL;
1587     CeedElemRestriction assembled_rstr = NULL;
1588 
1589     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1590     if (qf_assembled_is_setup) {
1591       bool update_needed;
1592 
1593       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1594       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1595       if (update_needed) {
1596         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1597       }
1598     } else {
1599       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1600       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1601     }
1602     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1603 
1604     // Copy reference from internally held copy
1605     *assembled = NULL;
1606     *rstr      = NULL;
1607     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1608     CeedCall(CeedVectorDestroy(&assembled_vec));
1609     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1610     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1611   } else {
1612     // Operator fallback
1613     CeedOperator op_fallback;
1614 
1615     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1616     if (op_fallback) {
1617       CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1618     } else {
1619       // LCOV_EXCL_START
1620       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1621       // LCOV_EXCL_STOP
1622     }
1623   }
1624 
1625   return CEED_ERROR_SUCCESS;
1626 }
1627 
1628 /**
1629   @brief Assemble the diagonal of a square linear CeedOperator
1630 
1631   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1632 
1633   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1634 
1635   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1636 
1637   @param[in]  op        CeedOperator to assemble CeedQFunction
1638   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1639   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1640 
1641   @return An error code: 0 - success, otherwise - failure
1642 
1643   @ref User
1644 **/
1645 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1646   bool is_composite;
1647   CeedCall(CeedOperatorCheckReady(op));
1648   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1649 
1650   CeedSize input_size = 0, output_size = 0;
1651   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1652   if (input_size != output_size) {
1653     // LCOV_EXCL_START
1654     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1655     // LCOV_EXCL_STOP
1656   }
1657 
1658   // Early exit for empty operator
1659   if (!is_composite) {
1660     CeedInt num_elem = 0;
1661 
1662     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1663     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1664   }
1665 
1666   if (op->LinearAssembleDiagonal) {
1667     // Backend version
1668     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1669     return CEED_ERROR_SUCCESS;
1670   } else if (op->LinearAssembleAddDiagonal) {
1671     // Backend version with zeroing first
1672     CeedCall(CeedVectorSetValue(assembled, 0.0));
1673     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1674     return CEED_ERROR_SUCCESS;
1675   } else {
1676     // Operator fallback
1677     CeedOperator op_fallback;
1678 
1679     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1680     if (op_fallback) {
1681       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1682       return CEED_ERROR_SUCCESS;
1683     }
1684   }
1685   // Default interface implementation
1686   CeedCall(CeedVectorSetValue(assembled, 0.0));
1687   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1688 
1689   return CEED_ERROR_SUCCESS;
1690 }
1691 
1692 /**
1693   @brief Assemble the diagonal of a square linear CeedOperator
1694 
1695   This sums into a CeedVector the diagonal of a linear CeedOperator.
1696 
1697   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1698 
1699   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1700 
1701   @param[in]  op        CeedOperator to assemble CeedQFunction
1702   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1703   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1704 
1705   @return An error code: 0 - success, otherwise - failure
1706 
1707   @ref User
1708 **/
1709 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1710   bool is_composite;
1711   CeedCall(CeedOperatorCheckReady(op));
1712   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1713 
1714   CeedSize input_size = 0, output_size = 0;
1715   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1716   if (input_size != output_size) {
1717     // LCOV_EXCL_START
1718     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1719     // LCOV_EXCL_STOP
1720   }
1721 
1722   // Early exit for empty operator
1723   if (!is_composite) {
1724     CeedInt num_elem = 0;
1725 
1726     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1727     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1728   }
1729 
1730   if (op->LinearAssembleAddDiagonal) {
1731     // Backend version
1732     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1733     return CEED_ERROR_SUCCESS;
1734   } else {
1735     // Operator fallback
1736     CeedOperator op_fallback;
1737 
1738     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1739     if (op_fallback) {
1740       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1741       return CEED_ERROR_SUCCESS;
1742     }
1743   }
1744   // Default interface implementation
1745   if (is_composite) {
1746     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1747   } else {
1748     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1749   }
1750 
1751   return CEED_ERROR_SUCCESS;
1752 }
1753 
1754 /**
1755   @brief Assemble the point block diagonal of a square linear CeedOperator
1756 
1757   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1758 
1759   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1760 
1761   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1762 
1763   @param[in]  op        CeedOperator to assemble CeedQFunction
1764   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1765 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1766 component in].
1767   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1768 
1769   @return An error code: 0 - success, otherwise - failure
1770 
1771   @ref User
1772 **/
1773 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1774   bool is_composite;
1775   CeedCall(CeedOperatorCheckReady(op));
1776   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1777 
1778   CeedSize input_size = 0, output_size = 0;
1779   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1780   if (input_size != output_size) {
1781     // LCOV_EXCL_START
1782     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1783     // LCOV_EXCL_STOP
1784   }
1785 
1786   // Early exit for empty operator
1787   if (!is_composite) {
1788     CeedInt num_elem = 0;
1789 
1790     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1791     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1792   }
1793 
1794   if (op->LinearAssemblePointBlockDiagonal) {
1795     // Backend version
1796     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1797     return CEED_ERROR_SUCCESS;
1798   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1799     // Backend version with zeroing first
1800     CeedCall(CeedVectorSetValue(assembled, 0.0));
1801     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1802     return CEED_ERROR_SUCCESS;
1803   } else {
1804     // Operator fallback
1805     CeedOperator op_fallback;
1806 
1807     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1808     if (op_fallback) {
1809       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1810       return CEED_ERROR_SUCCESS;
1811     }
1812   }
1813   // Default interface implementation
1814   CeedCall(CeedVectorSetValue(assembled, 0.0));
1815   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1816 
1817   return CEED_ERROR_SUCCESS;
1818 }
1819 
1820 /**
1821   @brief Assemble the point block diagonal of a square linear CeedOperator
1822 
1823   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1824 
1825   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1826 
1827   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1828 
1829   @param[in]  op        CeedOperator to assemble CeedQFunction
1830   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1831 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1832 component in].
1833   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1834 
1835   @return An error code: 0 - success, otherwise - failure
1836 
1837   @ref User
1838 **/
1839 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1840   bool is_composite;
1841   CeedCall(CeedOperatorCheckReady(op));
1842   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1843 
1844   CeedSize input_size = 0, output_size = 0;
1845   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1846   if (input_size != output_size) {
1847     // LCOV_EXCL_START
1848     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1849     // LCOV_EXCL_STOP
1850   }
1851 
1852   // Early exit for empty operator
1853   if (!is_composite) {
1854     CeedInt num_elem = 0;
1855 
1856     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1857     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1858   }
1859 
1860   if (op->LinearAssembleAddPointBlockDiagonal) {
1861     // Backend version
1862     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1863     return CEED_ERROR_SUCCESS;
1864   } else {
1865     // Operator fallback
1866     CeedOperator op_fallback;
1867 
1868     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1869     if (op_fallback) {
1870       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1871       return CEED_ERROR_SUCCESS;
1872     }
1873   }
1874   // Default interface implementation
1875   if (is_composite) {
1876     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1877   } else {
1878     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1879   }
1880 
1881   return CEED_ERROR_SUCCESS;
1882 }
1883 
1884 /**
1885    @brief Fully assemble the nonzero pattern of a linear operator.
1886 
1887    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1888 
1889    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1890 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j)
1891 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1892 
1893    This will generally be slow unless your operator is low-order.
1894 
1895    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1896 
1897    @param[in]  op          CeedOperator to assemble
1898    @param[out] num_entries Number of entries in coordinate nonzero pattern
1899    @param[out] rows        Row number for each entry
1900    @param[out] cols        Column number for each entry
1901 
1902    @ref User
1903 **/
1904 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1905   CeedInt       num_suboperators, single_entries;
1906   CeedOperator *sub_operators;
1907   bool          is_composite;
1908   CeedCall(CeedOperatorCheckReady(op));
1909   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1910 
1911   if (op->LinearAssembleSymbolic) {
1912     // Backend version
1913     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1914     return CEED_ERROR_SUCCESS;
1915   } else {
1916     // Operator fallback
1917     CeedOperator op_fallback;
1918 
1919     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1920     if (op_fallback) {
1921       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1922       return CEED_ERROR_SUCCESS;
1923     }
1924   }
1925 
1926   // Default interface implementation
1927 
1928   // count entries and allocate rows, cols arrays
1929   *num_entries = 0;
1930   if (is_composite) {
1931     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1932     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1933     for (CeedInt k = 0; k < num_suboperators; ++k) {
1934       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1935       *num_entries += single_entries;
1936     }
1937   } else {
1938     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1939     *num_entries += single_entries;
1940   }
1941   CeedCall(CeedCalloc(*num_entries, rows));
1942   CeedCall(CeedCalloc(*num_entries, cols));
1943 
1944   // assemble nonzero locations
1945   CeedInt offset = 0;
1946   if (is_composite) {
1947     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1948     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1949     for (CeedInt k = 0; k < num_suboperators; ++k) {
1950       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1951       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1952       offset += single_entries;
1953     }
1954   } else {
1955     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1956   }
1957 
1958   return CEED_ERROR_SUCCESS;
1959 }
1960 
1961 /**
1962    @brief Fully assemble the nonzero entries of a linear operator.
1963 
1964    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1965 
1966    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1967 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added,
1968 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1969 
1970    This will generally be slow unless your operator is low-order.
1971 
1972    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1973 
1974    @param[in]  op     CeedOperator to assemble
1975    @param[out] values Values to assemble into matrix
1976 
1977    @ref User
1978 **/
1979 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1980   CeedInt       num_suboperators, single_entries = 0;
1981   CeedOperator *sub_operators;
1982   bool          is_composite;
1983   CeedCall(CeedOperatorCheckReady(op));
1984   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1985 
1986   // Early exit for empty operator
1987   if (!is_composite) {
1988     CeedInt num_elem = 0;
1989 
1990     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1991     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1992   }
1993 
1994   if (op->LinearAssemble) {
1995     // Backend version
1996     CeedCall(op->LinearAssemble(op, values));
1997     return CEED_ERROR_SUCCESS;
1998   } else {
1999     // Operator fallback
2000     CeedOperator op_fallback;
2001 
2002     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2003     if (op_fallback) {
2004       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2005       return CEED_ERROR_SUCCESS;
2006     }
2007   }
2008 
2009   // Default interface implementation
2010   CeedInt offset = 0;
2011   CeedCall(CeedVectorSetValue(values, 0.0));
2012   if (is_composite) {
2013     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2014     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2015     for (CeedInt k = 0; k < num_suboperators; k++) {
2016       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2017       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2018       offset += single_entries;
2019     }
2020   } else {
2021     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2022   }
2023 
2024   return CEED_ERROR_SUCCESS;
2025 }
2026 
2027 /**
2028   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
2029 
2030   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2031 
2032   @param[in]  op               Composite CeedOperator
2033   @param[in]  num_skip_indices Number of suboperators to skip
2034   @param[in]  skip_indices     Array of indices of suboperators to skip
2035   @param[out] mult             Vector to store multiplicity (of size l_size)
2036 
2037   @return An error code: 0 - success, otherwise - failure
2038 
2039   @ref User
2040 **/
2041 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2042   CeedCall(CeedOperatorCheckReady(op));
2043 
2044   Ceed                ceed;
2045   CeedInt             num_suboperators;
2046   CeedSize            l_vec_len;
2047   CeedScalar         *mult_array;
2048   CeedVector          ones_l_vec;
2049   CeedElemRestriction elem_rstr;
2050   CeedOperator       *sub_operators;
2051 
2052   CeedCall(CeedOperatorGetCeed(op, &ceed));
2053 
2054   // Zero mult vector
2055   CeedCall(CeedVectorSetValue(mult, 0.0));
2056 
2057   // Get suboperators
2058   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2059   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2060   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2061 
2062   // Work vector
2063   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2064   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2065   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2066   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2067 
2068   // Compute multiplicity across suboperators
2069   for (CeedInt i = 0; i < num_suboperators; i++) {
2070     const CeedScalar *sub_mult_array;
2071     CeedVector        sub_mult_l_vec, ones_e_vec;
2072 
2073     // -- Check for suboperator to skip
2074     for (CeedInt j = 0; j < num_skip_indices; j++) {
2075       if (skip_indices[j] == i) continue;
2076     }
2077 
2078     // -- Sub operator multiplicity
2079     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2080     CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2081     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2082     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2083     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2084     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2085     // ---- Flag every node present in the current suboperator
2086     for (CeedInt j = 0; j < l_vec_len; j++) {
2087       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2088     }
2089     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2090     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2091     CeedCall(CeedVectorDestroy(&ones_e_vec));
2092   }
2093   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2094   CeedCall(CeedVectorDestroy(&ones_l_vec));
2095 
2096   return CEED_ERROR_SUCCESS;
2097 }
2098 
2099 /**
2100   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2101 grid interpolation
2102 
2103   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2104 
2105   @param[in]  op_fine      Fine grid operator
2106   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2107   @param[in]  rstr_coarse  Coarse grid restriction
2108   @param[in]  basis_coarse Coarse grid active vector basis
2109   @param[out] op_coarse    Coarse grid operator
2110   @param[out] op_prolong   Coarse to fine operator, or NULL
2111   @param[out] op_restrict  Fine to coarse operator, or NULL
2112 
2113   @return An error code: 0 - success, otherwise - failure
2114 
2115   @ref User
2116 **/
2117 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2118                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2119   CeedCall(CeedOperatorCheckReady(op_fine));
2120 
2121   // Build prolongation matrix, if required
2122   CeedBasis basis_c_to_f = NULL;
2123   if (op_prolong || op_restrict) {
2124     CeedBasis basis_fine;
2125     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2126     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2127   }
2128 
2129   // Core code
2130   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2131 
2132   return CEED_ERROR_SUCCESS;
2133 }
2134 
2135 /**
2136   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2137 
2138   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2139 
2140   @param[in]  op_fine       Fine grid operator
2141   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2142   @param[in]  rstr_coarse   Coarse grid restriction
2143   @param[in]  basis_coarse  Coarse grid active vector basis
2144   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2145   @param[out] op_coarse     Coarse grid operator
2146   @param[out] op_prolong    Coarse to fine operator, or NULL
2147   @param[out] op_restrict   Fine to coarse operator, or NULL
2148 
2149   @return An error code: 0 - success, otherwise - failure
2150 
2151   @ref User
2152 **/
2153 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2154                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2155                                              CeedOperator *op_restrict) {
2156   CeedCall(CeedOperatorCheckReady(op_fine));
2157   Ceed ceed;
2158   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2159 
2160   // Check for compatible quadrature spaces
2161   CeedBasis basis_fine;
2162   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2163   CeedInt Q_f, Q_c;
2164   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2165   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2166   if (Q_f != Q_c) {
2167     // LCOV_EXCL_START
2168     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2169     // LCOV_EXCL_STOP
2170   }
2171 
2172   // Create coarse to fine basis, if required
2173   CeedBasis basis_c_to_f = NULL;
2174   if (op_prolong || op_restrict) {
2175     // Check if interpolation matrix is provided
2176     if (!interp_c_to_f) {
2177       // LCOV_EXCL_START
2178       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2179       // LCOV_EXCL_STOP
2180     }
2181     CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2182     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2183     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2184     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2185     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2186     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2187     CeedScalar *q_ref, *q_weight, *grad;
2188     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2189     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2190     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2191     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2192     CeedCall(CeedFree(&q_ref));
2193     CeedCall(CeedFree(&q_weight));
2194     CeedCall(CeedFree(&grad));
2195   }
2196 
2197   // Core code
2198   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2199   return CEED_ERROR_SUCCESS;
2200 }
2201 
2202 /**
2203   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2204 
2205   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2206 
2207   @param[in]  op_fine       Fine grid operator
2208   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2209   @param[in]  rstr_coarse   Coarse grid restriction
2210   @param[in]  basis_coarse  Coarse grid active vector basis
2211   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2212   @param[out] op_coarse     Coarse grid operator
2213   @param[out] op_prolong    Coarse to fine operator, or NULL
2214   @param[out] op_restrict   Fine to coarse operator, or NULL
2215 
2216   @return An error code: 0 - success, otherwise - failure
2217 
2218   @ref User
2219 **/
2220 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2221                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2222                                        CeedOperator *op_restrict) {
2223   CeedCall(CeedOperatorCheckReady(op_fine));
2224   Ceed ceed;
2225   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2226 
2227   // Check for compatible quadrature spaces
2228   CeedBasis basis_fine;
2229   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2230   CeedInt Q_f, Q_c;
2231   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2232   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2233   if (Q_f != Q_c) {
2234     // LCOV_EXCL_START
2235     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2236     // LCOV_EXCL_STOP
2237   }
2238 
2239   // Coarse to fine basis
2240   CeedBasis basis_c_to_f = NULL;
2241   if (op_prolong || op_restrict) {
2242     // Check if interpolation matrix is provided
2243     if (!interp_c_to_f) {
2244       // LCOV_EXCL_START
2245       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2246       // LCOV_EXCL_STOP
2247     }
2248     CeedElemTopology topo;
2249     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2250     CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2251     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2252     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2253     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2254     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2255     CeedScalar *q_ref, *q_weight, *grad;
2256     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2257     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2258     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2259     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2260     CeedCall(CeedFree(&q_ref));
2261     CeedCall(CeedFree(&q_weight));
2262     CeedCall(CeedFree(&grad));
2263   }
2264 
2265   // Core code
2266   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2267   return CEED_ERROR_SUCCESS;
2268 }
2269 
2270 /**
2271   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2272 
2273   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2274     This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2275     The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T
2276 \hat S V\f$. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear.
2277 
2278   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2279 
2280   @param[in]  op      CeedOperator to create element inverses
2281   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2282   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2283 
2284   @return An error code: 0 - success, otherwise - failure
2285 
2286   @ref User
2287 **/
2288 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2289   CeedCall(CeedOperatorCheckReady(op));
2290 
2291   if (op->CreateFDMElementInverse) {
2292     // Backend version
2293     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2294     return CEED_ERROR_SUCCESS;
2295   } else {
2296     // Operator fallback
2297     CeedOperator op_fallback;
2298 
2299     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2300     if (op_fallback) {
2301       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2302       return CEED_ERROR_SUCCESS;
2303     }
2304   }
2305 
2306   // Default interface implementation
2307   Ceed ceed, ceed_parent;
2308   CeedCall(CeedOperatorGetCeed(op, &ceed));
2309   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2310   ceed_parent = ceed_parent ? ceed_parent : ceed;
2311   CeedQFunction qf;
2312   CeedCall(CeedOperatorGetQFunction(op, &qf));
2313 
2314   // Determine active input basis
2315   bool                interp = false, grad = false;
2316   CeedBasis           basis = NULL;
2317   CeedElemRestriction rstr  = NULL;
2318   CeedOperatorField  *op_fields;
2319   CeedQFunctionField *qf_fields;
2320   CeedInt             num_input_fields;
2321   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2322   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2323   for (CeedInt i = 0; i < num_input_fields; i++) {
2324     CeedVector vec;
2325     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2326     if (vec == CEED_VECTOR_ACTIVE) {
2327       CeedEvalMode eval_mode;
2328       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2329       interp = interp || eval_mode == CEED_EVAL_INTERP;
2330       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2331       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2332       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2333     }
2334   }
2335   if (!basis) {
2336     // LCOV_EXCL_START
2337     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2338     // LCOV_EXCL_STOP
2339   }
2340   CeedSize l_size = 1;
2341   CeedInt  P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2342   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2343   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2344   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2345   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2346   CeedCall(CeedBasisGetDimension(basis, &dim));
2347   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2348   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2349   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2350 
2351   // Build and diagonalize 1D Mass and Laplacian
2352   bool tensor_basis;
2353   CeedCall(CeedBasisIsTensor(basis, &tensor_basis));
2354   if (!tensor_basis) {
2355     // LCOV_EXCL_START
2356     return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2357     // LCOV_EXCL_STOP
2358   }
2359   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2360   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2361   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2362   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2363   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2364   CeedCall(CeedCalloc(P_1d, &lambda));
2365   // -- Build matrices
2366   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2367   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2368   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2369   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2370   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2371 
2372   // -- Diagonalize
2373   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2374   CeedCall(CeedFree(&mass));
2375   CeedCall(CeedFree(&laplace));
2376   for (CeedInt i = 0; i < P_1d; i++) {
2377     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2378   }
2379   CeedCall(CeedFree(&x));
2380 
2381   // Assemble QFunction
2382   CeedVector          assembled;
2383   CeedElemRestriction rstr_qf;
2384   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2385   CeedInt layout[3];
2386   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2387   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2388   CeedScalar max_norm = 0;
2389   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2390 
2391   // Calculate element averages
2392   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2393   CeedScalar       *elem_avg;
2394   const CeedScalar *assembled_array, *q_weight_array;
2395   CeedVector        q_weight;
2396   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2397   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2398   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2399   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2400   CeedCall(CeedCalloc(num_elem, &elem_avg));
2401   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2402   for (CeedInt e = 0; e < num_elem; e++) {
2403     CeedInt count = 0;
2404     for (CeedInt q = 0; q < num_qpts; q++) {
2405       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2406         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2407           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2408           count++;
2409         }
2410       }
2411     }
2412     if (count) {
2413       elem_avg[e] /= count;
2414     } else {
2415       elem_avg[e] = 1.0;
2416     }
2417   }
2418   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2419   CeedCall(CeedVectorDestroy(&assembled));
2420   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2421   CeedCall(CeedVectorDestroy(&q_weight));
2422 
2423   // Build FDM diagonal
2424   CeedVector  q_data;
2425   CeedScalar *q_data_array, *fdm_diagonal;
2426   CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2427   const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2428   for (CeedInt c = 0; c < num_comp; c++) {
2429     for (CeedInt n = 0; n < num_nodes; n++) {
2430       if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2431       if (grad) {
2432         for (CeedInt d = 0; d < dim; d++) {
2433           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2434           fdm_diagonal[c * num_nodes + n] += lambda[i];
2435         }
2436       }
2437       if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2438     }
2439   }
2440   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2441   CeedCall(CeedVectorSetValue(q_data, 0.0));
2442   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2443   for (CeedInt e = 0; e < num_elem; e++) {
2444     for (CeedInt c = 0; c < num_comp; c++) {
2445       for (CeedInt n = 0; n < num_nodes; n++) q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2446     }
2447   }
2448   CeedCall(CeedFree(&elem_avg));
2449   CeedCall(CeedFree(&fdm_diagonal));
2450   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2451 
2452   // Setup FDM operator
2453   // -- Basis
2454   CeedBasis   fdm_basis;
2455   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2456   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2457   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2458   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2459   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2460   CeedCall(CeedFree(&fdm_interp));
2461   CeedCall(CeedFree(&grad_dummy));
2462   CeedCall(CeedFree(&q_ref_dummy));
2463   CeedCall(CeedFree(&q_weight_dummy));
2464   CeedCall(CeedFree(&lambda));
2465 
2466   // -- Restriction
2467   CeedElemRestriction rstr_qd_i;
2468   CeedInt             strides[3] = {1, num_nodes, num_nodes * num_comp};
2469   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i));
2470   // -- QFunction
2471   CeedQFunction qf_fdm;
2472   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2473   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2474   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2475   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2476   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2477   // -- QFunction context
2478   CeedInt *num_comp_data;
2479   CeedCall(CeedCalloc(1, &num_comp_data));
2480   num_comp_data[0] = num_comp;
2481   CeedQFunctionContext ctx_fdm;
2482   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2483   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2484   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2485   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2486   // -- Operator
2487   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2488   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2489   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2490   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2491 
2492   // Cleanup
2493   CeedCall(CeedVectorDestroy(&q_data));
2494   CeedCall(CeedBasisDestroy(&fdm_basis));
2495   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2496   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2497 
2498   return CEED_ERROR_SUCCESS;
2499 }
2500 
2501 /// @}
2502