xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision 9fd66db6d1a7a1c9032418479851d404799ab3ba)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) {
128       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     }
130     // Cleanup
131     CeedCall(CeedQFunctionDestroy(&qf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
133     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
134   }
135   CeedCall(CeedOperatorSetName(op_fallback, op->name));
136   CeedCall(CeedOperatorCheckReady(op_fallback));
137   op->op_fallback = op_fallback;
138 
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
144 
145   @param[in]  op          CeedOperator to retrieve fallback for
146   @param[out] op_fallback Fallback CeedOperator
147 
148   @return An error code: 0 - success, otherwise - failure
149 
150   @ref Developer
151 **/
152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
153   // Create if needed
154   if (!op->op_fallback) {
155     CeedCall(CeedOperatorCreateFallback(op));
156   }
157   if (op->op_fallback) {
158     bool is_debug;
159 
160     CeedCall(CeedIsDebug(op->ceed, &is_debug));
161     if (is_debug) {
162       Ceed        ceed, ceed_fallback;
163       const char *resource, *resource_fallback;
164 
165       CeedCall(CeedOperatorGetCeed(op, &ceed));
166       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
167       CeedCall(CeedGetResource(ceed, &resource));
168       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
169 
170       CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n");
171       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
172                 op->op_fallback);
173     }
174   }
175   *op_fallback = op->op_fallback;
176 
177   return CEED_ERROR_SUCCESS;
178 }
179 
180 /**
181   @brief Select correct basis matrix pointer based on CeedEvalMode
182 
183   @param[in]  basis     CeedBasis from which to get the basis matrix
184   @param[in]  eval_mode Current basis evaluation mode
185   @param[in]  identity  Pointer to identity matrix
186   @param[out] basis_ptr Basis pointer to set
187 
188   @ref Developer
189 **/
190 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
191   switch (eval_mode) {
192     case CEED_EVAL_NONE:
193       *basis_ptr = identity;
194       break;
195     case CEED_EVAL_INTERP:
196       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
197       break;
198     case CEED_EVAL_GRAD:
199       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
200       break;
201     case CEED_EVAL_DIV:
202       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
203       break;
204     case CEED_EVAL_CURL:
205       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
206       break;
207     case CEED_EVAL_WEIGHT:
208       break;  // Caught by QF Assembly
209   }
210   assert(*basis_ptr != NULL);
211 
212   return CEED_ERROR_SUCCESS;
213 }
214 
215 /**
216   @brief Create point block restriction for active operator field
217 
218   @param[in]  rstr            Original CeedElemRestriction for active field
219   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
220 
221   @return An error code: 0 - success, otherwise - failure
222 
223   @ref Developer
224 **/
225 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
226   Ceed ceed;
227   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
228   const CeedInt *offsets;
229   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
230 
231   // Expand offsets
232   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
233   CeedSize l_size;
234   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
235   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
236   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
237   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
238   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
239   CeedInt shift = num_comp;
240   if (comp_stride != 1) shift *= num_comp;
241   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
242   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
243     pointblock_offsets[i] = offsets[i] * shift;
244   }
245 
246   // Create new restriction
247   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
248                                      pointblock_offsets, pointblock_rstr));
249 
250   // Cleanup
251   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
252 
253   return CEED_ERROR_SUCCESS;
254 }
255 
256 /**
257   @brief Core logic for assembling operator diagonal or point block diagonal
258 
259   @param[in]  op            CeedOperator to assemble point block diagonal
260   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
261   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
262   @param[out] assembled     CeedVector to store assembled diagonal
263 
264   @return An error code: 0 - success, otherwise - failure
265 
266   @ref Developer
267 **/
268 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
269   Ceed ceed;
270   CeedCall(CeedOperatorGetCeed(op, &ceed));
271 
272   // Assemble QFunction
273   CeedQFunction       qf;
274   const CeedScalar   *assembled_qf_array;
275   CeedVector          assembled_qf;
276   CeedElemRestriction assembled_elem_rstr;
277   CeedInt             num_input_fields, num_output_fields;
278   CeedInt             layout[3];
279 
280   CeedCall(CeedOperatorGetQFunction(op, &qf));
281   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
282   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
283   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout));
284   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
285   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
286 
287   // Get assembly data
288   CeedOperatorAssemblyData data;
289   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
290   CeedInt                 *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
291   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
292   CeedBasis               *active_bases;
293   CeedElemRestriction     *active_elem_rstrs;
294   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
295   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
296                                                 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components));
297   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL));
298   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs));
299 
300   // Loop over all active bases
301   for (CeedInt b = 0; b < num_active_bases; b++) {
302     // Assemble point block diagonal restriction, if needed
303     CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b];
304 
305     if (is_pointblock) {
306       CeedElemRestriction point_block_elem_rstr;
307 
308       CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr));
309       diag_elem_rstr = point_block_elem_rstr;
310     }
311 
312     // Create diagonal vector
313     CeedVector elem_diag;
314     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
315 
316     // Assemble element operator diagonals
317     CeedScalar *elem_diag_array;
318     CeedInt     num_elem, num_nodes, num_qpts, num_components;
319 
320     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
321     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
322     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
323     CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes));
324     CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components));
325     CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts));
326 
327     // Construct identity matrix for basis if required
328     bool        has_eval_none = false;
329     CeedScalar *identity      = NULL;
330     for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) {
331       has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE);
332     }
333     for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) {
334       has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE);
335     }
336     if (has_eval_none) {
337       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
338       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
339     }
340 
341     // Compute the diagonal of B^T D B
342     // Each element
343     for (CeedInt e = 0; e < num_elem; e++) {
344       // Each basis eval mode pair
345       CeedInt      d_out              = 0, q_comp_out;
346       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
347       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) {
348         const CeedScalar *B_t = NULL;
349         CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t);
350         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out));
351         if (q_comp_out > 1) {
352           if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
353           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
354         }
355         eval_mode_out_prev = eval_modes_out[b][e_out];
356 
357         CeedInt      d_in              = 0, q_comp_in;
358         CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
359         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) {
360           const CeedScalar *B = NULL;
361           CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B);
362           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in));
363           if (q_comp_in > 1) {
364             if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
365             else B = &B[(++d_in) * num_qpts * num_nodes];
366           }
367           eval_mode_in_prev = eval_modes_in[b][e_in];
368 
369           // Each component
370           for (CeedInt c_out = 0; c_out < num_components; c_out++) {
371             // Each qpt/node pair
372             for (CeedInt q = 0; q < num_qpts; q++) {
373               if (is_pointblock) {
374                 // Point Block Diagonal
375                 for (CeedInt c_in = 0; c_in < num_components; c_in++) {
376                   const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
377                   const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
378                   for (CeedInt n = 0; n < num_nodes; n++) {
379                     elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] +=
380                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
381                   }
382                 }
383               } else {
384                 // Diagonal Only
385                 const CeedInt    c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
386                 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
387                 for (CeedInt n = 0; n < num_nodes; n++) {
388                   elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
389                 }
390               }
391             }
392           }
393         }
394       }
395     }
396     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
397 
398     // Assemble local operator diagonal
399     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
400 
401     // Cleanup
402     if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
403     CeedCall(CeedVectorDestroy(&elem_diag));
404     CeedCall(CeedFree(&identity));
405   }
406   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
407   CeedCall(CeedVectorDestroy(&assembled_qf));
408 
409   return CEED_ERROR_SUCCESS;
410 }
411 
412 /**
413   @brief Core logic for assembling composite operator diagonal
414 
415   @param[in]  op            CeedOperator to assemble point block diagonal
416   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
417   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
418   @param[out] assembled     CeedVector to store assembled diagonal
419 
420   @return An error code: 0 - success, otherwise - failure
421 
422   @ref Developer
423 **/
424 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
425                                                                  CeedVector assembled) {
426   CeedInt       num_sub;
427   CeedOperator *suboperators;
428   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
429   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
430   for (CeedInt i = 0; i < num_sub; i++) {
431     if (is_pointblock) {
432       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
433     } else {
434       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
435     }
436   }
437   return CEED_ERROR_SUCCESS;
438 }
439 
440 /**
441   @brief Build nonzero pattern for non-composite operator
442 
443   Users should generally use CeedOperatorLinearAssembleSymbolic()
444 
445   @param[in]  op     CeedOperator to assemble nonzero pattern
446   @param[in]  offset Offset for number of entries
447   @param[out] rows   Row number for each entry
448   @param[out] cols   Column number for each entry
449 
450   @return An error code: 0 - success, otherwise - failure
451 
452   @ref Developer
453 **/
454 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
455   Ceed ceed;
456   bool is_composite;
457   CeedCall(CeedOperatorGetCeed(op, &ceed));
458   CeedCall(CeedOperatorIsComposite(op, &is_composite));
459 
460   if (is_composite) {
461     // LCOV_EXCL_START
462     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
463     // LCOV_EXCL_STOP
464   }
465 
466   CeedSize num_nodes;
467   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
468   CeedElemRestriction rstr_in;
469   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
470   CeedInt num_elem, elem_size, num_comp;
471   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
472   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
473   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
474   CeedInt layout_er[3];
475   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
476 
477   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
478 
479   // Determine elem_dof relation
480   CeedVector index_vec;
481   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
482   CeedScalar *array;
483   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
484   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
485   CeedCall(CeedVectorRestoreArray(index_vec, &array));
486   CeedVector elem_dof;
487   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
488   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
489   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
490   const CeedScalar *elem_dof_a;
491   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
492   CeedCall(CeedVectorDestroy(&index_vec));
493 
494   // Determine i, j locations for element matrices
495   CeedInt count = 0;
496   for (CeedInt e = 0; e < num_elem; e++) {
497     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
498       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
499         for (CeedInt i = 0; i < elem_size; i++) {
500           for (CeedInt j = 0; j < elem_size; j++) {
501             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
502             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
503 
504             const CeedInt row = elem_dof_a[elem_dof_index_row];
505             const CeedInt col = elem_dof_a[elem_dof_index_col];
506 
507             rows[offset + count] = row;
508             cols[offset + count] = col;
509             count++;
510           }
511         }
512       }
513     }
514   }
515   if (count != local_num_entries) {
516     // LCOV_EXCL_START
517     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
518     // LCOV_EXCL_STOP
519   }
520   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
521   CeedCall(CeedVectorDestroy(&elem_dof));
522 
523   return CEED_ERROR_SUCCESS;
524 }
525 
526 /**
527   @brief Assemble nonzero entries for non-composite operator
528 
529   Users should generally use CeedOperatorLinearAssemble()
530 
531   @param[in]  op     CeedOperator to assemble
532   @param[in]  offset Offset for number of entries
533   @param[out] values Values to assemble into matrix
534 
535   @return An error code: 0 - success, otherwise - failure
536 
537   @ref Developer
538 **/
539 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
540   Ceed ceed;
541   bool is_composite;
542   CeedCall(CeedOperatorGetCeed(op, &ceed));
543   CeedCall(CeedOperatorIsComposite(op, &is_composite));
544 
545   if (is_composite) {
546     // LCOV_EXCL_START
547     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
548     // LCOV_EXCL_STOP
549   }
550 
551   // Early exit for empty operator
552   {
553     CeedInt num_elem = 0;
554 
555     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
556     if (num_elem == 0) return CEED_ERROR_SUCCESS;
557   }
558 
559   if (op->LinearAssembleSingle) {
560     // Backend version
561     CeedCall(op->LinearAssembleSingle(op, offset, values));
562     return CEED_ERROR_SUCCESS;
563   } else {
564     // Operator fallback
565     CeedOperator op_fallback;
566 
567     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
568     if (op_fallback) {
569       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
570       return CEED_ERROR_SUCCESS;
571     }
572   }
573 
574   // Assemble QFunction
575   CeedQFunction qf;
576   CeedCall(CeedOperatorGetQFunction(op, &qf));
577   CeedVector          assembled_qf;
578   CeedElemRestriction rstr_q;
579   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
580   CeedSize qf_length;
581   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
582 
583   CeedInt            num_input_fields, num_output_fields;
584   CeedOperatorField *input_fields;
585   CeedOperatorField *output_fields;
586   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
587 
588   // Get assembly data
589   CeedOperatorAssemblyData data;
590   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
591   const CeedEvalMode **eval_modes_in, **eval_modes_out;
592   CeedInt             *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
593   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out,
594                                                 &eval_modes_out, NULL, NULL));
595   CeedBasis *bases;
596   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL));
597   CeedBasis basis_in = bases[0];
598 
599   if (num_active_bases > 1) {
600     // LCOV_EXCL_START
601     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases");
602     // LCOV_EXCL_STOP
603   }
604   if (num_eval_modes_in[0] == 0 || num_eval_modes_out[0] == 0) {
605     // LCOV_EXCL_START
606     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
607     // LCOV_EXCL_STOP
608   }
609 
610   CeedElemRestriction active_rstr;
611   CeedInt             num_elem, elem_size, num_qpts, num_comp;
612   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
613   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
614   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
615   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
616   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
617 
618   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
619 
620   // loop over elements and put in data structure
621   const CeedScalar *assembled_qf_array;
622   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
623 
624   CeedInt layout_qf[3];
625   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
626   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
627 
628   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
629   const CeedScalar **B_mats_in, **B_mats_out;
630   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out));
631   const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0];
632   CeedScalar        BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]];
633   CeedScalar        elem_mat[elem_size * elem_size];
634   CeedInt           count = 0;
635   CeedScalar       *vals;
636   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
637   for (CeedInt e = 0; e < num_elem; e++) {
638     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
639       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
640         // Compute B^T*D
641         for (CeedInt n = 0; n < elem_size; n++) {
642           for (CeedInt q = 0; q < num_qpts; q++) {
643             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
644               const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in);
645               CeedScalar    sum       = 0.0;
646               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
647                 const CeedInt b_out_index     = (num_eval_modes_out[0] * q + e_out) * elem_size + n;
648                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out;
649                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
650                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
651               }
652               BTD_mat[btd_index] = sum;
653             }
654           }
655         }
656         // form element matrix itself (for each block component)
657         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0]));
658 
659         // put element matrix in coordinate data structure
660         for (CeedInt i = 0; i < elem_size; i++) {
661           for (CeedInt j = 0; j < elem_size; j++) {
662             vals[offset + count] = elem_mat[i * elem_size + j];
663             count++;
664           }
665         }
666       }
667     }
668   }
669   if (count != local_num_entries) {
670     // LCOV_EXCL_START
671     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
672     // LCOV_EXCL_STOP
673   }
674   CeedCall(CeedVectorRestoreArray(values, &vals));
675 
676   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
677   CeedCall(CeedVectorDestroy(&assembled_qf));
678 
679   return CEED_ERROR_SUCCESS;
680 }
681 
682 /**
683   @brief Count number of entries for assembled CeedOperator
684 
685   @param[in]  op          CeedOperator to assemble
686   @param[out] num_entries Number of entries in assembled representation
687 
688   @return An error code: 0 - success, otherwise - failure
689 
690   @ref Utility
691 **/
692 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
693   bool                is_composite;
694   CeedElemRestriction rstr;
695   CeedInt             num_elem, elem_size, num_comp;
696 
697   CeedCall(CeedOperatorIsComposite(op, &is_composite));
698   if (is_composite) {
699     // LCOV_EXCL_START
700     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
701     // LCOV_EXCL_STOP
702   }
703   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
704   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
705   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
706   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
707   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
708 
709   return CEED_ERROR_SUCCESS;
710 }
711 
712 /**
713   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
714 
715   @param[in]  op_fine      Fine grid operator
716   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
717   @param[in]  rstr_coarse  Coarse grid restriction
718   @param[in]  basis_coarse Coarse grid active vector basis
719   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
720   @param[out] op_coarse    Coarse grid operator
721   @param[out] op_prolong   Coarse to fine operator, or NULL
722   @param[out] op_restrict  Fine to coarse operator, or NULL
723 
724   @return An error code: 0 - success, otherwise - failure
725 
726   @ref Developer
727 **/
728 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
729                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
730   Ceed       ceed;
731   CeedVector mult_vec = NULL;
732   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
733 
734   // Check for composite operator
735   bool is_composite;
736   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
737   if (is_composite) {
738     // LCOV_EXCL_START
739     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
740     // LCOV_EXCL_STOP
741   }
742 
743   // Coarse Grid
744   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
745   CeedElemRestriction rstr_fine = NULL;
746   // -- Clone input fields
747   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
748     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
749       rstr_fine = op_fine->input_fields[i]->elem_rstr;
750       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
751     } else {
752       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
753                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
754     }
755   }
756   // -- Clone output fields
757   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
758     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
759       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
760     } else {
761       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
762                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
763     }
764   }
765   // -- Clone QFunctionAssemblyData
766   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
767 
768   // Multiplicity vector
769   if (op_restrict || op_prolong) {
770     CeedVector mult_e_vec;
771 
772     if (!p_mult_fine) {
773       // LCOV_EXCL_START
774       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
775       // LCOV_EXCL_STOP
776     }
777     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
778     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
779     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
780     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
781     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
782     CeedCall(CeedVectorDestroy(&mult_e_vec));
783     CeedCall(CeedVectorReciprocal(mult_vec));
784   }
785 
786   // Clone name
787   bool   has_name = op_fine->name;
788   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
789   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
790 
791   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
792   if ((op_restrict || op_prolong) && !basis_c_to_f) {
793     // LCOV_EXCL_START
794     return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine basis");
795     // LCOV_EXCL_STOP
796   }
797 
798   // Restriction/Prolongation Operators
799   CeedInt num_comp;
800   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
801 
802   // Restriction
803   if (op_restrict) {
804     CeedInt             *num_comp_r_data;
805     CeedQFunction        qf_restrict;
806     CeedQFunctionContext ctx_r;
807 
808     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
809     CeedCall(CeedCalloc(1, &num_comp_r_data));
810     num_comp_r_data[0] = num_comp;
811     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
812     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
813     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
814     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
815     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
816     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
817     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
818     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
819 
820     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
821     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
822     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
823     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
824 
825     // Set name
826     char *restriction_name;
827     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
828     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
829     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
830     CeedCall(CeedFree(&restriction_name));
831 
832     // Check
833     CeedCall(CeedOperatorCheckReady(*op_restrict));
834 
835     // Cleanup
836     CeedCall(CeedQFunctionDestroy(&qf_restrict));
837   }
838 
839   // Prolongation
840   if (op_prolong) {
841     CeedInt             *num_comp_p_data;
842     CeedQFunction        qf_prolong;
843     CeedQFunctionContext ctx_p;
844 
845     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
846     CeedCall(CeedCalloc(1, &num_comp_p_data));
847     num_comp_p_data[0] = num_comp;
848     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
849     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
850     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
851     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
852     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
853     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
854     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
855     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
856 
857     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
858     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
859     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
860     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
861 
862     // Set name
863     char *prolongation_name;
864     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
865     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
866     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
867     CeedCall(CeedFree(&prolongation_name));
868 
869     // Check
870     CeedCall(CeedOperatorCheckReady(*op_prolong));
871 
872     // Cleanup
873     CeedCall(CeedQFunctionDestroy(&qf_prolong));
874   }
875 
876   // Check
877   CeedCall(CeedOperatorCheckReady(*op_coarse));
878 
879   // Cleanup
880   CeedCall(CeedVectorDestroy(&mult_vec));
881   CeedCall(CeedBasisDestroy(&basis_c_to_f));
882 
883   return CEED_ERROR_SUCCESS;
884 }
885 
886 /**
887   @brief Build 1D mass matrix and Laplacian with perturbation
888 
889   @param[in]  interp_1d   Interpolation matrix in one dimension
890   @param[in]  grad_1d     Gradient matrix in one dimension
891   @param[in]  q_weight_1d Quadrature weights in one dimension
892   @param[in]  P_1d        Number of basis nodes in one dimension
893   @param[in]  Q_1d        Number of quadrature points in one dimension
894   @param[in]  dim         Dimension of basis
895   @param[out] mass        Assembled mass matrix in one dimension
896   @param[out] laplace     Assembled perturbed Laplacian in one dimension
897 
898   @return An error code: 0 - success, otherwise - failure
899 
900   @ref Developer
901 **/
902 CeedPragmaOptimizeOff
903 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
904                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
905   for (CeedInt i = 0; i < P_1d; i++) {
906     for (CeedInt j = 0; j < P_1d; j++) {
907       CeedScalar sum = 0.0;
908       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
909       mass[i + j * P_1d] = sum;
910     }
911   }
912   // -- Laplacian
913   for (CeedInt i = 0; i < P_1d; i++) {
914     for (CeedInt j = 0; j < P_1d; j++) {
915       CeedScalar sum = 0.0;
916       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
917       laplace[i + j * P_1d] = sum;
918     }
919   }
920   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
921   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
922   return CEED_ERROR_SUCCESS;
923 }
924 CeedPragmaOptimizeOn
925 
926 /// @}
927 
928 /// ----------------------------------------------------------------------------
929 /// CeedOperator Backend API
930 /// ----------------------------------------------------------------------------
931 /// @addtogroup CeedOperatorBackend
932 /// @{
933 
934 /**
935   @brief Create object holding CeedQFunction assembly data for CeedOperator
936 
937   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
938   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
939 
940   @return An error code: 0 - success, otherwise - failure
941 
942   @ref Backend
943 **/
944 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
945   CeedCall(CeedCalloc(1, data));
946   (*data)->ref_count = 1;
947   (*data)->ceed      = ceed;
948   CeedCall(CeedReference(ceed));
949 
950   return CEED_ERROR_SUCCESS;
951 }
952 
953 /**
954   @brief Increment the reference counter for a CeedQFunctionAssemblyData
955 
956   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
957 
958   @return An error code: 0 - success, otherwise - failure
959 
960   @ref Backend
961 **/
962 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
963   data->ref_count++;
964   return CEED_ERROR_SUCCESS;
965 }
966 
967 /**
968   @brief Set re-use of CeedQFunctionAssemblyData
969 
970   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
971   @param[in]     reuse_data Boolean flag indicating data re-use
972 
973   @return An error code: 0 - success, otherwise - failure
974 
975   @ref Backend
976 **/
977 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
978   data->reuse_data        = reuse_data;
979   data->needs_data_update = true;
980   return CEED_ERROR_SUCCESS;
981 }
982 
983 /**
984   @brief Mark QFunctionAssemblyData as stale
985 
986   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
987   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
988 
989   @return An error code: 0 - success, otherwise - failure
990 
991   @ref Backend
992 **/
993 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
994   data->needs_data_update = needs_data_update;
995   return CEED_ERROR_SUCCESS;
996 }
997 
998 /**
999   @brief Determine if QFunctionAssemblyData needs update
1000 
1001   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
1002   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1003 
1004   @return An error code: 0 - success, otherwise - failure
1005 
1006   @ref Backend
1007 **/
1008 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
1009   *is_update_needed = !data->reuse_data || data->needs_data_update;
1010   return CEED_ERROR_SUCCESS;
1011 }
1012 
1013 /**
1014   @brief Copy the pointer to a CeedQFunctionAssemblyData.
1015 
1016   Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
1017 
1018   Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
1019         CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
1020         CeedQFunctionAssemblyData.
1021 
1022   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
1023   @param[in,out] data_copy Variable to store copied reference
1024 
1025   @return An error code: 0 - success, otherwise - failure
1026 
1027   @ref Backend
1028 **/
1029 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1030   CeedCall(CeedQFunctionAssemblyDataReference(data));
1031   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1032   *data_copy = data;
1033   return CEED_ERROR_SUCCESS;
1034 }
1035 
1036 /**
1037   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1038 
1039   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1040   @param[out] is_setup Boolean flag for setup status
1041 
1042   @return An error code: 0 - success, otherwise - failure
1043 
1044   @ref Backend
1045 **/
1046 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1047   *is_setup = data->is_setup;
1048   return CEED_ERROR_SUCCESS;
1049 }
1050 
1051 /**
1052   @brief Set internal objects for CeedQFunctionAssemblyData
1053 
1054   @param[in,out] data CeedQFunctionAssemblyData to set objects
1055   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1056   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1057 
1058   @return An error code: 0 - success, otherwise - failure
1059 
1060   @ref Backend
1061 **/
1062 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1063   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1064   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1065 
1066   data->is_setup = true;
1067   return CEED_ERROR_SUCCESS;
1068 }
1069 
1070 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1071   if (!data->is_setup) {
1072     // LCOV_EXCL_START
1073     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1074     // LCOV_EXCL_STOP
1075   }
1076 
1077   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1078   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1079 
1080   return CEED_ERROR_SUCCESS;
1081 }
1082 
1083 /**
1084   @brief Destroy CeedQFunctionAssemblyData
1085 
1086   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1087 
1088   @return An error code: 0 - success, otherwise - failure
1089 
1090   @ref Backend
1091 **/
1092 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1093   if (!*data || --(*data)->ref_count > 0) {
1094     *data = NULL;
1095     return CEED_ERROR_SUCCESS;
1096   }
1097   CeedCall(CeedDestroy(&(*data)->ceed));
1098   CeedCall(CeedVectorDestroy(&(*data)->vec));
1099   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1100 
1101   CeedCall(CeedFree(data));
1102   return CEED_ERROR_SUCCESS;
1103 }
1104 
1105 /**
1106   @brief Get CeedOperatorAssemblyData
1107 
1108   @param[in]  op   CeedOperator to assemble
1109   @param[out] data CeedQFunctionAssemblyData
1110 
1111   @return An error code: 0 - success, otherwise - failure
1112 
1113   @ref Backend
1114 **/
1115 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1116   if (!op->op_assembled) {
1117     CeedOperatorAssemblyData data;
1118 
1119     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1120     op->op_assembled = data;
1121   }
1122   *data = op->op_assembled;
1123 
1124   return CEED_ERROR_SUCCESS;
1125 }
1126 
1127 /**
1128   @brief Create object holding CeedOperator assembly data.
1129 
1130   The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1131   An array with references to the corresponding active CeedElemRestrictions is also stored.
1132   For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1133   The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1134 CeedEvalMode.
1135   The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1136   Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1137 
1138   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1139   @param[in]  op   CeedOperator to be assembled
1140   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1141 
1142   @return An error code: 0 - success, otherwise - failure
1143 
1144   @ref Backend
1145 **/
1146 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1147   CeedInt num_active_bases = 0;
1148 
1149   // Allocate
1150   CeedCall(CeedCalloc(1, data));
1151   (*data)->ceed = ceed;
1152   CeedCall(CeedReference(ceed));
1153 
1154   // Build OperatorAssembly data
1155   CeedQFunction       qf;
1156   CeedQFunctionField *qf_fields;
1157   CeedOperatorField  *op_fields;
1158   CeedInt             num_input_fields;
1159   CeedCall(CeedOperatorGetQFunction(op, &qf));
1160   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1161   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1162 
1163   // Determine active input basis
1164   CeedInt       *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0;
1165   CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL;
1166   CeedSize     **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1167   for (CeedInt i = 0; i < num_input_fields; i++) {
1168     CeedVector vec;
1169     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1170     if (vec == CEED_VECTOR_ACTIVE) {
1171       CeedBasis    basis_in = NULL;
1172       CeedEvalMode eval_mode;
1173       CeedInt      index = -1, dim, num_comp, q_comp;
1174       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1175       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1176       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1177       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1178       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1179       for (CeedInt i = 0; i < num_active_bases; i++) {
1180         if ((*data)->active_bases[i] == basis_in) index = i;
1181       }
1182       if (index == -1) {
1183         CeedElemRestriction elem_rstr_in;
1184         index = num_active_bases;
1185         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1186         (*data)->active_bases[num_active_bases] = NULL;
1187         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases]));
1188         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1189         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1190         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1191         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases]));
1192         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1193         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1194         num_eval_modes_in[index]  = 0;
1195         num_eval_modes_out[index] = 0;
1196         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1197         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1198         eval_modes_in[index]  = NULL;
1199         eval_modes_out[index] = NULL;
1200         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1201         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1202         eval_mode_offsets_in[index]  = NULL;
1203         eval_mode_offsets_out[index] = NULL;
1204         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1205         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1206         (*data)->assembled_bases_in[index]  = NULL;
1207         (*data)->assembled_bases_out[index] = NULL;
1208         num_active_bases++;
1209       }
1210       if (eval_mode != CEED_EVAL_WEIGHT) {
1211         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1212         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1213         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1214         for (CeedInt d = 0; d < q_comp; d++) {
1215           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1216           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1217           offset += num_comp;
1218         }
1219         num_eval_modes_in[index] += q_comp;
1220       }
1221     }
1222   }
1223   (*data)->num_eval_modes_in    = num_eval_modes_in;
1224   (*data)->eval_modes_in        = eval_modes_in;
1225   (*data)->eval_mode_offsets_in = eval_mode_offsets_in;
1226 
1227   // Determine active output basis
1228   CeedInt num_output_fields;
1229   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1230   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1231   offset = 0;
1232   for (CeedInt i = 0; i < num_output_fields; i++) {
1233     CeedVector vec;
1234     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1235     if (vec == CEED_VECTOR_ACTIVE) {
1236       CeedBasis    basis_out = NULL;
1237       CeedEvalMode eval_mode;
1238       CeedInt      index = -1, dim, num_comp, q_comp;
1239       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1240       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1241       CeedCall(CeedBasisGetDimension(basis_out, &dim));
1242       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1243       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1244       for (CeedInt i = 0; i < num_active_bases; i++) {
1245         if ((*data)->active_bases[i] == basis_out) index = i;
1246       }
1247       if (index == -1) {
1248         CeedElemRestriction elem_rstr_out;
1249 
1250         index = num_active_bases;
1251         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1252         (*data)->active_bases[num_active_bases] = NULL;
1253         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases]));
1254         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1255         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1256         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1257         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases]));
1258         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1259         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1260         num_eval_modes_in[index]  = 0;
1261         num_eval_modes_out[index] = 0;
1262         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1263         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1264         eval_modes_in[index]  = NULL;
1265         eval_modes_out[index] = NULL;
1266         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1267         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1268         eval_mode_offsets_in[index]  = NULL;
1269         eval_mode_offsets_out[index] = NULL;
1270         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1271         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1272         (*data)->assembled_bases_in[index]  = NULL;
1273         (*data)->assembled_bases_out[index] = NULL;
1274         num_active_bases++;
1275       }
1276       if (eval_mode != CEED_EVAL_WEIGHT) {
1277         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1278         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1279         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1280         for (CeedInt d = 0; d < q_comp; d++) {
1281           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1282           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1283           offset += num_comp;
1284         }
1285         num_eval_modes_out[index] += q_comp;
1286       }
1287     }
1288   }
1289   (*data)->num_output_components = offset;
1290   (*data)->num_eval_modes_out    = num_eval_modes_out;
1291   (*data)->eval_modes_out        = eval_modes_out;
1292   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1293   (*data)->num_active_bases      = num_active_bases;
1294 
1295   return CEED_ERROR_SUCCESS;
1296 }
1297 
1298 /**
1299   @brief Get CeedOperator CeedEvalModes for assembly.
1300 
1301   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1302 
1303   @param[in]  data                  CeedOperatorAssemblyData
1304   @param[out] num_active_bases      Total number of active bases
1305   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1306                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1307   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1308   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1309   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1310   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1311   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1312   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1313                                       including contributions of all active bases
1314 
1315   @return An error code: 0 - success, otherwise - failure
1316 
1317 
1318   @ref Backend
1319 **/
1320 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in,
1321                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out,
1322                                          const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) {
1323   if (num_active_bases) *num_active_bases = data->num_active_bases;
1324   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1325   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1326   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1327   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1328   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1329   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1330   if (num_output_components) *num_output_components = data->num_output_components;
1331 
1332   return CEED_ERROR_SUCCESS;
1333 }
1334 
1335 /**
1336   @brief Get CeedOperator CeedBasis data for assembly.
1337 
1338   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1339 
1340   @param[in]  data                CeedOperatorAssemblyData
1341   @param[out] num_active_bases    Number of active bases, or NULL
1342   @param[out] active_bases        Pointer to hold active CeedBasis, or NULL
1343   @param[out] assembled_bases_in  Pointer to hold assembled active input B, or NULL
1344   @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL
1345 
1346   @return An error code: 0 - success, otherwise - failure
1347 
1348   @ref Backend
1349 **/
1350 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases,
1351                                      const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) {
1352   // Assemble B_in, B_out if needed
1353   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1354     CeedInt num_qpts;
1355 
1356     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1357     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1358       CeedInt     num_nodes;
1359       CeedScalar *B_in = NULL, *identity = NULL;
1360       bool        has_eval_none = false;
1361 
1362       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1363       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1364 
1365       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1366         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1367       }
1368       if (has_eval_none) {
1369         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1370         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1371           identity[i * num_nodes + i] = 1.0;
1372         }
1373       }
1374 
1375       for (CeedInt q = 0; q < num_qpts; q++) {
1376         for (CeedInt n = 0; n < num_nodes; n++) {
1377           CeedInt      d_in              = 0, q_comp_in;
1378           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1379           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1380             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1381             const CeedScalar *B  = NULL;
1382             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B);
1383             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in));
1384             if (q_comp_in > 1) {
1385               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1386               else B = &B[(++d_in) * num_qpts * num_nodes];
1387             }
1388             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1389             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1390           }
1391         }
1392       }
1393       if (identity) CeedCall(CeedFree(identity));
1394       data->assembled_bases_in[b] = B_in;
1395     }
1396   }
1397 
1398   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1399     CeedInt num_qpts;
1400 
1401     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1402     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1403       CeedInt     num_nodes;
1404       bool        has_eval_none = false;
1405       CeedScalar *B_out = NULL, *identity = NULL;
1406 
1407       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1408       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1409 
1410       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1411         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1412       }
1413       if (has_eval_none) {
1414         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1415         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1416           identity[i * num_nodes + i] = 1.0;
1417         }
1418       }
1419 
1420       for (CeedInt q = 0; q < num_qpts; q++) {
1421         for (CeedInt n = 0; n < num_nodes; n++) {
1422           CeedInt      d_out              = 0, q_comp_out;
1423           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1424           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1425             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1426             const CeedScalar *B  = NULL;
1427             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B);
1428             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out));
1429             if (q_comp_out > 1) {
1430               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1431               else B = &B[(++d_out) * num_qpts * num_nodes];
1432             }
1433             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1434             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1435           }
1436         }
1437       }
1438       if (identity) CeedCall(CeedFree(identity));
1439       data->assembled_bases_out[b] = B_out;
1440     }
1441   }
1442 
1443   // Pass out assembled data
1444   if (active_bases) *active_bases = data->active_bases;
1445   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1446   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1447 
1448   return CEED_ERROR_SUCCESS;
1449 }
1450 
1451 /**
1452   @brief Get CeedOperator CeedBasis data for assembly.
1453 
1454   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1455 
1456   @param[in]  data                  CeedOperatorAssemblyData
1457   @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL
1458   @param[out] active_elem_rstrs     Pointer to hold active CeedElemRestrictions, or NULL
1459 
1460   @return An error code: 0 - success, otherwise - failure
1461 
1462   @ref Backend
1463 **/
1464 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs,
1465                                                 CeedElemRestriction **active_elem_rstrs) {
1466   if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases;
1467   if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs;
1468 
1469   return CEED_ERROR_SUCCESS;
1470 }
1471 
1472 /**
1473   @brief Destroy CeedOperatorAssemblyData
1474 
1475   @param[in,out] data CeedOperatorAssemblyData to destroy
1476 
1477   @return An error code: 0 - success, otherwise - failure
1478 
1479   @ref Backend
1480 **/
1481 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1482   if (!*data) {
1483     *data = NULL;
1484     return CEED_ERROR_SUCCESS;
1485   }
1486   CeedCall(CeedDestroy(&(*data)->ceed));
1487   for (CeedInt b = 0; b < (*data)->num_active_bases; b++) {
1488     CeedCall(CeedBasisDestroy(&(*data)->active_bases[b]));
1489     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b]));
1490     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1491     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1492     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1493     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1494     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1495     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1496   }
1497   CeedCall(CeedFree(&(*data)->active_bases));
1498   CeedCall(CeedFree(&(*data)->active_elem_rstrs));
1499   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1500   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1501   CeedCall(CeedFree(&(*data)->eval_modes_in));
1502   CeedCall(CeedFree(&(*data)->eval_modes_out));
1503   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1504   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1505   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1506   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1507 
1508   CeedCall(CeedFree(data));
1509   return CEED_ERROR_SUCCESS;
1510 }
1511 
1512 /// @}
1513 
1514 /// ----------------------------------------------------------------------------
1515 /// CeedOperator Public API
1516 /// ----------------------------------------------------------------------------
1517 /// @addtogroup CeedOperatorUser
1518 /// @{
1519 
1520 /**
1521   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1522 
1523   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1524   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices
1525 representing the action of the CeedQFunction for a corresponding quadrature point on an element.
1526 
1527   Inputs and outputs are in the order provided by the user when adding CeedOperator fields.
1528   For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction
1529 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1530 
1531   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1532 
1533   @param[in]  op        CeedOperator to assemble CeedQFunction
1534   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1535   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1536   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1537 
1538   @return An error code: 0 - success, otherwise - failure
1539 
1540   @ref User
1541 **/
1542 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1543   CeedCall(CeedOperatorCheckReady(op));
1544 
1545   if (op->LinearAssembleQFunction) {
1546     // Backend version
1547     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1548   } else {
1549     // Operator fallback
1550     CeedOperator op_fallback;
1551 
1552     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1553     if (op_fallback) {
1554       CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1555     } else {
1556       // LCOV_EXCL_START
1557       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1558       // LCOV_EXCL_STOP
1559     }
1560   }
1561   return CEED_ERROR_SUCCESS;
1562 }
1563 
1564 /**
1565   @brief Assemble CeedQFunction and store result internally.
1566 
1567   Return copied references of stored data to the caller.
1568   Caller is responsible for ownership and destruction of the copied references.
1569   See also @ref CeedOperatorLinearAssembleQFunction
1570 
1571   @param[in]  op        CeedOperator to assemble CeedQFunction
1572   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1573   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1574   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1575 
1576   @return An error code: 0 - success, otherwise - failure
1577 
1578   @ref User
1579 **/
1580 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1581   CeedCall(CeedOperatorCheckReady(op));
1582 
1583   if (op->LinearAssembleQFunctionUpdate) {
1584     // Backend version
1585     bool                qf_assembled_is_setup;
1586     CeedVector          assembled_vec  = NULL;
1587     CeedElemRestriction assembled_rstr = NULL;
1588 
1589     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1590     if (qf_assembled_is_setup) {
1591       bool update_needed;
1592 
1593       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1594       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1595       if (update_needed) {
1596         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1597       }
1598     } else {
1599       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1600       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1601     }
1602     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1603 
1604     // Copy reference from internally held copy
1605     *assembled = NULL;
1606     *rstr      = NULL;
1607     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1608     CeedCall(CeedVectorDestroy(&assembled_vec));
1609     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1610     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1611   } else {
1612     // Operator fallback
1613     CeedOperator op_fallback;
1614 
1615     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1616     if (op_fallback) {
1617       CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1618     } else {
1619       // LCOV_EXCL_START
1620       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1621       // LCOV_EXCL_STOP
1622     }
1623   }
1624 
1625   return CEED_ERROR_SUCCESS;
1626 }
1627 
1628 /**
1629   @brief Assemble the diagonal of a square linear CeedOperator
1630 
1631   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1632 
1633   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1634 
1635   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1636 
1637   @param[in]  op        CeedOperator to assemble CeedQFunction
1638   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1639   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1640 
1641   @return An error code: 0 - success, otherwise - failure
1642 
1643   @ref User
1644 **/
1645 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1646   bool is_composite;
1647   CeedCall(CeedOperatorCheckReady(op));
1648   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1649 
1650   CeedSize input_size = 0, output_size = 0;
1651   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1652   if (input_size != output_size) {
1653     // LCOV_EXCL_START
1654     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1655     // LCOV_EXCL_STOP
1656   }
1657 
1658   // Early exit for empty operator
1659   if (!is_composite) {
1660     CeedInt num_elem = 0;
1661 
1662     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1663     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1664   }
1665 
1666   if (op->LinearAssembleDiagonal) {
1667     // Backend version
1668     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1669     return CEED_ERROR_SUCCESS;
1670   } else if (op->LinearAssembleAddDiagonal) {
1671     // Backend version with zeroing first
1672     CeedCall(CeedVectorSetValue(assembled, 0.0));
1673     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1674     return CEED_ERROR_SUCCESS;
1675   } else {
1676     // Operator fallback
1677     CeedOperator op_fallback;
1678 
1679     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1680     if (op_fallback) {
1681       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1682       return CEED_ERROR_SUCCESS;
1683     }
1684   }
1685   // Default interface implementation
1686   CeedCall(CeedVectorSetValue(assembled, 0.0));
1687   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1688 
1689   return CEED_ERROR_SUCCESS;
1690 }
1691 
1692 /**
1693   @brief Assemble the diagonal of a square linear CeedOperator
1694 
1695   This sums into a CeedVector the diagonal of a linear CeedOperator.
1696 
1697   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1698 
1699   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1700 
1701   @param[in]  op        CeedOperator to assemble CeedQFunction
1702   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1703   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1704 
1705   @return An error code: 0 - success, otherwise - failure
1706 
1707   @ref User
1708 **/
1709 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1710   bool is_composite;
1711   CeedCall(CeedOperatorCheckReady(op));
1712   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1713 
1714   CeedSize input_size = 0, output_size = 0;
1715   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1716   if (input_size != output_size) {
1717     // LCOV_EXCL_START
1718     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1719     // LCOV_EXCL_STOP
1720   }
1721 
1722   // Early exit for empty operator
1723   if (!is_composite) {
1724     CeedInt num_elem = 0;
1725 
1726     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1727     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1728   }
1729 
1730   if (op->LinearAssembleAddDiagonal) {
1731     // Backend version
1732     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1733     return CEED_ERROR_SUCCESS;
1734   } else {
1735     // Operator fallback
1736     CeedOperator op_fallback;
1737 
1738     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1739     if (op_fallback) {
1740       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1741       return CEED_ERROR_SUCCESS;
1742     }
1743   }
1744   // Default interface implementation
1745   if (is_composite) {
1746     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1747   } else {
1748     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1749   }
1750 
1751   return CEED_ERROR_SUCCESS;
1752 }
1753 
1754 /**
1755   @brief Assemble the point block diagonal of a square linear CeedOperator
1756 
1757   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1758 
1759   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1760 
1761   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1762 
1763   @param[in]  op        CeedOperator to assemble CeedQFunction
1764   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1765 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1766 component in].
1767   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1768 
1769   @return An error code: 0 - success, otherwise - failure
1770 
1771   @ref User
1772 **/
1773 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1774   bool is_composite;
1775   CeedCall(CeedOperatorCheckReady(op));
1776   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1777 
1778   CeedSize input_size = 0, output_size = 0;
1779   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1780   if (input_size != output_size) {
1781     // LCOV_EXCL_START
1782     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1783     // LCOV_EXCL_STOP
1784   }
1785 
1786   // Early exit for empty operator
1787   if (!is_composite) {
1788     CeedInt num_elem = 0;
1789 
1790     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1791     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1792   }
1793 
1794   if (op->LinearAssemblePointBlockDiagonal) {
1795     // Backend version
1796     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1797     return CEED_ERROR_SUCCESS;
1798   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1799     // Backend version with zeroing first
1800     CeedCall(CeedVectorSetValue(assembled, 0.0));
1801     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1802     return CEED_ERROR_SUCCESS;
1803   } else {
1804     // Operator fallback
1805     CeedOperator op_fallback;
1806 
1807     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1808     if (op_fallback) {
1809       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1810       return CEED_ERROR_SUCCESS;
1811     }
1812   }
1813   // Default interface implementation
1814   CeedCall(CeedVectorSetValue(assembled, 0.0));
1815   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1816 
1817   return CEED_ERROR_SUCCESS;
1818 }
1819 
1820 /**
1821   @brief Assemble the point block diagonal of a square linear CeedOperator
1822 
1823   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1824 
1825   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1826 
1827   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1828 
1829   @param[in]  op        CeedOperator to assemble CeedQFunction
1830   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1831 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1832 component in].
1833   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1834 
1835   @return An error code: 0 - success, otherwise - failure
1836 
1837   @ref User
1838 **/
1839 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1840   bool is_composite;
1841   CeedCall(CeedOperatorCheckReady(op));
1842   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1843 
1844   CeedSize input_size = 0, output_size = 0;
1845   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1846   if (input_size != output_size) {
1847     // LCOV_EXCL_START
1848     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1849     // LCOV_EXCL_STOP
1850   }
1851 
1852   // Early exit for empty operator
1853   if (!is_composite) {
1854     CeedInt num_elem = 0;
1855 
1856     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1857     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1858   }
1859 
1860   if (op->LinearAssembleAddPointBlockDiagonal) {
1861     // Backend version
1862     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1863     return CEED_ERROR_SUCCESS;
1864   } else {
1865     // Operator fallback
1866     CeedOperator op_fallback;
1867 
1868     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1869     if (op_fallback) {
1870       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1871       return CEED_ERROR_SUCCESS;
1872     }
1873   }
1874   // Default interface implementation
1875   if (is_composite) {
1876     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1877   } else {
1878     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1879   }
1880 
1881   return CEED_ERROR_SUCCESS;
1882 }
1883 
1884 /**
1885    @brief Fully assemble the nonzero pattern of a linear operator.
1886 
1887    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1888 
1889    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1890 matrix in entry (i, j).
1891   Note that the (i, j) pairs are not unique and may repeat.
1892   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1893 
1894    This will generally be slow unless your operator is low-order.
1895 
1896    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1897 
1898    @param[in]  op          CeedOperator to assemble
1899    @param[out] num_entries Number of entries in coordinate nonzero pattern
1900    @param[out] rows        Row number for each entry
1901    @param[out] cols        Column number for each entry
1902 
1903    @ref User
1904 **/
1905 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1906   CeedInt       num_suboperators, single_entries;
1907   CeedOperator *sub_operators;
1908   bool          is_composite;
1909   CeedCall(CeedOperatorCheckReady(op));
1910   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1911 
1912   if (op->LinearAssembleSymbolic) {
1913     // Backend version
1914     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1915     return CEED_ERROR_SUCCESS;
1916   } else {
1917     // Operator fallback
1918     CeedOperator op_fallback;
1919 
1920     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1921     if (op_fallback) {
1922       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1923       return CEED_ERROR_SUCCESS;
1924     }
1925   }
1926 
1927   // Default interface implementation
1928 
1929   // count entries and allocate rows, cols arrays
1930   *num_entries = 0;
1931   if (is_composite) {
1932     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1933     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1934     for (CeedInt k = 0; k < num_suboperators; ++k) {
1935       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1936       *num_entries += single_entries;
1937     }
1938   } else {
1939     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1940     *num_entries += single_entries;
1941   }
1942   CeedCall(CeedCalloc(*num_entries, rows));
1943   CeedCall(CeedCalloc(*num_entries, cols));
1944 
1945   // assemble nonzero locations
1946   CeedInt offset = 0;
1947   if (is_composite) {
1948     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1949     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1950     for (CeedInt k = 0; k < num_suboperators; ++k) {
1951       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1952       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1953       offset += single_entries;
1954     }
1955   } else {
1956     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1957   }
1958 
1959   return CEED_ERROR_SUCCESS;
1960 }
1961 
1962 /**
1963    @brief Fully assemble the nonzero entries of a linear operator.
1964 
1965    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1966 
1967    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1968 matrix in entry (i, j).
1969   Note that the (i, j) pairs are not unique and may repeat.
1970   This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1971 
1972    This will generally be slow unless your operator is low-order.
1973 
1974    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1975 
1976    @param[in]  op     CeedOperator to assemble
1977    @param[out] values Values to assemble into matrix
1978 
1979    @ref User
1980 **/
1981 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1982   CeedInt       num_suboperators, single_entries = 0;
1983   CeedOperator *sub_operators;
1984   bool          is_composite;
1985   CeedCall(CeedOperatorCheckReady(op));
1986   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1987 
1988   // Early exit for empty operator
1989   if (!is_composite) {
1990     CeedInt num_elem = 0;
1991 
1992     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1993     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1994   }
1995 
1996   if (op->LinearAssemble) {
1997     // Backend version
1998     CeedCall(op->LinearAssemble(op, values));
1999     return CEED_ERROR_SUCCESS;
2000   } else {
2001     // Operator fallback
2002     CeedOperator op_fallback;
2003 
2004     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2005     if (op_fallback) {
2006       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2007       return CEED_ERROR_SUCCESS;
2008     }
2009   }
2010 
2011   // Default interface implementation
2012   CeedInt offset = 0;
2013   CeedCall(CeedVectorSetValue(values, 0.0));
2014   if (is_composite) {
2015     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2016     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2017     for (CeedInt k = 0; k < num_suboperators; k++) {
2018       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2019       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2020       offset += single_entries;
2021     }
2022   } else {
2023     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2024   }
2025 
2026   return CEED_ERROR_SUCCESS;
2027 }
2028 
2029 /**
2030   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
2031 
2032   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2033 
2034   @param[in]  op               Composite CeedOperator
2035   @param[in]  num_skip_indices Number of suboperators to skip
2036   @param[in]  skip_indices     Array of indices of suboperators to skip
2037   @param[out] mult             Vector to store multiplicity (of size l_size)
2038 
2039   @return An error code: 0 - success, otherwise - failure
2040 
2041   @ref User
2042 **/
2043 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2044   CeedCall(CeedOperatorCheckReady(op));
2045 
2046   Ceed                ceed;
2047   CeedInt             num_suboperators;
2048   CeedSize            l_vec_len;
2049   CeedScalar         *mult_array;
2050   CeedVector          ones_l_vec;
2051   CeedElemRestriction elem_rstr;
2052   CeedOperator       *sub_operators;
2053 
2054   CeedCall(CeedOperatorGetCeed(op, &ceed));
2055 
2056   // Zero mult vector
2057   CeedCall(CeedVectorSetValue(mult, 0.0));
2058 
2059   // Get suboperators
2060   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2061   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2062   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2063 
2064   // Work vector
2065   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2066   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2067   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2068   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2069 
2070   // Compute multiplicity across suboperators
2071   for (CeedInt i = 0; i < num_suboperators; i++) {
2072     const CeedScalar *sub_mult_array;
2073     CeedVector        sub_mult_l_vec, ones_e_vec;
2074 
2075     // -- Check for suboperator to skip
2076     for (CeedInt j = 0; j < num_skip_indices; j++) {
2077       if (skip_indices[j] == i) continue;
2078     }
2079 
2080     // -- Sub operator multiplicity
2081     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2082     CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2083     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2084     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2085     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2086     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2087     // ---- Flag every node present in the current suboperator
2088     for (CeedInt j = 0; j < l_vec_len; j++) {
2089       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2090     }
2091     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2092     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2093     CeedCall(CeedVectorDestroy(&ones_e_vec));
2094   }
2095   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2096   CeedCall(CeedVectorDestroy(&ones_l_vec));
2097 
2098   return CEED_ERROR_SUCCESS;
2099 }
2100 
2101 /**
2102   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2103 grid interpolation
2104 
2105   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2106 
2107   @param[in]  op_fine      Fine grid operator
2108   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2109   @param[in]  rstr_coarse  Coarse grid restriction
2110   @param[in]  basis_coarse Coarse grid active vector basis
2111   @param[out] op_coarse    Coarse grid operator
2112   @param[out] op_prolong   Coarse to fine operator, or NULL
2113   @param[out] op_restrict  Fine to coarse operator, or NULL
2114 
2115   @return An error code: 0 - success, otherwise - failure
2116 
2117   @ref User
2118 **/
2119 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2120                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2121   CeedCall(CeedOperatorCheckReady(op_fine));
2122 
2123   // Build prolongation matrix, if required
2124   CeedBasis basis_c_to_f = NULL;
2125   if (op_prolong || op_restrict) {
2126     CeedBasis basis_fine;
2127     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2128     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2129   }
2130 
2131   // Core code
2132   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2133 
2134   return CEED_ERROR_SUCCESS;
2135 }
2136 
2137 /**
2138   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2139 
2140   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2141 
2142   @param[in]  op_fine       Fine grid operator
2143   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2144   @param[in]  rstr_coarse   Coarse grid restriction
2145   @param[in]  basis_coarse  Coarse grid active vector basis
2146   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2147   @param[out] op_coarse     Coarse grid operator
2148   @param[out] op_prolong    Coarse to fine operator, or NULL
2149   @param[out] op_restrict   Fine to coarse operator, or NULL
2150 
2151   @return An error code: 0 - success, otherwise - failure
2152 
2153   @ref User
2154 **/
2155 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2156                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2157                                              CeedOperator *op_restrict) {
2158   CeedCall(CeedOperatorCheckReady(op_fine));
2159   Ceed ceed;
2160   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2161 
2162   // Check for compatible quadrature spaces
2163   CeedBasis basis_fine;
2164   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2165   CeedInt Q_f, Q_c;
2166   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2167   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2168   if (Q_f != Q_c) {
2169     // LCOV_EXCL_START
2170     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2171     // LCOV_EXCL_STOP
2172   }
2173 
2174   // Create coarse to fine basis, if required
2175   CeedBasis basis_c_to_f = NULL;
2176   if (op_prolong || op_restrict) {
2177     // Check if interpolation matrix is provided
2178     if (!interp_c_to_f) {
2179       // LCOV_EXCL_START
2180       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2181       // LCOV_EXCL_STOP
2182     }
2183     CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2184     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2185     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2186     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2187     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2188     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2189     CeedScalar *q_ref, *q_weight, *grad;
2190     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2191     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2192     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2193     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2194     CeedCall(CeedFree(&q_ref));
2195     CeedCall(CeedFree(&q_weight));
2196     CeedCall(CeedFree(&grad));
2197   }
2198 
2199   // Core code
2200   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2201   return CEED_ERROR_SUCCESS;
2202 }
2203 
2204 /**
2205   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2206 
2207   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2208 
2209   @param[in]  op_fine       Fine grid operator
2210   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2211   @param[in]  rstr_coarse   Coarse grid restriction
2212   @param[in]  basis_coarse  Coarse grid active vector basis
2213   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2214   @param[out] op_coarse     Coarse grid operator
2215   @param[out] op_prolong    Coarse to fine operator, or NULL
2216   @param[out] op_restrict   Fine to coarse operator, or NULL
2217 
2218   @return An error code: 0 - success, otherwise - failure
2219 
2220   @ref User
2221 **/
2222 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2223                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2224                                        CeedOperator *op_restrict) {
2225   CeedCall(CeedOperatorCheckReady(op_fine));
2226   Ceed ceed;
2227   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2228 
2229   // Check for compatible quadrature spaces
2230   CeedBasis basis_fine;
2231   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2232   CeedInt Q_f, Q_c;
2233   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2234   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2235   if (Q_f != Q_c) {
2236     // LCOV_EXCL_START
2237     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2238     // LCOV_EXCL_STOP
2239   }
2240 
2241   // Coarse to fine basis
2242   CeedBasis basis_c_to_f = NULL;
2243   if (op_prolong || op_restrict) {
2244     // Check if interpolation matrix is provided
2245     if (!interp_c_to_f) {
2246       // LCOV_EXCL_START
2247       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2248       // LCOV_EXCL_STOP
2249     }
2250     CeedElemTopology topo;
2251     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2252     CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2253     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2254     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2255     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2256     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2257     CeedScalar *q_ref, *q_weight, *grad;
2258     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2259     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2260     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2261     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2262     CeedCall(CeedFree(&q_ref));
2263     CeedCall(CeedFree(&q_weight));
2264     CeedCall(CeedFree(&grad));
2265   }
2266 
2267   // Core code
2268   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2269   return CEED_ERROR_SUCCESS;
2270 }
2271 
2272 /**
2273   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2274 
2275   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2276   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2277   The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T
2278 \hat S V\f$.
2279   The CeedOperator must be linear and non-composite.
2280   The associated CeedQFunction must therefore also be linear.
2281 
2282   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2283 
2284   @param[in]  op      CeedOperator to create element inverses
2285   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2286   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2287 
2288   @return An error code: 0 - success, otherwise - failure
2289 
2290   @ref User
2291 **/
2292 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2293   CeedCall(CeedOperatorCheckReady(op));
2294 
2295   if (op->CreateFDMElementInverse) {
2296     // Backend version
2297     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2298     return CEED_ERROR_SUCCESS;
2299   } else {
2300     // Operator fallback
2301     CeedOperator op_fallback;
2302 
2303     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2304     if (op_fallback) {
2305       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2306       return CEED_ERROR_SUCCESS;
2307     }
2308   }
2309 
2310   // Default interface implementation
2311   Ceed ceed, ceed_parent;
2312   CeedCall(CeedOperatorGetCeed(op, &ceed));
2313   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2314   ceed_parent = ceed_parent ? ceed_parent : ceed;
2315   CeedQFunction qf;
2316   CeedCall(CeedOperatorGetQFunction(op, &qf));
2317 
2318   // Determine active input basis
2319   bool                interp = false, grad = false;
2320   CeedBasis           basis = NULL;
2321   CeedElemRestriction rstr  = NULL;
2322   CeedOperatorField  *op_fields;
2323   CeedQFunctionField *qf_fields;
2324   CeedInt             num_input_fields;
2325   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2326   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2327   for (CeedInt i = 0; i < num_input_fields; i++) {
2328     CeedVector vec;
2329     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2330     if (vec == CEED_VECTOR_ACTIVE) {
2331       CeedEvalMode eval_mode;
2332       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2333       interp = interp || eval_mode == CEED_EVAL_INTERP;
2334       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2335       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2336       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2337     }
2338   }
2339   if (!basis) {
2340     // LCOV_EXCL_START
2341     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2342     // LCOV_EXCL_STOP
2343   }
2344   CeedSize l_size = 1;
2345   CeedInt  P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2346   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2347   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2348   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2349   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2350   CeedCall(CeedBasisGetDimension(basis, &dim));
2351   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2352   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2353   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2354 
2355   // Build and diagonalize 1D Mass and Laplacian
2356   bool tensor_basis;
2357   CeedCall(CeedBasisIsTensor(basis, &tensor_basis));
2358   if (!tensor_basis) {
2359     // LCOV_EXCL_START
2360     return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2361     // LCOV_EXCL_STOP
2362   }
2363   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2364   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2365   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2366   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2367   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2368   CeedCall(CeedCalloc(P_1d, &lambda));
2369   // -- Build matrices
2370   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2371   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2372   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2373   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2374   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2375 
2376   // -- Diagonalize
2377   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2378   CeedCall(CeedFree(&mass));
2379   CeedCall(CeedFree(&laplace));
2380   for (CeedInt i = 0; i < P_1d; i++) {
2381     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2382   }
2383   CeedCall(CeedFree(&x));
2384 
2385   // Assemble QFunction
2386   CeedVector          assembled;
2387   CeedElemRestriction rstr_qf;
2388   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2389   CeedInt layout[3];
2390   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2391   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2392   CeedScalar max_norm = 0;
2393   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2394 
2395   // Calculate element averages
2396   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2397   CeedScalar       *elem_avg;
2398   const CeedScalar *assembled_array, *q_weight_array;
2399   CeedVector        q_weight;
2400   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2401   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2402   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2403   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2404   CeedCall(CeedCalloc(num_elem, &elem_avg));
2405   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2406   for (CeedInt e = 0; e < num_elem; e++) {
2407     CeedInt count = 0;
2408     for (CeedInt q = 0; q < num_qpts; q++) {
2409       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2410         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2411           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2412           count++;
2413         }
2414       }
2415     }
2416     if (count) {
2417       elem_avg[e] /= count;
2418     } else {
2419       elem_avg[e] = 1.0;
2420     }
2421   }
2422   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2423   CeedCall(CeedVectorDestroy(&assembled));
2424   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2425   CeedCall(CeedVectorDestroy(&q_weight));
2426 
2427   // Build FDM diagonal
2428   CeedVector  q_data;
2429   CeedScalar *q_data_array, *fdm_diagonal;
2430   CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2431   const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2432   for (CeedInt c = 0; c < num_comp; c++) {
2433     for (CeedInt n = 0; n < num_nodes; n++) {
2434       if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2435       if (grad) {
2436         for (CeedInt d = 0; d < dim; d++) {
2437           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2438           fdm_diagonal[c * num_nodes + n] += lambda[i];
2439         }
2440       }
2441       if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2442     }
2443   }
2444   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2445   CeedCall(CeedVectorSetValue(q_data, 0.0));
2446   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2447   for (CeedInt e = 0; e < num_elem; e++) {
2448     for (CeedInt c = 0; c < num_comp; c++) {
2449       for (CeedInt n = 0; n < num_nodes; n++) q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2450     }
2451   }
2452   CeedCall(CeedFree(&elem_avg));
2453   CeedCall(CeedFree(&fdm_diagonal));
2454   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2455 
2456   // Setup FDM operator
2457   // -- Basis
2458   CeedBasis   fdm_basis;
2459   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2460   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2461   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2462   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2463   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2464   CeedCall(CeedFree(&fdm_interp));
2465   CeedCall(CeedFree(&grad_dummy));
2466   CeedCall(CeedFree(&q_ref_dummy));
2467   CeedCall(CeedFree(&q_weight_dummy));
2468   CeedCall(CeedFree(&lambda));
2469 
2470   // -- Restriction
2471   CeedElemRestriction rstr_qd_i;
2472   CeedInt             strides[3] = {1, num_nodes, num_nodes * num_comp};
2473   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i));
2474   // -- QFunction
2475   CeedQFunction qf_fdm;
2476   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2477   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2478   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2479   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2480   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2481   // -- QFunction context
2482   CeedInt *num_comp_data;
2483   CeedCall(CeedCalloc(1, &num_comp_data));
2484   num_comp_data[0] = num_comp;
2485   CeedQFunctionContext ctx_fdm;
2486   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2487   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2488   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2489   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2490   // -- Operator
2491   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2492   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2493   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2494   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2495 
2496   // Cleanup
2497   CeedCall(CeedVectorDestroy(&q_data));
2498   CeedCall(CeedBasisDestroy(&fdm_basis));
2499   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2500   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2501 
2502   return CEED_ERROR_SUCCESS;
2503 }
2504 
2505 /// @}
2506