xref: /libCEED/interface/ceed-preconditioning.c (revision b2e3f8ecbfa285d0f4ffde9b24c57cc13f0319fb)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <assert.h>
9 #include <ceed-impl.h>
10 #include <ceed.h>
11 #include <ceed/backend.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) {
128       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     }
130     // Cleanup
131     CeedCall(CeedQFunctionDestroy(&qf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
133     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
134   }
135   CeedCall(CeedOperatorSetName(op_fallback, op->name));
136   CeedCall(CeedOperatorCheckReady(op_fallback));
137   op->op_fallback = op_fallback;
138 
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
144 
145   @param[in]  op          CeedOperator to retrieve fallback for
146   @param[out] op_fallback Fallback CeedOperator
147 
148   @return An error code: 0 - success, otherwise - failure
149 
150   @ref Developer
151 **/
152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
153   // Create if needed
154   if (!op->op_fallback) {
155     CeedCall(CeedOperatorCreateFallback(op));
156   }
157   if (op->op_fallback) {
158     bool is_debug;
159 
160     CeedCall(CeedIsDebug(op->ceed, &is_debug));
161     if (is_debug) {
162       Ceed        ceed, ceed_fallback;
163       const char *resource, *resource_fallback;
164 
165       CeedCall(CeedOperatorGetCeed(op, &ceed));
166       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
167       CeedCall(CeedGetResource(ceed, &resource));
168       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
169 
170       CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n");
171       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
172                 op->op_fallback);
173     }
174   }
175   *op_fallback = op->op_fallback;
176 
177   return CEED_ERROR_SUCCESS;
178 }
179 
180 /**
181   @brief Select correct basis matrix pointer based on CeedEvalMode
182 
183   @param[in]  basis     CeedBasis from which to get the basis matrix
184   @param[in]  eval_mode Current basis evaluation mode
185   @param[in]  identity  Pointer to identity matrix
186   @param[out] basis_ptr Basis pointer to set
187 
188   @ref Developer
189 **/
190 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
191   switch (eval_mode) {
192     case CEED_EVAL_NONE:
193       *basis_ptr = identity;
194       break;
195     case CEED_EVAL_INTERP:
196       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
197       break;
198     case CEED_EVAL_GRAD:
199       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
200       break;
201     case CEED_EVAL_DIV:
202       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
203       break;
204     case CEED_EVAL_CURL:
205       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
206       break;
207     case CEED_EVAL_WEIGHT:
208       break;  // Caught by QF Assembly
209   }
210   assert(*basis_ptr != NULL);
211 
212   return CEED_ERROR_SUCCESS;
213 }
214 
215 /**
216   @brief Create point block restriction for active operator field
217 
218   @param[in]  rstr            Original CeedElemRestriction for active field
219   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
220 
221   @return An error code: 0 - success, otherwise - failure
222 
223   @ref Developer
224 **/
225 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
226   Ceed ceed;
227   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
228   const CeedInt *offsets;
229   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
230 
231   // Expand offsets
232   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
233   CeedSize l_size;
234   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
235   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
236   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
237   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
238   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
239   CeedInt shift = num_comp;
240   if (comp_stride != 1) shift *= num_comp;
241   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
242   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
243     pointblock_offsets[i] = offsets[i] * shift;
244   }
245 
246   // Create new restriction
247   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
248                                      pointblock_offsets, pointblock_rstr));
249 
250   // Cleanup
251   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
252 
253   return CEED_ERROR_SUCCESS;
254 }
255 
256 /**
257   @brief Core logic for assembling operator diagonal or point block diagonal
258 
259   @param[in]  op            CeedOperator to assemble point block diagonal
260   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
261   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
262   @param[out] assembled     CeedVector to store assembled diagonal
263 
264   @return An error code: 0 - success, otherwise - failure
265 
266   @ref Developer
267 **/
268 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
269   Ceed ceed;
270   CeedCall(CeedOperatorGetCeed(op, &ceed));
271 
272   // Assemble QFunction
273   CeedQFunction       qf;
274   const CeedScalar   *assembled_qf_array;
275   CeedVector          assembled_qf;
276   CeedElemRestriction assembled_elem_rstr;
277   CeedInt             num_input_fields, num_output_fields;
278   CeedInt             layout[3];
279 
280   CeedCall(CeedOperatorGetQFunction(op, &qf));
281   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
282   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
283   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout));
284   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
285   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
286 
287   // Get assembly data
288   CeedOperatorAssemblyData data;
289   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
290   CeedInt                 *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
291   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
292   CeedBasis               *active_bases;
293   CeedElemRestriction     *active_elem_rstrs;
294   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
295   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
296                                                 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components));
297   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL));
298   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs));
299 
300   // Loop over all active bases
301   for (CeedInt b = 0; b < num_active_bases; b++) {
302     // Assemble point block diagonal restriction, if needed
303     CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b];
304 
305     if (is_pointblock) {
306       CeedElemRestriction point_block_elem_rstr;
307 
308       CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr));
309       diag_elem_rstr = point_block_elem_rstr;
310     }
311 
312     // Create diagonal vector
313     CeedVector elem_diag;
314     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
315 
316     // Assemble element operator diagonals
317     CeedScalar *elem_diag_array;
318     CeedInt     num_elem, num_nodes, num_qpts, num_components;
319 
320     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
321     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
322     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
323     CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes));
324     CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components));
325     CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts));
326 
327     // Construct identity matrix for basis if required
328     bool        has_eval_none = false;
329     CeedScalar *identity      = NULL;
330     for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) {
331       has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE);
332     }
333     for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) {
334       has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE);
335     }
336     if (has_eval_none) {
337       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
338       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
339     }
340 
341     // Compute the diagonal of B^T D B
342     // Each element
343     for (CeedInt e = 0; e < num_elem; e++) {
344       // Each basis eval mode pair
345       CeedInt      d_out              = 0, q_comp_out;
346       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
347       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) {
348         const CeedScalar *B_t = NULL;
349         CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t);
350         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out));
351         if (q_comp_out > 1) {
352           if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
353           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
354         }
355         eval_mode_out_prev = eval_modes_out[b][e_out];
356 
357         CeedInt      d_in              = 0, q_comp_in;
358         CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
359         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) {
360           const CeedScalar *B = NULL;
361           CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B);
362           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in));
363           if (q_comp_in > 1) {
364             if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
365             else B = &B[(++d_in) * num_qpts * num_nodes];
366           }
367           eval_mode_in_prev = eval_modes_in[b][e_in];
368 
369           // Each component
370           for (CeedInt c_out = 0; c_out < num_components; c_out++) {
371             // Each qpt/node pair
372             for (CeedInt q = 0; q < num_qpts; q++) {
373               if (is_pointblock) {
374                 // Point Block Diagonal
375                 for (CeedInt c_in = 0; c_in < num_components; c_in++) {
376                   const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
377                   const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
378                   for (CeedInt n = 0; n < num_nodes; n++) {
379                     elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] +=
380                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
381                   }
382                 }
383               } else {
384                 // Diagonal Only
385                 const CeedInt    c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
386                 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
387                 for (CeedInt n = 0; n < num_nodes; n++) {
388                   elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
389                 }
390               }
391             }
392           }
393         }
394       }
395     }
396     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
397 
398     // Assemble local operator diagonal
399     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
400 
401     // Cleanup
402     if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
403     CeedCall(CeedVectorDestroy(&elem_diag));
404     CeedCall(CeedFree(&identity));
405   }
406   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
407   CeedCall(CeedVectorDestroy(&assembled_qf));
408 
409   return CEED_ERROR_SUCCESS;
410 }
411 
412 /**
413   @brief Core logic for assembling composite operator diagonal
414 
415   @param[in]  op            CeedOperator to assemble point block diagonal
416   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
417   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
418   @param[out] assembled     CeedVector to store assembled diagonal
419 
420   @return An error code: 0 - success, otherwise - failure
421 
422   @ref Developer
423 **/
424 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
425                                                                  CeedVector assembled) {
426   CeedInt       num_sub;
427   CeedOperator *suboperators;
428   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
429   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
430   for (CeedInt i = 0; i < num_sub; i++) {
431     if (is_pointblock) {
432       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
433     } else {
434       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
435     }
436   }
437   return CEED_ERROR_SUCCESS;
438 }
439 
440 /**
441   @brief Build nonzero pattern for non-composite operator
442 
443   Users should generally use CeedOperatorLinearAssembleSymbolic()
444 
445   @param[in]  op     CeedOperator to assemble nonzero pattern
446   @param[in]  offset Offset for number of entries
447   @param[out] rows   Row number for each entry
448   @param[out] cols   Column number for each entry
449 
450   @return An error code: 0 - success, otherwise - failure
451 
452   @ref Developer
453 **/
454 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
455   Ceed ceed;
456   bool is_composite;
457   CeedCall(CeedOperatorGetCeed(op, &ceed));
458   CeedCall(CeedOperatorIsComposite(op, &is_composite));
459 
460   if (is_composite) {
461     // LCOV_EXCL_START
462     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
463     // LCOV_EXCL_STOP
464   }
465 
466   CeedSize num_nodes;
467   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
468   CeedElemRestriction rstr_in;
469   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
470   CeedInt num_elem, elem_size, num_comp;
471   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
472   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
473   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
474   CeedInt layout_er[3];
475   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
476 
477   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
478 
479   // Determine elem_dof relation
480   CeedVector index_vec;
481   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
482   CeedScalar *array;
483   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
484   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
485   CeedCall(CeedVectorRestoreArray(index_vec, &array));
486   CeedVector elem_dof;
487   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
488   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
489   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
490   const CeedScalar *elem_dof_a;
491   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
492   CeedCall(CeedVectorDestroy(&index_vec));
493 
494   // Determine i, j locations for element matrices
495   CeedInt count = 0;
496   for (CeedInt e = 0; e < num_elem; e++) {
497     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
498       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
499         for (CeedInt i = 0; i < elem_size; i++) {
500           for (CeedInt j = 0; j < elem_size; j++) {
501             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
502             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
503 
504             const CeedInt row = elem_dof_a[elem_dof_index_row];
505             const CeedInt col = elem_dof_a[elem_dof_index_col];
506 
507             rows[offset + count] = row;
508             cols[offset + count] = col;
509             count++;
510           }
511         }
512       }
513     }
514   }
515   if (count != local_num_entries) {
516     // LCOV_EXCL_START
517     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
518     // LCOV_EXCL_STOP
519   }
520   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
521   CeedCall(CeedVectorDestroy(&elem_dof));
522 
523   return CEED_ERROR_SUCCESS;
524 }
525 
526 /**
527   @brief Assemble nonzero entries for non-composite operator
528 
529   Users should generally use CeedOperatorLinearAssemble()
530 
531   @param[in]  op     CeedOperator to assemble
532   @param[in]  offset Offset for number of entries
533   @param[out] values Values to assemble into matrix
534 
535   @return An error code: 0 - success, otherwise - failure
536 
537   @ref Developer
538 **/
539 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
540   Ceed ceed;
541   bool is_composite;
542   CeedCall(CeedOperatorGetCeed(op, &ceed));
543   CeedCall(CeedOperatorIsComposite(op, &is_composite));
544 
545   if (is_composite) {
546     // LCOV_EXCL_START
547     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
548     // LCOV_EXCL_STOP
549   }
550 
551   // Early exit for empty operator
552   {
553     CeedInt num_elem = 0;
554 
555     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
556     if (num_elem == 0) return CEED_ERROR_SUCCESS;
557   }
558 
559   if (op->LinearAssembleSingle) {
560     // Backend version
561     CeedCall(op->LinearAssembleSingle(op, offset, values));
562     return CEED_ERROR_SUCCESS;
563   } else {
564     // Operator fallback
565     CeedOperator op_fallback;
566 
567     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
568     if (op_fallback) {
569       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
570       return CEED_ERROR_SUCCESS;
571     }
572   }
573 
574   // Assemble QFunction
575   CeedQFunction qf;
576   CeedCall(CeedOperatorGetQFunction(op, &qf));
577   CeedVector          assembled_qf;
578   CeedElemRestriction rstr_q;
579   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
580   CeedSize qf_length;
581   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
582 
583   CeedInt            num_input_fields, num_output_fields;
584   CeedOperatorField *input_fields;
585   CeedOperatorField *output_fields;
586   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
587 
588   // Get assembly data
589   CeedOperatorAssemblyData data;
590   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
591   const CeedEvalMode **eval_modes_in, **eval_modes_out;
592   CeedInt             *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
593   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out,
594                                                 &eval_modes_out, NULL, NULL));
595   CeedBasis *bases;
596   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL));
597   CeedBasis basis_in = bases[0];
598 
599   if (num_active_bases > 1) {
600     // LCOV_EXCL_START
601     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases");
602     // LCOV_EXCL_STOP
603   }
604   if (num_eval_modes_in[0] == 0 || num_eval_modes_out[0] == 0) {
605     // LCOV_EXCL_START
606     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
607     // LCOV_EXCL_STOP
608   }
609 
610   CeedElemRestriction active_rstr;
611   CeedInt             num_elem, elem_size, num_qpts, num_comp;
612   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
613   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
614   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
615   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
616   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
617 
618   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
619 
620   // loop over elements and put in data structure
621   const CeedScalar *assembled_qf_array;
622   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
623 
624   CeedInt layout_qf[3];
625   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
626   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
627 
628   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
629   const CeedScalar **B_mats_in, **B_mats_out;
630   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out));
631   const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0];
632   CeedScalar        BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]];
633   CeedScalar        elem_mat[elem_size * elem_size];
634   CeedInt           count = 0;
635   CeedScalar       *vals;
636   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
637   for (CeedInt e = 0; e < num_elem; e++) {
638     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
639       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
640         // Compute B^T*D
641         for (CeedInt n = 0; n < elem_size; n++) {
642           for (CeedInt q = 0; q < num_qpts; q++) {
643             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
644               const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in);
645               CeedScalar    sum       = 0.0;
646               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
647                 const CeedInt b_out_index     = (num_eval_modes_out[0] * q + e_out) * elem_size + n;
648                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out;
649                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
650                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
651               }
652               BTD_mat[btd_index] = sum;
653             }
654           }
655         }
656         // form element matrix itself (for each block component)
657         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0]));
658 
659         // put element matrix in coordinate data structure
660         for (CeedInt i = 0; i < elem_size; i++) {
661           for (CeedInt j = 0; j < elem_size; j++) {
662             vals[offset + count] = elem_mat[i * elem_size + j];
663             count++;
664           }
665         }
666       }
667     }
668   }
669   if (count != local_num_entries) {
670     // LCOV_EXCL_START
671     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
672     // LCOV_EXCL_STOP
673   }
674   CeedCall(CeedVectorRestoreArray(values, &vals));
675 
676   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
677   CeedCall(CeedVectorDestroy(&assembled_qf));
678 
679   return CEED_ERROR_SUCCESS;
680 }
681 
682 /**
683   @brief Count number of entries for assembled CeedOperator
684 
685   @param[in]  op          CeedOperator to assemble
686   @param[out] num_entries Number of entries in assembled representation
687 
688   @return An error code: 0 - success, otherwise - failure
689 
690   @ref Utility
691 **/
692 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
693   bool                is_composite;
694   CeedElemRestriction rstr;
695   CeedInt             num_elem, elem_size, num_comp;
696 
697   CeedCall(CeedOperatorIsComposite(op, &is_composite));
698   if (is_composite) {
699     // LCOV_EXCL_START
700     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
701     // LCOV_EXCL_STOP
702   }
703   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
704   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
705   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
706   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
707   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
708 
709   return CEED_ERROR_SUCCESS;
710 }
711 
712 /**
713   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
714 
715   @param[in]  op_fine      Fine grid operator
716   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
717   @param[in]  rstr_coarse  Coarse grid restriction
718   @param[in]  basis_coarse Coarse grid active vector basis
719   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
720   @param[out] op_coarse    Coarse grid operator
721   @param[out] op_prolong   Coarse to fine operator, or NULL
722   @param[out] op_restrict  Fine to coarse operator, or NULL
723 
724   @return An error code: 0 - success, otherwise - failure
725 
726   @ref Developer
727 **/
728 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
729                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
730   Ceed       ceed;
731   CeedVector mult_vec = NULL;
732   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
733 
734   // Check for composite operator
735   bool is_composite;
736   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
737   if (is_composite) {
738     // LCOV_EXCL_START
739     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
740     // LCOV_EXCL_STOP
741   }
742 
743   // Coarse Grid
744   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
745   CeedElemRestriction rstr_fine = NULL;
746   // -- Clone input fields
747   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
748     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
749       rstr_fine = op_fine->input_fields[i]->elem_rstr;
750       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
751     } else {
752       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
753                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
754     }
755   }
756   // -- Clone output fields
757   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
758     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
759       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
760     } else {
761       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
762                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
763     }
764   }
765   // -- Clone QFunctionAssemblyData
766   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
767 
768   // Multiplicity vector
769   if (op_restrict || op_prolong) {
770     CeedVector mult_e_vec;
771 
772     if (!p_mult_fine) {
773       // LCOV_EXCL_START
774       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
775       // LCOV_EXCL_STOP
776     }
777     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
778     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
779     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
780     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
781     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
782     CeedCall(CeedVectorDestroy(&mult_e_vec));
783     CeedCall(CeedVectorReciprocal(mult_vec));
784   }
785 
786   // Clone name
787   bool   has_name = op_fine->name;
788   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
789   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
790 
791   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
792   if ((op_restrict || op_prolong) && !basis_c_to_f) {
793     // LCOV_EXCL_START
794     return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine basis");
795     // LCOV_EXCL_STOP
796   }
797 
798   // Restriction/Prolongation Operators
799   CeedInt num_comp;
800   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
801 
802   // Restriction
803   if (op_restrict) {
804     CeedInt             *num_comp_r_data;
805     CeedQFunction        qf_restrict;
806     CeedQFunctionContext ctx_r;
807 
808     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
809     CeedCall(CeedCalloc(1, &num_comp_r_data));
810     num_comp_r_data[0] = num_comp;
811     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
812     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
813     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
814     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
815     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
816     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
817     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
818     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
819 
820     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
821     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
822     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
823     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
824 
825     // Set name
826     char *restriction_name;
827     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
828     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
829     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
830     CeedCall(CeedFree(&restriction_name));
831 
832     // Check
833     CeedCall(CeedOperatorCheckReady(*op_restrict));
834 
835     // Cleanup
836     CeedCall(CeedQFunctionDestroy(&qf_restrict));
837   }
838 
839   // Prolongation
840   if (op_prolong) {
841     CeedInt             *num_comp_p_data;
842     CeedQFunction        qf_prolong;
843     CeedQFunctionContext ctx_p;
844 
845     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
846     CeedCall(CeedCalloc(1, &num_comp_p_data));
847     num_comp_p_data[0] = num_comp;
848     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
849     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
850     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
851     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
852     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
853     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
854     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
855     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
856 
857     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
858     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
859     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
860     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
861 
862     // Set name
863     char *prolongation_name;
864     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
865     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
866     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
867     CeedCall(CeedFree(&prolongation_name));
868 
869     // Check
870     CeedCall(CeedOperatorCheckReady(*op_prolong));
871 
872     // Cleanup
873     CeedCall(CeedQFunctionDestroy(&qf_prolong));
874   }
875 
876   // Check
877   CeedCall(CeedOperatorCheckReady(*op_coarse));
878 
879   // Cleanup
880   CeedCall(CeedVectorDestroy(&mult_vec));
881   CeedCall(CeedBasisDestroy(&basis_c_to_f));
882 
883   return CEED_ERROR_SUCCESS;
884 }
885 
886 /**
887   @brief Build 1D mass matrix and Laplacian with perturbation
888 
889   @param[in]  interp_1d   Interpolation matrix in one dimension
890   @param[in]  grad_1d     Gradient matrix in one dimension
891   @param[in]  q_weight_1d Quadrature weights in one dimension
892   @param[in]  P_1d        Number of basis nodes in one dimension
893   @param[in]  Q_1d        Number of quadrature points in one dimension
894   @param[in]  dim         Dimension of basis
895   @param[out] mass        Assembled mass matrix in one dimension
896   @param[out] laplace     Assembled perturbed Laplacian in one dimension
897 
898   @return An error code: 0 - success, otherwise - failure
899 
900   @ref Developer
901 **/
902 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d,
903                                                       CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
904   for (CeedInt i = 0; i < P_1d; i++) {
905     for (CeedInt j = 0; j < P_1d; j++) {
906       CeedScalar sum = 0.0;
907       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
908       mass[i + j * P_1d] = sum;
909     }
910   }
911   // -- Laplacian
912   for (CeedInt i = 0; i < P_1d; i++) {
913     for (CeedInt j = 0; j < P_1d; j++) {
914       CeedScalar sum = 0.0;
915       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
916       laplace[i + j * P_1d] = sum;
917     }
918   }
919   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
920   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
921   return CEED_ERROR_SUCCESS;
922 }
923 CeedPragmaOptimizeOn;
924 
925 /// @}
926 
927 /// ----------------------------------------------------------------------------
928 /// CeedOperator Backend API
929 /// ----------------------------------------------------------------------------
930 /// @addtogroup CeedOperatorBackend
931 /// @{
932 
933 /**
934   @brief Create object holding CeedQFunction assembly data for CeedOperator
935 
936   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
937   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
938 
939   @return An error code: 0 - success, otherwise - failure
940 
941   @ref Backend
942 **/
943 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
944   CeedCall(CeedCalloc(1, data));
945   (*data)->ref_count = 1;
946   (*data)->ceed      = ceed;
947   CeedCall(CeedReference(ceed));
948 
949   return CEED_ERROR_SUCCESS;
950 }
951 
952 /**
953   @brief Increment the reference counter for a CeedQFunctionAssemblyData
954 
955   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
956 
957   @return An error code: 0 - success, otherwise - failure
958 
959   @ref Backend
960 **/
961 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
962   data->ref_count++;
963   return CEED_ERROR_SUCCESS;
964 }
965 
966 /**
967   @brief Set re-use of CeedQFunctionAssemblyData
968 
969   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
970   @param[in]     reuse_data Boolean flag indicating data re-use
971 
972   @return An error code: 0 - success, otherwise - failure
973 
974   @ref Backend
975 **/
976 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
977   data->reuse_data        = reuse_data;
978   data->needs_data_update = true;
979   return CEED_ERROR_SUCCESS;
980 }
981 
982 /**
983   @brief Mark QFunctionAssemblyData as stale
984 
985   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
986   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
987 
988   @return An error code: 0 - success, otherwise - failure
989 
990   @ref Backend
991 **/
992 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
993   data->needs_data_update = needs_data_update;
994   return CEED_ERROR_SUCCESS;
995 }
996 
997 /**
998   @brief Determine if QFunctionAssemblyData needs update
999 
1000   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
1001   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1002 
1003   @return An error code: 0 - success, otherwise - failure
1004 
1005   @ref Backend
1006 **/
1007 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
1008   *is_update_needed = !data->reuse_data || data->needs_data_update;
1009   return CEED_ERROR_SUCCESS;
1010 }
1011 
1012 /**
1013   @brief Copy the pointer to a CeedQFunctionAssemblyData.
1014            Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
1015 
1016            Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
1017              CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
1018              CeedQFunctionAssemblyData.
1019 
1020   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
1021   @param[in,out] data_copy Variable to store copied reference
1022 
1023   @return An error code: 0 - success, otherwise - failure
1024 
1025   @ref Backend
1026 **/
1027 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1028   CeedCall(CeedQFunctionAssemblyDataReference(data));
1029   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1030   *data_copy = data;
1031   return CEED_ERROR_SUCCESS;
1032 }
1033 
1034 /**
1035   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1036 
1037   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1038   @param[out] is_setup Boolean flag for setup status
1039 
1040   @return An error code: 0 - success, otherwise - failure
1041 
1042   @ref Backend
1043 **/
1044 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1045   *is_setup = data->is_setup;
1046   return CEED_ERROR_SUCCESS;
1047 }
1048 
1049 /**
1050   @brief Set internal objects for CeedQFunctionAssemblyData
1051 
1052   @param[in,out] data CeedQFunctionAssemblyData to set objects
1053   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1054   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1055 
1056   @return An error code: 0 - success, otherwise - failure
1057 
1058   @ref Backend
1059 **/
1060 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1061   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1062   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1063 
1064   data->is_setup = true;
1065   return CEED_ERROR_SUCCESS;
1066 }
1067 
1068 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1069   if (!data->is_setup) {
1070     // LCOV_EXCL_START
1071     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1072     // LCOV_EXCL_STOP
1073   }
1074 
1075   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1076   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1077 
1078   return CEED_ERROR_SUCCESS;
1079 }
1080 
1081 /**
1082   @brief Destroy CeedQFunctionAssemblyData
1083 
1084   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1085 
1086   @return An error code: 0 - success, otherwise - failure
1087 
1088   @ref Backend
1089 **/
1090 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1091   if (!*data || --(*data)->ref_count > 0) {
1092     *data = NULL;
1093     return CEED_ERROR_SUCCESS;
1094   }
1095   CeedCall(CeedDestroy(&(*data)->ceed));
1096   CeedCall(CeedVectorDestroy(&(*data)->vec));
1097   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1098 
1099   CeedCall(CeedFree(data));
1100   return CEED_ERROR_SUCCESS;
1101 }
1102 
1103 /**
1104   @brief Get CeedOperatorAssemblyData
1105 
1106   @param[in]  op   CeedOperator to assemble
1107   @param[out] data CeedQFunctionAssemblyData
1108 
1109   @return An error code: 0 - success, otherwise - failure
1110 
1111   @ref Backend
1112 **/
1113 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1114   if (!op->op_assembled) {
1115     CeedOperatorAssemblyData data;
1116 
1117     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1118     op->op_assembled = data;
1119   }
1120   *data = op->op_assembled;
1121 
1122   return CEED_ERROR_SUCCESS;
1123 }
1124 
1125 /**
1126   @brief Create object holding CeedOperator assembly data.
1127 
1128     The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1129     An array with references to the corresponding active CeedElemRestrictions is also stored.
1130     For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1131     The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1132       CeedEvalMode.
1133     The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1134     Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1135 
1136   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1137   @param[in]  op   CeedOperator to be assembled
1138   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1139 
1140   @return An error code: 0 - success, otherwise - failure
1141 
1142   @ref Backend
1143 **/
1144 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1145   CeedInt num_active_bases = 0;
1146 
1147   // Allocate
1148   CeedCall(CeedCalloc(1, data));
1149   (*data)->ceed = ceed;
1150   CeedCall(CeedReference(ceed));
1151 
1152   // Build OperatorAssembly data
1153   CeedQFunction       qf;
1154   CeedQFunctionField *qf_fields;
1155   CeedOperatorField  *op_fields;
1156   CeedInt             num_input_fields;
1157   CeedCall(CeedOperatorGetQFunction(op, &qf));
1158   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1159   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1160 
1161   // Determine active input basis
1162   CeedInt       *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0;
1163   CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL;
1164   CeedSize     **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1165   for (CeedInt i = 0; i < num_input_fields; i++) {
1166     CeedVector vec;
1167     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1168     if (vec == CEED_VECTOR_ACTIVE) {
1169       CeedBasis    basis_in = NULL;
1170       CeedEvalMode eval_mode;
1171       CeedInt      index = -1, dim, num_comp, q_comp;
1172       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1173       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1174       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1175       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1176       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1177       for (CeedInt i = 0; i < num_active_bases; i++) {
1178         if ((*data)->active_bases[i] == basis_in) index = i;
1179       }
1180       if (index == -1) {
1181         CeedElemRestriction elem_rstr_in;
1182         index = num_active_bases;
1183         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1184         (*data)->active_bases[num_active_bases] = NULL;
1185         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases]));
1186         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1187         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1188         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1189         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases]));
1190         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1191         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1192         num_eval_modes_in[index]  = 0;
1193         num_eval_modes_out[index] = 0;
1194         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1195         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1196         eval_modes_in[index]  = NULL;
1197         eval_modes_out[index] = NULL;
1198         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1199         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1200         eval_mode_offsets_in[index]  = NULL;
1201         eval_mode_offsets_out[index] = NULL;
1202         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1203         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1204         (*data)->assembled_bases_in[index]  = NULL;
1205         (*data)->assembled_bases_out[index] = NULL;
1206         num_active_bases++;
1207       }
1208       if (eval_mode != CEED_EVAL_WEIGHT) {
1209         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1210         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1211         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1212         for (CeedInt d = 0; d < q_comp; d++) {
1213           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1214           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1215           offset += num_comp;
1216         }
1217         num_eval_modes_in[index] += q_comp;
1218       }
1219     }
1220   }
1221   (*data)->num_eval_modes_in    = num_eval_modes_in;
1222   (*data)->eval_modes_in        = eval_modes_in;
1223   (*data)->eval_mode_offsets_in = eval_mode_offsets_in;
1224 
1225   // Determine active output basis
1226   CeedInt num_output_fields;
1227   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1228   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1229   offset = 0;
1230   for (CeedInt i = 0; i < num_output_fields; i++) {
1231     CeedVector vec;
1232     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1233     if (vec == CEED_VECTOR_ACTIVE) {
1234       CeedBasis    basis_out = NULL;
1235       CeedEvalMode eval_mode;
1236       CeedInt      index = -1, dim, num_comp, q_comp;
1237       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1238       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1239       CeedCall(CeedBasisGetDimension(basis_out, &dim));
1240       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1241       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1242       for (CeedInt i = 0; i < num_active_bases; i++) {
1243         if ((*data)->active_bases[i] == basis_out) index = i;
1244       }
1245       if (index == -1) {
1246         CeedElemRestriction elem_rstr_out;
1247 
1248         index = num_active_bases;
1249         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1250         (*data)->active_bases[num_active_bases] = NULL;
1251         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases]));
1252         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1253         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1254         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1255         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases]));
1256         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1257         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1258         num_eval_modes_in[index]  = 0;
1259         num_eval_modes_out[index] = 0;
1260         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1261         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1262         eval_modes_in[index]  = NULL;
1263         eval_modes_out[index] = NULL;
1264         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1265         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1266         eval_mode_offsets_in[index]  = NULL;
1267         eval_mode_offsets_out[index] = NULL;
1268         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1269         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1270         (*data)->assembled_bases_in[index]  = NULL;
1271         (*data)->assembled_bases_out[index] = NULL;
1272         num_active_bases++;
1273       }
1274       if (eval_mode != CEED_EVAL_WEIGHT) {
1275         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1276         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1277         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1278         for (CeedInt d = 0; d < q_comp; d++) {
1279           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1280           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1281           offset += num_comp;
1282         }
1283         num_eval_modes_out[index] += q_comp;
1284       }
1285     }
1286   }
1287   (*data)->num_output_components = offset;
1288   (*data)->num_eval_modes_out    = num_eval_modes_out;
1289   (*data)->eval_modes_out        = eval_modes_out;
1290   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1291   (*data)->num_active_bases      = num_active_bases;
1292 
1293   return CEED_ERROR_SUCCESS;
1294 }
1295 
1296 /**
1297   @brief Get CeedOperator CeedEvalModes for assembly.
1298 
1299     Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1300 
1301   @param[in]  data                  CeedOperatorAssemblyData
1302   @param[out] num_active_bases      Total number of active bases
1303   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1304                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1305   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1306   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1307   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1308   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1309   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1310   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1311                                       including contributions of all active bases
1312 
1313   @return An error code: 0 - success, otherwise - failure
1314 
1315 
1316   @ref Backend
1317 **/
1318 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in,
1319                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out,
1320                                          const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) {
1321   if (num_active_bases) *num_active_bases = data->num_active_bases;
1322   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1323   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1324   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1325   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1326   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1327   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1328   if (num_output_components) *num_output_components = data->num_output_components;
1329 
1330   return CEED_ERROR_SUCCESS;
1331 }
1332 
1333 /**
1334   @brief Get CeedOperator CeedBasis data for assembly.
1335 
1336     Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1337 
1338   @param[in]  data                CeedOperatorAssemblyData
1339   @param[out] num_active_bases    Number of active bases, or NULL
1340   @param[out] active_bases        Pointer to hold active CeedBasis, or NULL
1341   @param[out] assembled_bases_in  Pointer to hold assembled active input B, or NULL
1342   @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL
1343 
1344   @return An error code: 0 - success, otherwise - failure
1345 
1346   @ref Backend
1347 **/
1348 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases,
1349                                      const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) {
1350   // Assemble B_in, B_out if needed
1351   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1352     CeedInt num_qpts;
1353 
1354     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1355     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1356       CeedInt     num_nodes;
1357       CeedScalar *B_in = NULL, *identity = NULL;
1358       bool        has_eval_none = false;
1359 
1360       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1361       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1362 
1363       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1364         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1365       }
1366       if (has_eval_none) {
1367         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1368         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1369           identity[i * num_nodes + i] = 1.0;
1370         }
1371       }
1372 
1373       for (CeedInt q = 0; q < num_qpts; q++) {
1374         for (CeedInt n = 0; n < num_nodes; n++) {
1375           CeedInt      d_in              = 0, q_comp_in;
1376           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1377           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1378             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1379             const CeedScalar *B  = NULL;
1380             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B);
1381             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in));
1382             if (q_comp_in > 1) {
1383               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1384               else B = &B[(++d_in) * num_qpts * num_nodes];
1385             }
1386             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1387             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1388           }
1389         }
1390       }
1391       if (identity) CeedCall(CeedFree(identity));
1392       data->assembled_bases_in[b] = B_in;
1393     }
1394   }
1395 
1396   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1397     CeedInt num_qpts;
1398 
1399     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1400     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1401       CeedInt     num_nodes;
1402       bool        has_eval_none = false;
1403       CeedScalar *B_out = NULL, *identity = NULL;
1404 
1405       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1406       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1407 
1408       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1409         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1410       }
1411       if (has_eval_none) {
1412         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1413         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1414           identity[i * num_nodes + i] = 1.0;
1415         }
1416       }
1417 
1418       for (CeedInt q = 0; q < num_qpts; q++) {
1419         for (CeedInt n = 0; n < num_nodes; n++) {
1420           CeedInt      d_out              = 0, q_comp_out;
1421           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1422           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1423             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1424             const CeedScalar *B  = NULL;
1425             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B);
1426             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out));
1427             if (q_comp_out > 1) {
1428               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1429               else B = &B[(++d_out) * num_qpts * num_nodes];
1430             }
1431             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1432             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1433           }
1434         }
1435       }
1436       if (identity) CeedCall(CeedFree(identity));
1437       data->assembled_bases_out[b] = B_out;
1438     }
1439   }
1440 
1441   // Pass out assembled data
1442   if (active_bases) *active_bases = data->active_bases;
1443   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1444   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1445 
1446   return CEED_ERROR_SUCCESS;
1447 }
1448 
1449 /**
1450   @brief Get CeedOperator CeedBasis data for assembly.
1451 
1452   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1453 
1454   @param[in]  data                  CeedOperatorAssemblyData
1455   @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL
1456   @param[out] active_elem_rstrs     Pointer to hold active CeedElemRestrictions, or NULL
1457 
1458   @return An error code: 0 - success, otherwise - failure
1459 
1460   @ref Backend
1461 **/
1462 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs,
1463                                                 CeedElemRestriction **active_elem_rstrs) {
1464   if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases;
1465   if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs;
1466 
1467   return CEED_ERROR_SUCCESS;
1468 }
1469 
1470 /**
1471   @brief Destroy CeedOperatorAssemblyData
1472 
1473   @param[in,out] data CeedOperatorAssemblyData to destroy
1474 
1475   @return An error code: 0 - success, otherwise - failure
1476 
1477   @ref Backend
1478 **/
1479 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1480   if (!*data) {
1481     *data = NULL;
1482     return CEED_ERROR_SUCCESS;
1483   }
1484   CeedCall(CeedDestroy(&(*data)->ceed));
1485   for (CeedInt b = 0; b < (*data)->num_active_bases; b++) {
1486     CeedCall(CeedBasisDestroy(&(*data)->active_bases[b]));
1487     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b]));
1488     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1489     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1490     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1491     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1492     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1493     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1494   }
1495   CeedCall(CeedFree(&(*data)->active_bases));
1496   CeedCall(CeedFree(&(*data)->active_elem_rstrs));
1497   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1498   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1499   CeedCall(CeedFree(&(*data)->eval_modes_in));
1500   CeedCall(CeedFree(&(*data)->eval_modes_out));
1501   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1502   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1503   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1504   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1505 
1506   CeedCall(CeedFree(data));
1507   return CEED_ERROR_SUCCESS;
1508 }
1509 
1510 /// @}
1511 
1512 /// ----------------------------------------------------------------------------
1513 /// CeedOperator Public API
1514 /// ----------------------------------------------------------------------------
1515 /// @addtogroup CeedOperatorUser
1516 /// @{
1517 
1518 /**
1519   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1520 
1521   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1522     The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices
1523 representing the action of the CeedQFunction for a corresponding quadrature point on an element.
1524 
1525   Inputs and outputs are in the order provided by the
1526 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order,
1527 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1]
1528 and producing the output [dv_0, dv_1, v].
1529 
1530   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1531 
1532   @param[in]  op        CeedOperator to assemble CeedQFunction
1533   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1534   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1535   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1536 
1537   @return An error code: 0 - success, otherwise - failure
1538 
1539   @ref User
1540 **/
1541 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1542   CeedCall(CeedOperatorCheckReady(op));
1543 
1544   if (op->LinearAssembleQFunction) {
1545     // Backend version
1546     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1547   } else {
1548     // Operator fallback
1549     CeedOperator op_fallback;
1550 
1551     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1552     if (op_fallback) {
1553       CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1554     } else {
1555       // LCOV_EXCL_START
1556       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1557       // LCOV_EXCL_STOP
1558     }
1559   }
1560   return CEED_ERROR_SUCCESS;
1561 }
1562 
1563 /**
1564   @brief Assemble CeedQFunction and store result internally.
1565            Return copied references of stored data to the caller.
1566            Caller is responsible for ownership and destruction of the copied references.
1567            See also @ref CeedOperatorLinearAssembleQFunction
1568 
1569   @param[in]  op        CeedOperator to assemble CeedQFunction
1570   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1571   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1572   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1573 
1574   @return An error code: 0 - success, otherwise - failure
1575 
1576   @ref User
1577 **/
1578 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1579   CeedCall(CeedOperatorCheckReady(op));
1580 
1581   if (op->LinearAssembleQFunctionUpdate) {
1582     // Backend version
1583     bool                qf_assembled_is_setup;
1584     CeedVector          assembled_vec  = NULL;
1585     CeedElemRestriction assembled_rstr = NULL;
1586 
1587     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1588     if (qf_assembled_is_setup) {
1589       bool update_needed;
1590 
1591       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1592       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1593       if (update_needed) {
1594         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1595       }
1596     } else {
1597       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1598       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1599     }
1600     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1601 
1602     // Copy reference from internally held copy
1603     *assembled = NULL;
1604     *rstr      = NULL;
1605     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1606     CeedCall(CeedVectorDestroy(&assembled_vec));
1607     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1608     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1609   } else {
1610     // Operator fallback
1611     CeedOperator op_fallback;
1612 
1613     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1614     if (op_fallback) {
1615       CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1616     } else {
1617       // LCOV_EXCL_START
1618       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1619       // LCOV_EXCL_STOP
1620     }
1621   }
1622 
1623   return CEED_ERROR_SUCCESS;
1624 }
1625 
1626 /**
1627   @brief Assemble the diagonal of a square linear CeedOperator
1628 
1629   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1630 
1631   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1632 
1633   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1634 
1635   @param[in]  op        CeedOperator to assemble CeedQFunction
1636   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1637   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1638 
1639   @return An error code: 0 - success, otherwise - failure
1640 
1641   @ref User
1642 **/
1643 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1644   bool is_composite;
1645   CeedCall(CeedOperatorCheckReady(op));
1646   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1647 
1648   CeedSize input_size = 0, output_size = 0;
1649   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1650   if (input_size != output_size) {
1651     // LCOV_EXCL_START
1652     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1653     // LCOV_EXCL_STOP
1654   }
1655 
1656   // Early exit for empty operator
1657   if (!is_composite) {
1658     CeedInt num_elem = 0;
1659 
1660     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1661     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1662   }
1663 
1664   if (op->LinearAssembleDiagonal) {
1665     // Backend version
1666     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1667     return CEED_ERROR_SUCCESS;
1668   } else if (op->LinearAssembleAddDiagonal) {
1669     // Backend version with zeroing first
1670     CeedCall(CeedVectorSetValue(assembled, 0.0));
1671     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1672     return CEED_ERROR_SUCCESS;
1673   } else {
1674     // Operator fallback
1675     CeedOperator op_fallback;
1676 
1677     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1678     if (op_fallback) {
1679       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1680       return CEED_ERROR_SUCCESS;
1681     }
1682   }
1683   // Default interface implementation
1684   CeedCall(CeedVectorSetValue(assembled, 0.0));
1685   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1686 
1687   return CEED_ERROR_SUCCESS;
1688 }
1689 
1690 /**
1691   @brief Assemble the diagonal of a square linear CeedOperator
1692 
1693   This sums into a CeedVector the diagonal of a linear CeedOperator.
1694 
1695   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1696 
1697   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1698 
1699   @param[in]  op        CeedOperator to assemble CeedQFunction
1700   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1701   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1702 
1703   @return An error code: 0 - success, otherwise - failure
1704 
1705   @ref User
1706 **/
1707 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1708   bool is_composite;
1709   CeedCall(CeedOperatorCheckReady(op));
1710   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1711 
1712   CeedSize input_size = 0, output_size = 0;
1713   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1714   if (input_size != output_size) {
1715     // LCOV_EXCL_START
1716     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1717     // LCOV_EXCL_STOP
1718   }
1719 
1720   // Early exit for empty operator
1721   if (!is_composite) {
1722     CeedInt num_elem = 0;
1723 
1724     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1725     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1726   }
1727 
1728   if (op->LinearAssembleAddDiagonal) {
1729     // Backend version
1730     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1731     return CEED_ERROR_SUCCESS;
1732   } else {
1733     // Operator fallback
1734     CeedOperator op_fallback;
1735 
1736     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1737     if (op_fallback) {
1738       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1739       return CEED_ERROR_SUCCESS;
1740     }
1741   }
1742   // Default interface implementation
1743   if (is_composite) {
1744     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1745   } else {
1746     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1747   }
1748 
1749   return CEED_ERROR_SUCCESS;
1750 }
1751 
1752 /**
1753   @brief Assemble the point block diagonal of a square linear CeedOperator
1754 
1755   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1756 
1757   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1758 
1759   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1760 
1761   @param[in]  op        CeedOperator to assemble CeedQFunction
1762   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1763 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1764 component in].
1765   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1766 
1767   @return An error code: 0 - success, otherwise - failure
1768 
1769   @ref User
1770 **/
1771 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1772   bool is_composite;
1773   CeedCall(CeedOperatorCheckReady(op));
1774   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1775 
1776   CeedSize input_size = 0, output_size = 0;
1777   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1778   if (input_size != output_size) {
1779     // LCOV_EXCL_START
1780     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1781     // LCOV_EXCL_STOP
1782   }
1783 
1784   // Early exit for empty operator
1785   if (!is_composite) {
1786     CeedInt num_elem = 0;
1787 
1788     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1789     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1790   }
1791 
1792   if (op->LinearAssemblePointBlockDiagonal) {
1793     // Backend version
1794     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1795     return CEED_ERROR_SUCCESS;
1796   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1797     // Backend version with zeroing first
1798     CeedCall(CeedVectorSetValue(assembled, 0.0));
1799     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1800     return CEED_ERROR_SUCCESS;
1801   } else {
1802     // Operator fallback
1803     CeedOperator op_fallback;
1804 
1805     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1806     if (op_fallback) {
1807       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1808       return CEED_ERROR_SUCCESS;
1809     }
1810   }
1811   // Default interface implementation
1812   CeedCall(CeedVectorSetValue(assembled, 0.0));
1813   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1814 
1815   return CEED_ERROR_SUCCESS;
1816 }
1817 
1818 /**
1819   @brief Assemble the point block diagonal of a square linear CeedOperator
1820 
1821   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1822 
1823   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1824 
1825   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1826 
1827   @param[in]  op        CeedOperator to assemble CeedQFunction
1828   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1829 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1830 component in].
1831   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1832 
1833   @return An error code: 0 - success, otherwise - failure
1834 
1835   @ref User
1836 **/
1837 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1838   bool is_composite;
1839   CeedCall(CeedOperatorCheckReady(op));
1840   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1841 
1842   CeedSize input_size = 0, output_size = 0;
1843   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1844   if (input_size != output_size) {
1845     // LCOV_EXCL_START
1846     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1847     // LCOV_EXCL_STOP
1848   }
1849 
1850   // Early exit for empty operator
1851   if (!is_composite) {
1852     CeedInt num_elem = 0;
1853 
1854     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1855     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1856   }
1857 
1858   if (op->LinearAssembleAddPointBlockDiagonal) {
1859     // Backend version
1860     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1861     return CEED_ERROR_SUCCESS;
1862   } else {
1863     // Operator fallback
1864     CeedOperator op_fallback;
1865 
1866     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1867     if (op_fallback) {
1868       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1869       return CEED_ERROR_SUCCESS;
1870     }
1871   }
1872   // Default interface implementation
1873   if (is_composite) {
1874     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1875   } else {
1876     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1877   }
1878 
1879   return CEED_ERROR_SUCCESS;
1880 }
1881 
1882 /**
1883    @brief Fully assemble the nonzero pattern of a linear operator.
1884 
1885    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1886 
1887    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1888 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j)
1889 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1890 
1891    This will generally be slow unless your operator is low-order.
1892 
1893    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1894 
1895    @param[in]  op          CeedOperator to assemble
1896    @param[out] num_entries Number of entries in coordinate nonzero pattern
1897    @param[out] rows        Row number for each entry
1898    @param[out] cols        Column number for each entry
1899 
1900    @ref User
1901 **/
1902 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1903   CeedInt       num_suboperators, single_entries;
1904   CeedOperator *sub_operators;
1905   bool          is_composite;
1906   CeedCall(CeedOperatorCheckReady(op));
1907   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1908 
1909   if (op->LinearAssembleSymbolic) {
1910     // Backend version
1911     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1912     return CEED_ERROR_SUCCESS;
1913   } else {
1914     // Operator fallback
1915     CeedOperator op_fallback;
1916 
1917     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1918     if (op_fallback) {
1919       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1920       return CEED_ERROR_SUCCESS;
1921     }
1922   }
1923 
1924   // Default interface implementation
1925 
1926   // count entries and allocate rows, cols arrays
1927   *num_entries = 0;
1928   if (is_composite) {
1929     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1930     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1931     for (CeedInt k = 0; k < num_suboperators; ++k) {
1932       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1933       *num_entries += single_entries;
1934     }
1935   } else {
1936     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1937     *num_entries += single_entries;
1938   }
1939   CeedCall(CeedCalloc(*num_entries, rows));
1940   CeedCall(CeedCalloc(*num_entries, cols));
1941 
1942   // assemble nonzero locations
1943   CeedInt offset = 0;
1944   if (is_composite) {
1945     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1946     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1947     for (CeedInt k = 0; k < num_suboperators; ++k) {
1948       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1949       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1950       offset += single_entries;
1951     }
1952   } else {
1953     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1954   }
1955 
1956   return CEED_ERROR_SUCCESS;
1957 }
1958 
1959 /**
1960    @brief Fully assemble the nonzero entries of a linear operator.
1961 
1962    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1963 
1964    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1965 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added,
1966 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1967 
1968    This will generally be slow unless your operator is low-order.
1969 
1970    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1971 
1972    @param[in]  op     CeedOperator to assemble
1973    @param[out] values Values to assemble into matrix
1974 
1975    @ref User
1976 **/
1977 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1978   CeedInt       num_suboperators, single_entries = 0;
1979   CeedOperator *sub_operators;
1980   bool          is_composite;
1981   CeedCall(CeedOperatorCheckReady(op));
1982   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1983 
1984   // Early exit for empty operator
1985   if (!is_composite) {
1986     CeedInt num_elem = 0;
1987 
1988     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1989     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1990   }
1991 
1992   if (op->LinearAssemble) {
1993     // Backend version
1994     CeedCall(op->LinearAssemble(op, values));
1995     return CEED_ERROR_SUCCESS;
1996   } else {
1997     // Operator fallback
1998     CeedOperator op_fallback;
1999 
2000     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2001     if (op_fallback) {
2002       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2003       return CEED_ERROR_SUCCESS;
2004     }
2005   }
2006 
2007   // Default interface implementation
2008   CeedInt offset = 0;
2009   CeedCall(CeedVectorSetValue(values, 0.0));
2010   if (is_composite) {
2011     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2012     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2013     for (CeedInt k = 0; k < num_suboperators; k++) {
2014       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2015       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2016       offset += single_entries;
2017     }
2018   } else {
2019     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2020   }
2021 
2022   return CEED_ERROR_SUCCESS;
2023 }
2024 
2025 /**
2026   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
2027 
2028   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2029 
2030   @param[in]  op               Composite CeedOperator
2031   @param[in]  num_skip_indices Number of suboperators to skip
2032   @param[in]  skip_indices     Array of indices of suboperators to skip
2033   @param[out] mult             Vector to store multiplicity (of size l_size)
2034 
2035   @return An error code: 0 - success, otherwise - failure
2036 
2037   @ref User
2038 **/
2039 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2040   CeedCall(CeedOperatorCheckReady(op));
2041 
2042   Ceed                ceed;
2043   CeedInt             num_suboperators;
2044   CeedSize            l_vec_len;
2045   CeedScalar         *mult_array;
2046   CeedVector          ones_l_vec;
2047   CeedElemRestriction elem_rstr;
2048   CeedOperator       *sub_operators;
2049 
2050   CeedCall(CeedOperatorGetCeed(op, &ceed));
2051 
2052   // Zero mult vector
2053   CeedCall(CeedVectorSetValue(mult, 0.0));
2054 
2055   // Get suboperators
2056   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2057   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2058   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2059 
2060   // Work vector
2061   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2062   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2063   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2064   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2065 
2066   // Compute multiplicity across suboperators
2067   for (CeedInt i = 0; i < num_suboperators; i++) {
2068     const CeedScalar *sub_mult_array;
2069     CeedVector        sub_mult_l_vec, ones_e_vec;
2070 
2071     // -- Check for suboperator to skip
2072     for (CeedInt j = 0; j < num_skip_indices; j++) {
2073       if (skip_indices[j] == i) continue;
2074     }
2075 
2076     // -- Sub operator multiplicity
2077     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2078     CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2079     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2080     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2081     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2082     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2083     // ---- Flag every node present in the current suboperator
2084     for (CeedInt j = 0; j < l_vec_len; j++) {
2085       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2086     }
2087     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2088     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2089     CeedCall(CeedVectorDestroy(&ones_e_vec));
2090   }
2091   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2092   CeedCall(CeedVectorDestroy(&ones_l_vec));
2093 
2094   return CEED_ERROR_SUCCESS;
2095 }
2096 
2097 /**
2098   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2099 grid interpolation
2100 
2101   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2102 
2103   @param[in]  op_fine      Fine grid operator
2104   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2105   @param[in]  rstr_coarse  Coarse grid restriction
2106   @param[in]  basis_coarse Coarse grid active vector basis
2107   @param[out] op_coarse    Coarse grid operator
2108   @param[out] op_prolong   Coarse to fine operator, or NULL
2109   @param[out] op_restrict  Fine to coarse operator, or NULL
2110 
2111   @return An error code: 0 - success, otherwise - failure
2112 
2113   @ref User
2114 **/
2115 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2116                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2117   CeedCall(CeedOperatorCheckReady(op_fine));
2118 
2119   // Build prolongation matrix, if required
2120   CeedBasis basis_c_to_f = NULL;
2121   if (op_prolong || op_restrict) {
2122     CeedBasis basis_fine;
2123     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2124     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2125   }
2126 
2127   // Core code
2128   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2129 
2130   return CEED_ERROR_SUCCESS;
2131 }
2132 
2133 /**
2134   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2135 
2136   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2137 
2138   @param[in]  op_fine       Fine grid operator
2139   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2140   @param[in]  rstr_coarse   Coarse grid restriction
2141   @param[in]  basis_coarse  Coarse grid active vector basis
2142   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2143   @param[out] op_coarse     Coarse grid operator
2144   @param[out] op_prolong    Coarse to fine operator, or NULL
2145   @param[out] op_restrict   Fine to coarse operator, or NULL
2146 
2147   @return An error code: 0 - success, otherwise - failure
2148 
2149   @ref User
2150 **/
2151 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2152                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2153                                              CeedOperator *op_restrict) {
2154   CeedCall(CeedOperatorCheckReady(op_fine));
2155   Ceed ceed;
2156   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2157 
2158   // Check for compatible quadrature spaces
2159   CeedBasis basis_fine;
2160   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2161   CeedInt Q_f, Q_c;
2162   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2163   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2164   if (Q_f != Q_c) {
2165     // LCOV_EXCL_START
2166     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2167     // LCOV_EXCL_STOP
2168   }
2169 
2170   // Create coarse to fine basis, if required
2171   CeedBasis basis_c_to_f = NULL;
2172   if (op_prolong || op_restrict) {
2173     // Check if interpolation matrix is provided
2174     if (!interp_c_to_f) {
2175       // LCOV_EXCL_START
2176       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2177       // LCOV_EXCL_STOP
2178     }
2179     CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2180     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2181     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2182     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2183     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2184     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2185     CeedScalar *q_ref, *q_weight, *grad;
2186     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2187     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2188     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2189     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2190     CeedCall(CeedFree(&q_ref));
2191     CeedCall(CeedFree(&q_weight));
2192     CeedCall(CeedFree(&grad));
2193   }
2194 
2195   // Core code
2196   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2197   return CEED_ERROR_SUCCESS;
2198 }
2199 
2200 /**
2201   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2202 
2203   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2204 
2205   @param[in]  op_fine       Fine grid operator
2206   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2207   @param[in]  rstr_coarse   Coarse grid restriction
2208   @param[in]  basis_coarse  Coarse grid active vector basis
2209   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2210   @param[out] op_coarse     Coarse grid operator
2211   @param[out] op_prolong    Coarse to fine operator, or NULL
2212   @param[out] op_restrict   Fine to coarse operator, or NULL
2213 
2214   @return An error code: 0 - success, otherwise - failure
2215 
2216   @ref User
2217 **/
2218 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2219                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2220                                        CeedOperator *op_restrict) {
2221   CeedCall(CeedOperatorCheckReady(op_fine));
2222   Ceed ceed;
2223   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2224 
2225   // Check for compatible quadrature spaces
2226   CeedBasis basis_fine;
2227   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2228   CeedInt Q_f, Q_c;
2229   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2230   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2231   if (Q_f != Q_c) {
2232     // LCOV_EXCL_START
2233     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2234     // LCOV_EXCL_STOP
2235   }
2236 
2237   // Coarse to fine basis
2238   CeedBasis basis_c_to_f = NULL;
2239   if (op_prolong || op_restrict) {
2240     // Check if interpolation matrix is provided
2241     if (!interp_c_to_f) {
2242       // LCOV_EXCL_START
2243       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2244       // LCOV_EXCL_STOP
2245     }
2246     CeedElemTopology topo;
2247     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2248     CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2249     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2250     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2251     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2252     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2253     CeedScalar *q_ref, *q_weight, *grad;
2254     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2255     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2256     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2257     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2258     CeedCall(CeedFree(&q_ref));
2259     CeedCall(CeedFree(&q_weight));
2260     CeedCall(CeedFree(&grad));
2261   }
2262 
2263   // Core code
2264   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2265   return CEED_ERROR_SUCCESS;
2266 }
2267 
2268 /**
2269   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2270 
2271   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2272     This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2273     The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T
2274 \hat S V\f$. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear.
2275 
2276   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2277 
2278   @param[in]  op      CeedOperator to create element inverses
2279   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2280   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2281 
2282   @return An error code: 0 - success, otherwise - failure
2283 
2284   @ref User
2285 **/
2286 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2287   CeedCall(CeedOperatorCheckReady(op));
2288 
2289   if (op->CreateFDMElementInverse) {
2290     // Backend version
2291     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2292     return CEED_ERROR_SUCCESS;
2293   } else {
2294     // Operator fallback
2295     CeedOperator op_fallback;
2296 
2297     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2298     if (op_fallback) {
2299       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2300       return CEED_ERROR_SUCCESS;
2301     }
2302   }
2303 
2304   // Default interface implementation
2305   Ceed ceed, ceed_parent;
2306   CeedCall(CeedOperatorGetCeed(op, &ceed));
2307   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2308   ceed_parent = ceed_parent ? ceed_parent : ceed;
2309   CeedQFunction qf;
2310   CeedCall(CeedOperatorGetQFunction(op, &qf));
2311 
2312   // Determine active input basis
2313   bool                interp = false, grad = false;
2314   CeedBasis           basis = NULL;
2315   CeedElemRestriction rstr  = NULL;
2316   CeedOperatorField  *op_fields;
2317   CeedQFunctionField *qf_fields;
2318   CeedInt             num_input_fields;
2319   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2320   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2321   for (CeedInt i = 0; i < num_input_fields; i++) {
2322     CeedVector vec;
2323     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2324     if (vec == CEED_VECTOR_ACTIVE) {
2325       CeedEvalMode eval_mode;
2326       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2327       interp = interp || eval_mode == CEED_EVAL_INTERP;
2328       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2329       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2330       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2331     }
2332   }
2333   if (!basis) {
2334     // LCOV_EXCL_START
2335     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2336     // LCOV_EXCL_STOP
2337   }
2338   CeedSize l_size = 1;
2339   CeedInt  P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2340   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2341   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2342   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2343   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2344   CeedCall(CeedBasisGetDimension(basis, &dim));
2345   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2346   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2347   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2348 
2349   // Build and diagonalize 1D Mass and Laplacian
2350   bool tensor_basis;
2351   CeedCall(CeedBasisIsTensor(basis, &tensor_basis));
2352   if (!tensor_basis) {
2353     // LCOV_EXCL_START
2354     return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2355     // LCOV_EXCL_STOP
2356   }
2357   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2358   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2359   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2360   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2361   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2362   CeedCall(CeedCalloc(P_1d, &lambda));
2363   // -- Build matrices
2364   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2365   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2366   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2367   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2368   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2369 
2370   // -- Diagonalize
2371   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2372   CeedCall(CeedFree(&mass));
2373   CeedCall(CeedFree(&laplace));
2374   for (CeedInt i = 0; i < P_1d; i++) {
2375     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2376   }
2377   CeedCall(CeedFree(&x));
2378 
2379   // Assemble QFunction
2380   CeedVector          assembled;
2381   CeedElemRestriction rstr_qf;
2382   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2383   CeedInt layout[3];
2384   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2385   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2386   CeedScalar max_norm = 0;
2387   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2388 
2389   // Calculate element averages
2390   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2391   CeedScalar       *elem_avg;
2392   const CeedScalar *assembled_array, *q_weight_array;
2393   CeedVector        q_weight;
2394   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2395   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2396   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2397   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2398   CeedCall(CeedCalloc(num_elem, &elem_avg));
2399   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2400   for (CeedInt e = 0; e < num_elem; e++) {
2401     CeedInt count = 0;
2402     for (CeedInt q = 0; q < num_qpts; q++) {
2403       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2404         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2405           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2406           count++;
2407         }
2408       }
2409     }
2410     if (count) {
2411       elem_avg[e] /= count;
2412     } else {
2413       elem_avg[e] = 1.0;
2414     }
2415   }
2416   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2417   CeedCall(CeedVectorDestroy(&assembled));
2418   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2419   CeedCall(CeedVectorDestroy(&q_weight));
2420 
2421   // Build FDM diagonal
2422   CeedVector  q_data;
2423   CeedScalar *q_data_array, *fdm_diagonal;
2424   CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2425   const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2426   for (CeedInt c = 0; c < num_comp; c++) {
2427     for (CeedInt n = 0; n < num_nodes; n++) {
2428       if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2429       if (grad) {
2430         for (CeedInt d = 0; d < dim; d++) {
2431           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2432           fdm_diagonal[c * num_nodes + n] += lambda[i];
2433         }
2434       }
2435       if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2436     }
2437   }
2438   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2439   CeedCall(CeedVectorSetValue(q_data, 0.0));
2440   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2441   for (CeedInt e = 0; e < num_elem; e++) {
2442     for (CeedInt c = 0; c < num_comp; c++) {
2443       for (CeedInt n = 0; n < num_nodes; n++) q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2444     }
2445   }
2446   CeedCall(CeedFree(&elem_avg));
2447   CeedCall(CeedFree(&fdm_diagonal));
2448   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2449 
2450   // Setup FDM operator
2451   // -- Basis
2452   CeedBasis   fdm_basis;
2453   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2454   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2455   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2456   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2457   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2458   CeedCall(CeedFree(&fdm_interp));
2459   CeedCall(CeedFree(&grad_dummy));
2460   CeedCall(CeedFree(&q_ref_dummy));
2461   CeedCall(CeedFree(&q_weight_dummy));
2462   CeedCall(CeedFree(&lambda));
2463 
2464   // -- Restriction
2465   CeedElemRestriction rstr_qd_i;
2466   CeedInt             strides[3] = {1, num_nodes, num_nodes * num_comp};
2467   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i));
2468   // -- QFunction
2469   CeedQFunction qf_fdm;
2470   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2471   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2472   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2473   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2474   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2475   // -- QFunction context
2476   CeedInt *num_comp_data;
2477   CeedCall(CeedCalloc(1, &num_comp_data));
2478   num_comp_data[0] = num_comp;
2479   CeedQFunctionContext ctx_fdm;
2480   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2481   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2482   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2483   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2484   // -- Operator
2485   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2486   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2487   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2488   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2489 
2490   // Cleanup
2491   CeedCall(CeedVectorDestroy(&q_data));
2492   CeedCall(CeedBasisDestroy(&fdm_basis));
2493   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2494   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2495 
2496   return CEED_ERROR_SUCCESS;
2497 }
2498 
2499 /// @}
2500