xref: /libCEED/interface/ceed-preconditioning.c (revision 23d4529ec201513b6e781993a7a761deb8126e79)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = NULL;
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) {
128       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     }
130     // Cleanup
131     CeedCall(CeedQFunctionDestroy(&qf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
133     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
134   }
135   CeedCall(CeedOperatorSetName(op_fallback, op->name));
136   CeedCall(CeedOperatorCheckReady(op_fallback));
137   op->op_fallback = op_fallback;
138 
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
144 
145   @param[in]  op          CeedOperator to retrieve fallback for
146   @param[out] op_fallback Fallback CeedOperator
147 
148   @return An error code: 0 - success, otherwise - failure
149 
150   @ref Developer
151 **/
152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
153   // Create if needed
154   if (!op->op_fallback) {
155     CeedCall(CeedOperatorCreateFallback(op));
156   }
157   if (op->op_fallback) {
158     bool is_debug;
159 
160     CeedCall(CeedIsDebug(op->ceed, &is_debug));
161     if (is_debug) {
162       Ceed        ceed, ceed_fallback;
163       const char *resource, *resource_fallback;
164 
165       CeedCall(CeedOperatorGetCeed(op, &ceed));
166       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
167       CeedCall(CeedGetResource(ceed, &resource));
168       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
169 
170       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n");
171       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
172                 op->op_fallback);
173     }
174   }
175   *op_fallback = op->op_fallback;
176 
177   return CEED_ERROR_SUCCESS;
178 }
179 
180 /**
181   @brief Select correct basis matrix pointer based on CeedEvalMode
182 
183   @param[in]  basis     CeedBasis from which to get the basis matrix
184   @param[in]  eval_mode Current basis evaluation mode
185   @param[in]  identity  Pointer to identity matrix
186   @param[out] basis_ptr Basis pointer to set
187 
188   @ref Developer
189 **/
190 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
191   switch (eval_mode) {
192     case CEED_EVAL_NONE:
193       *basis_ptr = identity;
194       break;
195     case CEED_EVAL_INTERP:
196       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
197       break;
198     case CEED_EVAL_GRAD:
199       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
200       break;
201     case CEED_EVAL_DIV:
202       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
203       break;
204     case CEED_EVAL_CURL:
205       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
206       break;
207     case CEED_EVAL_WEIGHT:
208       break;  // Caught by QF Assembly
209   }
210   assert(*basis_ptr != NULL);
211 
212   return CEED_ERROR_SUCCESS;
213 }
214 
215 /**
216   @brief Create point block restriction for active operator field
217 
218   @param[in]  rstr            Original CeedElemRestriction for active field
219   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
220 
221   @return An error code: 0 - success, otherwise - failure
222 
223   @ref Developer
224 **/
225 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
226   Ceed ceed;
227   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
228   const CeedInt *offsets;
229   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
230 
231   // Expand offsets
232   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
233   CeedSize l_size;
234   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
235   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
236   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
237   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
238   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
239   CeedInt shift = num_comp;
240   if (comp_stride != 1) shift *= num_comp;
241   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
242   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
243     pointblock_offsets[i] = offsets[i] * shift;
244   }
245 
246   // Create new restriction
247   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
248                                      pointblock_offsets, pointblock_rstr));
249 
250   // Cleanup
251   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
252 
253   return CEED_ERROR_SUCCESS;
254 }
255 
256 /**
257   @brief Core logic for assembling operator diagonal or point block diagonal
258 
259   @param[in]  op            CeedOperator to assemble point block diagonal
260   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
261   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
262   @param[out] assembled     CeedVector to store assembled diagonal
263 
264   @return An error code: 0 - success, otherwise - failure
265 
266   @ref Developer
267 **/
268 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
269   Ceed ceed;
270   CeedCall(CeedOperatorGetCeed(op, &ceed));
271 
272   // Assemble QFunction
273   CeedQFunction       qf;
274   const CeedScalar   *assembled_qf_array;
275   CeedVector          assembled_qf        = NULL;
276   CeedElemRestriction assembled_elem_rstr = NULL;
277   CeedInt             num_input_fields, num_output_fields;
278   CeedInt             layout[3];
279 
280   CeedCall(CeedOperatorGetQFunction(op, &qf));
281   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
282   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
283   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout));
284   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
285   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
286 
287   // Get assembly data
288   CeedOperatorAssemblyData data;
289   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
290   CeedInt                 *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
291   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
292   CeedBasis               *active_bases;
293   CeedElemRestriction     *active_elem_rstrs;
294   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
295   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
296                                                 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components));
297   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL));
298   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs));
299 
300   // Loop over all active bases
301   for (CeedInt b = 0; b < num_active_bases; b++) {
302     // Assemble point block diagonal restriction, if needed
303     CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b];
304 
305     if (is_pointblock) {
306       CeedElemRestriction point_block_elem_rstr;
307 
308       CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr));
309       diag_elem_rstr = point_block_elem_rstr;
310     }
311 
312     // Create diagonal vector
313     CeedVector elem_diag;
314     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
315 
316     // Assemble element operator diagonals
317     CeedScalar *elem_diag_array;
318     CeedInt     num_elem, num_nodes, num_qpts, num_components;
319 
320     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
321     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
322     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
323     CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes));
324     CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components));
325     CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts));
326 
327     // Construct identity matrix for basis if required
328     bool        has_eval_none = false;
329     CeedScalar *identity      = NULL;
330     for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) {
331       has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE);
332     }
333     for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) {
334       has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE);
335     }
336     if (has_eval_none) {
337       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
338       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
339     }
340 
341     // Compute the diagonal of B^T D B
342     // Each element
343     for (CeedSize e = 0; e < num_elem; e++) {
344       // Each basis eval mode pair
345       CeedInt      d_out              = 0, q_comp_out;
346       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
347       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) {
348         const CeedScalar *B_t = NULL;
349         CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t);
350         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out));
351         if (q_comp_out > 1) {
352           if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
353           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
354         }
355         eval_mode_out_prev = eval_modes_out[b][e_out];
356 
357         CeedInt      d_in              = 0, q_comp_in;
358         CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
359         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) {
360           const CeedScalar *B = NULL;
361           CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B);
362           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in));
363           if (q_comp_in > 1) {
364             if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
365             else B = &B[(++d_in) * num_qpts * num_nodes];
366           }
367           eval_mode_in_prev = eval_modes_in[b][e_in];
368 
369           // Each component
370           for (CeedInt c_out = 0; c_out < num_components; c_out++) {
371             // Each qpt/node pair
372             for (CeedInt q = 0; q < num_qpts; q++) {
373               if (is_pointblock) {
374                 // Point Block Diagonal
375                 for (CeedInt c_in = 0; c_in < num_components; c_in++) {
376                   const CeedSize c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
377                   const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
378                   for (CeedInt n = 0; n < num_nodes; n++) {
379                     elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] +=
380                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
381                   }
382                 }
383               } else {
384                 // Diagonal Only
385                 const CeedInt    c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
386                 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
387                 for (CeedInt n = 0; n < num_nodes; n++) {
388                   elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
389                 }
390               }
391             }
392           }
393         }
394       }
395     }
396     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
397 
398     // Assemble local operator diagonal
399     CeedCall(CeedElemRestrictionApplyUnsigned(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
400 
401     // Cleanup
402     if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
403     CeedCall(CeedVectorDestroy(&elem_diag));
404     CeedCall(CeedFree(&identity));
405   }
406   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
407   CeedCall(CeedVectorDestroy(&assembled_qf));
408 
409   return CEED_ERROR_SUCCESS;
410 }
411 
412 /**
413   @brief Core logic for assembling composite operator diagonal
414 
415   @param[in]  op            CeedOperator to assemble point block diagonal
416   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
417   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
418   @param[out] assembled     CeedVector to store assembled diagonal
419 
420   @return An error code: 0 - success, otherwise - failure
421 
422   @ref Developer
423 **/
424 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
425                                                                  CeedVector assembled) {
426   CeedInt       num_sub;
427   CeedOperator *suboperators;
428   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
429   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
430   for (CeedInt i = 0; i < num_sub; i++) {
431     if (is_pointblock) {
432       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
433     } else {
434       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
435     }
436   }
437   return CEED_ERROR_SUCCESS;
438 }
439 
440 /**
441   @brief Build nonzero pattern for non-composite operator
442 
443   Users should generally use CeedOperatorLinearAssembleSymbolic()
444 
445   Note: For operators using oriented element restrictions, entries in rows or cols may be negative indicating the assembled value at this nonzero
446 should be negated
447 
448   @param[in]  op     CeedOperator to assemble nonzero pattern
449   @param[in]  offset Offset for number of entries
450   @param[out] rows   Row number for each entry
451   @param[out] cols   Column number for each entry
452 
453   @return An error code: 0 - success, otherwise - failure
454 
455   @ref Developer
456 **/
457 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
458   Ceed ceed;
459   bool is_composite;
460   CeedCall(CeedOperatorGetCeed(op, &ceed));
461   CeedCall(CeedOperatorIsComposite(op, &is_composite));
462 
463   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
464 
465   CeedSize num_nodes;
466   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
467   CeedElemRestriction rstr_in;
468   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
469   CeedInt num_elem, elem_size, num_comp;
470   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
471   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
472   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
473   CeedInt layout_er[3];
474   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
475 
476   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
477 
478   // Determine elem_dof relation
479   CeedVector index_vec;
480   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
481   CeedScalar *array;
482   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
483   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
484   CeedCall(CeedVectorRestoreArray(index_vec, &array));
485   CeedVector elem_dof;
486   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
487   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
488   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
489   const CeedScalar *elem_dof_a;
490   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
491   CeedCall(CeedVectorDestroy(&index_vec));
492 
493   // Determine i, j locations for element matrices
494   CeedSize count = 0;
495   for (CeedInt e = 0; e < num_elem; e++) {
496     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
497       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
498         for (CeedInt i = 0; i < elem_size; i++) {
499           for (CeedInt j = 0; j < elem_size; j++) {
500             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
501             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
502 
503             const CeedInt row = elem_dof_a[elem_dof_index_row];
504             const CeedInt col = elem_dof_a[elem_dof_index_col];
505 
506             rows[offset + count] = row;
507             cols[offset + count] = col;
508             count++;
509           }
510         }
511       }
512     }
513   }
514   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
515   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
516   CeedCall(CeedVectorDestroy(&elem_dof));
517 
518   return CEED_ERROR_SUCCESS;
519 }
520 
521 /**
522   @brief Assemble nonzero entries for non-composite operator
523 
524   Users should generally use CeedOperatorLinearAssemble()
525 
526   @param[in]  op     CeedOperator to assemble
527   @param[in]  offset Offset for number of entries
528   @param[out] values Values to assemble into matrix
529 
530   @return An error code: 0 - success, otherwise - failure
531 
532   @ref Developer
533 **/
534 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
535   Ceed ceed;
536   bool is_composite;
537   CeedCall(CeedOperatorGetCeed(op, &ceed));
538   CeedCall(CeedOperatorIsComposite(op, &is_composite));
539 
540   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
541 
542   // Early exit for empty operator
543   {
544     CeedInt num_elem = 0;
545 
546     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
547     if (num_elem == 0) return CEED_ERROR_SUCCESS;
548   }
549 
550   if (op->LinearAssembleSingle) {
551     // Backend version
552     CeedCall(op->LinearAssembleSingle(op, offset, values));
553     return CEED_ERROR_SUCCESS;
554   } else {
555     // Operator fallback
556     CeedOperator op_fallback;
557 
558     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
559     if (op_fallback) {
560       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
561       return CEED_ERROR_SUCCESS;
562     }
563   }
564 
565   // Assemble QFunction
566   CeedQFunction qf;
567   CeedCall(CeedOperatorGetQFunction(op, &qf));
568   CeedVector          assembled_qf = NULL;
569   CeedElemRestriction rstr_q       = NULL;
570   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
571   CeedSize qf_length;
572   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
573 
574   CeedInt            num_input_fields, num_output_fields;
575   CeedOperatorField *input_fields;
576   CeedOperatorField *output_fields;
577   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
578 
579   // Get assembly data
580   CeedOperatorAssemblyData data;
581   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
582   const CeedEvalMode **eval_modes_in, **eval_modes_out;
583   CeedInt             *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
584   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out,
585                                                 &eval_modes_out, NULL, NULL));
586   CeedBasis *bases;
587   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL));
588   CeedBasis basis_in = bases[0];
589 
590   CeedCheck(num_active_bases == 1, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases");
591   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
592 
593   CeedElemRestriction active_rstr;
594   CeedInt             num_elem, elem_size, num_qpts, num_comp;
595   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
596   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
597   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
598   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
599   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
600 
601   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
602 
603   // loop over elements and put in data structure
604   const CeedScalar *assembled_qf_array;
605   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
606 
607   CeedInt layout_qf[3];
608   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
609   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
610 
611   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
612   const CeedScalar **B_mats_in, **B_mats_out;
613   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out));
614   const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0];
615   CeedScalar        BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]];
616   CeedScalar        elem_mat[elem_size * elem_size];
617   CeedSize          count = 0;
618   CeedScalar       *vals;
619   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
620   for (CeedSize e = 0; e < num_elem; e++) {
621     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
622       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
623         // Compute B^T*D
624         for (CeedSize n = 0; n < elem_size; n++) {
625           for (CeedSize q = 0; q < num_qpts; q++) {
626             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
627               const CeedSize btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in);
628               CeedScalar     sum       = 0.0;
629               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
630                 const CeedSize b_out_index     = (num_eval_modes_out[0] * q + e_out) * elem_size + n;
631                 const CeedSize eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out;
632                 const CeedSize qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
633                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
634               }
635               BTD_mat[btd_index] = sum;
636             }
637           }
638         }
639         // form element matrix itself (for each block component)
640         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0]));
641 
642         // put element matrix in coordinate data structure
643         for (CeedInt i = 0; i < elem_size; i++) {
644           for (CeedInt j = 0; j < elem_size; j++) {
645             vals[offset + count] = elem_mat[i * elem_size + j];
646             count++;
647           }
648         }
649       }
650     }
651   }
652   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries");
653   CeedCall(CeedVectorRestoreArray(values, &vals));
654 
655   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
656   CeedCall(CeedVectorDestroy(&assembled_qf));
657 
658   return CEED_ERROR_SUCCESS;
659 }
660 
661 /**
662   @brief Count number of entries for assembled CeedOperator
663 
664   @param[in]  op          CeedOperator to assemble
665   @param[out] num_entries Number of entries in assembled representation
666 
667   @return An error code: 0 - success, otherwise - failure
668 
669   @ref Utility
670 **/
671 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) {
672   bool                is_composite;
673   CeedElemRestriction rstr;
674   CeedInt             num_elem, elem_size, num_comp;
675 
676   CeedCall(CeedOperatorIsComposite(op, &is_composite));
677   CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
678   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
679   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
680   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
681   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
682   *num_entries = (CeedSize)elem_size * num_comp * elem_size * num_comp * num_elem;
683 
684   return CEED_ERROR_SUCCESS;
685 }
686 
687 /**
688   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
689 
690   @param[in]  op_fine      Fine grid operator
691   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
692   @param[in]  rstr_coarse  Coarse grid restriction
693   @param[in]  basis_coarse Coarse grid active vector basis
694   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
695   @param[out] op_coarse    Coarse grid operator
696   @param[out] op_prolong   Coarse to fine operator, or NULL
697   @param[out] op_restrict  Fine to coarse operator, or NULL
698 
699   @return An error code: 0 - success, otherwise - failure
700 
701   @ref Developer
702 **/
703 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
704                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
705   Ceed                ceed;
706   CeedVector          mult_vec         = NULL;
707   CeedElemRestriction rstr_p_mult_fine = NULL;
708   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
709 
710   // Check for composite operator
711   bool is_composite;
712   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
713   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
714 
715   // Coarse Grid
716   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
717   CeedElemRestriction rstr_fine = NULL;
718   // -- Clone input fields
719   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
720     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
721       rstr_fine = op_fine->input_fields[i]->elem_rstr;
722       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
723     } else {
724       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
725                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
726     }
727   }
728   // -- Clone output fields
729   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
730     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
731       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
732     } else {
733       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
734                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
735     }
736   }
737   // -- Clone QFunctionAssemblyData
738   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
739 
740   // Multiplicity vector
741   if (op_restrict || op_prolong) {
742     CeedVector mult_e_vec;
743 
744     CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine));
745     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
746     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
747     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
748     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
749     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
750     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
751     CeedCall(CeedVectorDestroy(&mult_e_vec));
752     CeedCall(CeedVectorReciprocal(mult_vec));
753   }
754 
755   // Clone name
756   bool   has_name = op_fine->name;
757   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
758   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
759 
760   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
761   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
762             "Prolongation or restriction operator creation requires coarse-to-fine basis");
763 
764   // Restriction/Prolongation Operators
765   CeedInt num_comp;
766   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
767 
768   // Restriction
769   if (op_restrict) {
770     CeedInt             *num_comp_r_data;
771     CeedQFunction        qf_restrict;
772     CeedQFunctionContext ctx_r;
773 
774     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
775     CeedCall(CeedCalloc(1, &num_comp_r_data));
776     num_comp_r_data[0] = num_comp;
777     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
778     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
779     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
780     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
781     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
782     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
783     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
784     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
785 
786     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
787     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
788     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec));
789     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
790 
791     // Set name
792     char *restriction_name;
793     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
794     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
795     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
796     CeedCall(CeedFree(&restriction_name));
797 
798     // Check
799     CeedCall(CeedOperatorCheckReady(*op_restrict));
800 
801     // Cleanup
802     CeedCall(CeedQFunctionDestroy(&qf_restrict));
803   }
804 
805   // Prolongation
806   if (op_prolong) {
807     CeedInt             *num_comp_p_data;
808     CeedQFunction        qf_prolong;
809     CeedQFunctionContext ctx_p;
810 
811     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
812     CeedCall(CeedCalloc(1, &num_comp_p_data));
813     num_comp_p_data[0] = num_comp;
814     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
815     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
816     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
817     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
818     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
819     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
820     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
821     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
822 
823     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
824     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
825     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec));
826     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
827 
828     // Set name
829     char *prolongation_name;
830     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
831     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
832     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
833     CeedCall(CeedFree(&prolongation_name));
834 
835     // Check
836     CeedCall(CeedOperatorCheckReady(*op_prolong));
837 
838     // Cleanup
839     CeedCall(CeedQFunctionDestroy(&qf_prolong));
840   }
841 
842   // Check
843   CeedCall(CeedOperatorCheckReady(*op_coarse));
844 
845   // Cleanup
846   CeedCall(CeedVectorDestroy(&mult_vec));
847   CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine));
848   CeedCall(CeedBasisDestroy(&basis_c_to_f));
849 
850   return CEED_ERROR_SUCCESS;
851 }
852 
853 /**
854   @brief Build 1D mass matrix and Laplacian with perturbation
855 
856   @param[in]  interp_1d   Interpolation matrix in one dimension
857   @param[in]  grad_1d     Gradient matrix in one dimension
858   @param[in]  q_weight_1d Quadrature weights in one dimension
859   @param[in]  P_1d        Number of basis nodes in one dimension
860   @param[in]  Q_1d        Number of quadrature points in one dimension
861   @param[in]  dim         Dimension of basis
862   @param[out] mass        Assembled mass matrix in one dimension
863   @param[out] laplace     Assembled perturbed Laplacian in one dimension
864 
865   @return An error code: 0 - success, otherwise - failure
866 
867   @ref Developer
868 **/
869 CeedPragmaOptimizeOff
870 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
871                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
872   for (CeedInt i = 0; i < P_1d; i++) {
873     for (CeedInt j = 0; j < P_1d; j++) {
874       CeedScalar sum = 0.0;
875       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
876       mass[i + j * P_1d] = sum;
877     }
878   }
879   // -- Laplacian
880   for (CeedInt i = 0; i < P_1d; i++) {
881     for (CeedInt j = 0; j < P_1d; j++) {
882       CeedScalar sum = 0.0;
883       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
884       laplace[i + j * P_1d] = sum;
885     }
886   }
887   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
888   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
889   return CEED_ERROR_SUCCESS;
890 }
891 CeedPragmaOptimizeOn
892 
893 /// @}
894 
895 /// ----------------------------------------------------------------------------
896 /// CeedOperator Backend API
897 /// ----------------------------------------------------------------------------
898 /// @addtogroup CeedOperatorBackend
899 /// @{
900 
901 /**
902   @brief Create object holding CeedQFunction assembly data for CeedOperator
903 
904   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
905   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
906 
907   @return An error code: 0 - success, otherwise - failure
908 
909   @ref Backend
910 **/
911 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
912   CeedCall(CeedCalloc(1, data));
913   (*data)->ref_count = 1;
914   (*data)->ceed      = ceed;
915   CeedCall(CeedReference(ceed));
916 
917   return CEED_ERROR_SUCCESS;
918 }
919 
920 /**
921   @brief Increment the reference counter for a CeedQFunctionAssemblyData
922 
923   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
924 
925   @return An error code: 0 - success, otherwise - failure
926 
927   @ref Backend
928 **/
929 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
930   data->ref_count++;
931   return CEED_ERROR_SUCCESS;
932 }
933 
934 /**
935   @brief Set re-use of CeedQFunctionAssemblyData
936 
937   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
938   @param[in]     reuse_data Boolean flag indicating data re-use
939 
940   @return An error code: 0 - success, otherwise - failure
941 
942   @ref Backend
943 **/
944 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
945   data->reuse_data        = reuse_data;
946   data->needs_data_update = true;
947   return CEED_ERROR_SUCCESS;
948 }
949 
950 /**
951   @brief Mark QFunctionAssemblyData as stale
952 
953   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
954   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
955 
956   @return An error code: 0 - success, otherwise - failure
957 
958   @ref Backend
959 **/
960 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
961   data->needs_data_update = needs_data_update;
962   return CEED_ERROR_SUCCESS;
963 }
964 
965 /**
966   @brief Determine if QFunctionAssemblyData needs update
967 
968   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
969   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
970 
971   @return An error code: 0 - success, otherwise - failure
972 
973   @ref Backend
974 **/
975 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
976   *is_update_needed = !data->reuse_data || data->needs_data_update;
977   return CEED_ERROR_SUCCESS;
978 }
979 
980 /**
981   @brief Copy the pointer to a CeedQFunctionAssemblyData.
982 
983   Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
984 
985   Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
986         CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
987         CeedQFunctionAssemblyData.
988 
989   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
990   @param[in,out] data_copy Variable to store copied reference
991 
992   @return An error code: 0 - success, otherwise - failure
993 
994   @ref Backend
995 **/
996 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
997   CeedCall(CeedQFunctionAssemblyDataReference(data));
998   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
999   *data_copy = data;
1000   return CEED_ERROR_SUCCESS;
1001 }
1002 
1003 /**
1004   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1005 
1006   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1007   @param[out] is_setup Boolean flag for setup status
1008 
1009   @return An error code: 0 - success, otherwise - failure
1010 
1011   @ref Backend
1012 **/
1013 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1014   *is_setup = data->is_setup;
1015   return CEED_ERROR_SUCCESS;
1016 }
1017 
1018 /**
1019   @brief Set internal objects for CeedQFunctionAssemblyData
1020 
1021   @param[in,out] data CeedQFunctionAssemblyData to set objects
1022   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1023   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1024 
1025   @return An error code: 0 - success, otherwise - failure
1026 
1027   @ref Backend
1028 **/
1029 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1030   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1031   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1032 
1033   data->is_setup = true;
1034   return CEED_ERROR_SUCCESS;
1035 }
1036 
1037 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1038   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1039 
1040   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1041   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1042 
1043   return CEED_ERROR_SUCCESS;
1044 }
1045 
1046 /**
1047   @brief Destroy CeedQFunctionAssemblyData
1048 
1049   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1050 
1051   @return An error code: 0 - success, otherwise - failure
1052 
1053   @ref Backend
1054 **/
1055 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1056   if (!*data || --(*data)->ref_count > 0) {
1057     *data = NULL;
1058     return CEED_ERROR_SUCCESS;
1059   }
1060   CeedCall(CeedDestroy(&(*data)->ceed));
1061   CeedCall(CeedVectorDestroy(&(*data)->vec));
1062   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1063 
1064   CeedCall(CeedFree(data));
1065   return CEED_ERROR_SUCCESS;
1066 }
1067 
1068 /**
1069   @brief Get CeedOperatorAssemblyData
1070 
1071   @param[in]  op   CeedOperator to assemble
1072   @param[out] data CeedQFunctionAssemblyData
1073 
1074   @return An error code: 0 - success, otherwise - failure
1075 
1076   @ref Backend
1077 **/
1078 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1079   if (!op->op_assembled) {
1080     CeedOperatorAssemblyData data;
1081 
1082     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1083     op->op_assembled = data;
1084   }
1085   *data = op->op_assembled;
1086 
1087   return CEED_ERROR_SUCCESS;
1088 }
1089 
1090 /**
1091   @brief Create object holding CeedOperator assembly data.
1092 
1093   The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1094   An array with references to the corresponding active CeedElemRestrictions is also stored.
1095   For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1096   The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1097 CeedEvalMode.
1098   The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1099   Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1100 
1101   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1102   @param[in]  op   CeedOperator to be assembled
1103   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1104 
1105   @return An error code: 0 - success, otherwise - failure
1106 
1107   @ref Backend
1108 **/
1109 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1110   CeedInt num_active_bases = 0;
1111 
1112   // Allocate
1113   CeedCall(CeedCalloc(1, data));
1114   (*data)->ceed = ceed;
1115   CeedCall(CeedReference(ceed));
1116 
1117   // Build OperatorAssembly data
1118   CeedQFunction       qf;
1119   CeedQFunctionField *qf_fields;
1120   CeedOperatorField  *op_fields;
1121   CeedInt             num_input_fields;
1122   CeedCall(CeedOperatorGetQFunction(op, &qf));
1123   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1124   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1125 
1126   // Determine active input basis
1127   CeedInt       *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0;
1128   CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL;
1129   CeedSize     **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1130   for (CeedInt i = 0; i < num_input_fields; i++) {
1131     CeedVector vec;
1132     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1133     if (vec == CEED_VECTOR_ACTIVE) {
1134       CeedBasis    basis_in = NULL;
1135       CeedEvalMode eval_mode;
1136       CeedInt      index = -1, dim, num_comp, q_comp;
1137       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1138       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1139       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1140       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1141       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1142       for (CeedInt i = 0; i < num_active_bases; i++) {
1143         if ((*data)->active_bases[i] == basis_in) index = i;
1144       }
1145       if (index == -1) {
1146         CeedElemRestriction elem_rstr_in;
1147         index = num_active_bases;
1148         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1149         (*data)->active_bases[num_active_bases] = NULL;
1150         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases]));
1151         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1152         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1153         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1154         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases]));
1155         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1156         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1157         num_eval_modes_in[index]  = 0;
1158         num_eval_modes_out[index] = 0;
1159         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1160         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1161         eval_modes_in[index]  = NULL;
1162         eval_modes_out[index] = NULL;
1163         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1164         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1165         eval_mode_offsets_in[index]  = NULL;
1166         eval_mode_offsets_out[index] = NULL;
1167         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1168         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1169         (*data)->assembled_bases_in[index]  = NULL;
1170         (*data)->assembled_bases_out[index] = NULL;
1171         num_active_bases++;
1172       }
1173       if (eval_mode != CEED_EVAL_WEIGHT) {
1174         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1175         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1176         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1177         for (CeedInt d = 0; d < q_comp; d++) {
1178           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1179           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1180           offset += num_comp;
1181         }
1182         num_eval_modes_in[index] += q_comp;
1183       }
1184     }
1185   }
1186   (*data)->num_eval_modes_in    = num_eval_modes_in;
1187   (*data)->eval_modes_in        = eval_modes_in;
1188   (*data)->eval_mode_offsets_in = eval_mode_offsets_in;
1189 
1190   // Determine active output basis
1191   CeedInt num_output_fields;
1192   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1193   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1194   offset = 0;
1195   for (CeedInt i = 0; i < num_output_fields; i++) {
1196     CeedVector vec;
1197     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1198     if (vec == CEED_VECTOR_ACTIVE) {
1199       CeedBasis    basis_out = NULL;
1200       CeedEvalMode eval_mode;
1201       CeedInt      index = -1, dim, num_comp, q_comp;
1202       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1203       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1204       CeedCall(CeedBasisGetDimension(basis_out, &dim));
1205       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1206       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1207       for (CeedInt i = 0; i < num_active_bases; i++) {
1208         if ((*data)->active_bases[i] == basis_out) index = i;
1209       }
1210       if (index == -1) {
1211         CeedElemRestriction elem_rstr_out;
1212 
1213         index = num_active_bases;
1214         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1215         (*data)->active_bases[num_active_bases] = NULL;
1216         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases]));
1217         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1218         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1219         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1220         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases]));
1221         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1222         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1223         num_eval_modes_in[index]  = 0;
1224         num_eval_modes_out[index] = 0;
1225         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1226         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1227         eval_modes_in[index]  = NULL;
1228         eval_modes_out[index] = NULL;
1229         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1230         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1231         eval_mode_offsets_in[index]  = NULL;
1232         eval_mode_offsets_out[index] = NULL;
1233         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1234         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1235         (*data)->assembled_bases_in[index]  = NULL;
1236         (*data)->assembled_bases_out[index] = NULL;
1237         num_active_bases++;
1238       }
1239       if (eval_mode != CEED_EVAL_WEIGHT) {
1240         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1241         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1242         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1243         for (CeedInt d = 0; d < q_comp; d++) {
1244           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1245           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1246           offset += num_comp;
1247         }
1248         num_eval_modes_out[index] += q_comp;
1249       }
1250     }
1251   }
1252   (*data)->num_output_components = offset;
1253   (*data)->num_eval_modes_out    = num_eval_modes_out;
1254   (*data)->eval_modes_out        = eval_modes_out;
1255   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1256   (*data)->num_active_bases      = num_active_bases;
1257 
1258   return CEED_ERROR_SUCCESS;
1259 }
1260 
1261 /**
1262   @brief Get CeedOperator CeedEvalModes for assembly.
1263 
1264   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1265 
1266   @param[in]  data                  CeedOperatorAssemblyData
1267   @param[out] num_active_bases      Total number of active bases
1268   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1269                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1270   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1271   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1272   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1273   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1274   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1275   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1276                                       including contributions of all active bases
1277 
1278   @return An error code: 0 - success, otherwise - failure
1279 
1280 
1281   @ref Backend
1282 **/
1283 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in,
1284                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out,
1285                                          const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) {
1286   if (num_active_bases) *num_active_bases = data->num_active_bases;
1287   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1288   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1289   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1290   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1291   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1292   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1293   if (num_output_components) *num_output_components = data->num_output_components;
1294 
1295   return CEED_ERROR_SUCCESS;
1296 }
1297 
1298 /**
1299   @brief Get CeedOperator CeedBasis data for assembly.
1300 
1301   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1302 
1303   @param[in]  data                CeedOperatorAssemblyData
1304   @param[out] num_active_bases    Number of active bases, or NULL
1305   @param[out] active_bases        Pointer to hold active CeedBasis, or NULL
1306   @param[out] assembled_bases_in  Pointer to hold assembled active input B, or NULL
1307   @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL
1308 
1309   @return An error code: 0 - success, otherwise - failure
1310 
1311   @ref Backend
1312 **/
1313 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases,
1314                                      const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) {
1315   // Assemble B_in, B_out if needed
1316   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1317     CeedInt num_qpts;
1318 
1319     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1320     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1321       CeedInt     num_nodes;
1322       CeedScalar *B_in = NULL, *identity = NULL;
1323       bool        has_eval_none = false;
1324 
1325       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1326       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1327 
1328       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1329         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1330       }
1331       if (has_eval_none) {
1332         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1333         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1334           identity[i * num_nodes + i] = 1.0;
1335         }
1336       }
1337 
1338       for (CeedInt q = 0; q < num_qpts; q++) {
1339         for (CeedInt n = 0; n < num_nodes; n++) {
1340           CeedInt      d_in              = 0, q_comp_in;
1341           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1342           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1343             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1344             const CeedScalar *B  = NULL;
1345             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B);
1346             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in));
1347             if (q_comp_in > 1) {
1348               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1349               else B = &B[(++d_in) * num_qpts * num_nodes];
1350             }
1351             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1352             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1353           }
1354         }
1355       }
1356       if (identity) CeedCall(CeedFree(identity));
1357       data->assembled_bases_in[b] = B_in;
1358     }
1359   }
1360 
1361   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1362     CeedInt num_qpts;
1363 
1364     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1365     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1366       CeedInt     num_nodes;
1367       bool        has_eval_none = false;
1368       CeedScalar *B_out = NULL, *identity = NULL;
1369 
1370       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1371       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1372 
1373       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1374         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1375       }
1376       if (has_eval_none) {
1377         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1378         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1379           identity[i * num_nodes + i] = 1.0;
1380         }
1381       }
1382 
1383       for (CeedInt q = 0; q < num_qpts; q++) {
1384         for (CeedInt n = 0; n < num_nodes; n++) {
1385           CeedInt      d_out              = 0, q_comp_out;
1386           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1387           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1388             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1389             const CeedScalar *B  = NULL;
1390             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B);
1391             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out));
1392             if (q_comp_out > 1) {
1393               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1394               else B = &B[(++d_out) * num_qpts * num_nodes];
1395             }
1396             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1397             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1398           }
1399         }
1400       }
1401       if (identity) CeedCall(CeedFree(identity));
1402       data->assembled_bases_out[b] = B_out;
1403     }
1404   }
1405 
1406   // Pass out assembled data
1407   if (active_bases) *active_bases = data->active_bases;
1408   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1409   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1410 
1411   return CEED_ERROR_SUCCESS;
1412 }
1413 
1414 /**
1415   @brief Get CeedOperator CeedBasis data for assembly.
1416 
1417   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1418 
1419   @param[in]  data                  CeedOperatorAssemblyData
1420   @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL
1421   @param[out] active_elem_rstrs     Pointer to hold active CeedElemRestrictions, or NULL
1422 
1423   @return An error code: 0 - success, otherwise - failure
1424 
1425   @ref Backend
1426 **/
1427 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs,
1428                                                 CeedElemRestriction **active_elem_rstrs) {
1429   if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases;
1430   if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs;
1431 
1432   return CEED_ERROR_SUCCESS;
1433 }
1434 
1435 /**
1436   @brief Destroy CeedOperatorAssemblyData
1437 
1438   @param[in,out] data CeedOperatorAssemblyData to destroy
1439 
1440   @return An error code: 0 - success, otherwise - failure
1441 
1442   @ref Backend
1443 **/
1444 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1445   if (!*data) {
1446     *data = NULL;
1447     return CEED_ERROR_SUCCESS;
1448   }
1449   CeedCall(CeedDestroy(&(*data)->ceed));
1450   for (CeedInt b = 0; b < (*data)->num_active_bases; b++) {
1451     CeedCall(CeedBasisDestroy(&(*data)->active_bases[b]));
1452     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b]));
1453     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1454     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1455     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1456     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1457     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1458     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1459   }
1460   CeedCall(CeedFree(&(*data)->active_bases));
1461   CeedCall(CeedFree(&(*data)->active_elem_rstrs));
1462   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1463   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1464   CeedCall(CeedFree(&(*data)->eval_modes_in));
1465   CeedCall(CeedFree(&(*data)->eval_modes_out));
1466   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1467   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1468   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1469   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1470 
1471   CeedCall(CeedFree(data));
1472   return CEED_ERROR_SUCCESS;
1473 }
1474 
1475 /// @}
1476 
1477 /// ----------------------------------------------------------------------------
1478 /// CeedOperator Public API
1479 /// ----------------------------------------------------------------------------
1480 /// @addtogroup CeedOperatorUser
1481 /// @{
1482 
1483 /**
1484   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1485 
1486   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1487   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices
1488 representing the action of the CeedQFunction for a corresponding quadrature point on an element.
1489 
1490   Inputs and outputs are in the order provided by the user when adding CeedOperator fields.
1491   For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction
1492 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1493 
1494   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1495 
1496   @param[in]  op        CeedOperator to assemble CeedQFunction
1497   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1498   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1499   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1500 
1501   @return An error code: 0 - success, otherwise - failure
1502 
1503   @ref User
1504 **/
1505 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1506   CeedCall(CeedOperatorCheckReady(op));
1507 
1508   if (op->LinearAssembleQFunction) {
1509     // Backend version
1510     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1511   } else {
1512     // Operator fallback
1513     CeedOperator op_fallback;
1514 
1515     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1516     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1517     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1518   }
1519   return CEED_ERROR_SUCCESS;
1520 }
1521 
1522 /**
1523   @brief Assemble CeedQFunction and store result internally.
1524 
1525   Return copied references of stored data to the caller.
1526   Caller is responsible for ownership and destruction of the copied references.
1527   See also @ref CeedOperatorLinearAssembleQFunction
1528 
1529   Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers.
1530         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
1531 
1532   @param[in]  op        CeedOperator to assemble CeedQFunction
1533   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1534   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1535   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1536 
1537   @return An error code: 0 - success, otherwise - failure
1538 
1539   @ref User
1540 **/
1541 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1542   CeedCall(CeedOperatorCheckReady(op));
1543 
1544   if (op->LinearAssembleQFunctionUpdate) {
1545     // Backend version
1546     bool                qf_assembled_is_setup;
1547     CeedVector          assembled_vec  = NULL;
1548     CeedElemRestriction assembled_rstr = NULL;
1549 
1550     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1551     if (qf_assembled_is_setup) {
1552       bool update_needed;
1553 
1554       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1555       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1556       if (update_needed) CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1557     } else {
1558       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1559       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1560     }
1561     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1562 
1563     // Copy reference from internally held copy
1564     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1565     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1566     CeedCall(CeedVectorDestroy(&assembled_vec));
1567     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1568   } else {
1569     // Operator fallback
1570     CeedOperator op_fallback;
1571 
1572     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1573     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1574     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1575   }
1576 
1577   return CEED_ERROR_SUCCESS;
1578 }
1579 
1580 /**
1581   @brief Assemble the diagonal of a square linear CeedOperator
1582 
1583   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1584 
1585   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1586 
1587   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1588 
1589   @param[in]  op        CeedOperator to assemble CeedQFunction
1590   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1591   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1592 
1593   @return An error code: 0 - success, otherwise - failure
1594 
1595   @ref User
1596 **/
1597 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1598   bool is_composite;
1599   CeedCall(CeedOperatorCheckReady(op));
1600   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1601 
1602   CeedSize input_size = 0, output_size = 0;
1603   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1604   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1605 
1606   // Early exit for empty operator
1607   if (!is_composite) {
1608     CeedInt num_elem = 0;
1609 
1610     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1611     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1612   }
1613 
1614   if (op->LinearAssembleDiagonal) {
1615     // Backend version
1616     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1617     return CEED_ERROR_SUCCESS;
1618   } else if (op->LinearAssembleAddDiagonal) {
1619     // Backend version with zeroing first
1620     CeedCall(CeedVectorSetValue(assembled, 0.0));
1621     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1622     return CEED_ERROR_SUCCESS;
1623   } else {
1624     // Operator fallback
1625     CeedOperator op_fallback;
1626 
1627     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1628     if (op_fallback) {
1629       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1630       return CEED_ERROR_SUCCESS;
1631     }
1632   }
1633   // Default interface implementation
1634   CeedCall(CeedVectorSetValue(assembled, 0.0));
1635   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1636 
1637   return CEED_ERROR_SUCCESS;
1638 }
1639 
1640 /**
1641   @brief Assemble the diagonal of a square linear CeedOperator
1642 
1643   This sums into a CeedVector the diagonal of a linear CeedOperator.
1644 
1645   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1646 
1647   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1648 
1649   @param[in]  op        CeedOperator to assemble CeedQFunction
1650   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1651   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1652 
1653   @return An error code: 0 - success, otherwise - failure
1654 
1655   @ref User
1656 **/
1657 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1658   bool is_composite;
1659   CeedCall(CeedOperatorCheckReady(op));
1660   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1661 
1662   CeedSize input_size = 0, output_size = 0;
1663   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1664   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1665 
1666   // Early exit for empty operator
1667   if (!is_composite) {
1668     CeedInt num_elem = 0;
1669 
1670     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1671     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1672   }
1673 
1674   if (op->LinearAssembleAddDiagonal) {
1675     // Backend version
1676     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1677     return CEED_ERROR_SUCCESS;
1678   } else {
1679     // Operator fallback
1680     CeedOperator op_fallback;
1681 
1682     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1683     if (op_fallback) {
1684       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1685       return CEED_ERROR_SUCCESS;
1686     }
1687   }
1688   // Default interface implementation
1689   if (is_composite) {
1690     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1691   } else {
1692     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1693   }
1694 
1695   return CEED_ERROR_SUCCESS;
1696 }
1697 
1698 /**
1699   @brief Assemble the point block diagonal of a square linear CeedOperator
1700 
1701   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1702 
1703   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1704 
1705   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1706 
1707   @param[in]  op        CeedOperator to assemble CeedQFunction
1708   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1709 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1710 component in].
1711   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1712 
1713   @return An error code: 0 - success, otherwise - failure
1714 
1715   @ref User
1716 **/
1717 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1718   bool is_composite;
1719   CeedCall(CeedOperatorCheckReady(op));
1720   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1721 
1722   CeedSize input_size = 0, output_size = 0;
1723   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1724   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1725 
1726   // Early exit for empty operator
1727   if (!is_composite) {
1728     CeedInt num_elem = 0;
1729 
1730     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1731     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1732   }
1733 
1734   if (op->LinearAssemblePointBlockDiagonal) {
1735     // Backend version
1736     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1737     return CEED_ERROR_SUCCESS;
1738   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1739     // Backend version with zeroing first
1740     CeedCall(CeedVectorSetValue(assembled, 0.0));
1741     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1742     return CEED_ERROR_SUCCESS;
1743   } else {
1744     // Operator fallback
1745     CeedOperator op_fallback;
1746 
1747     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1748     if (op_fallback) {
1749       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1750       return CEED_ERROR_SUCCESS;
1751     }
1752   }
1753   // Default interface implementation
1754   CeedCall(CeedVectorSetValue(assembled, 0.0));
1755   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1756 
1757   return CEED_ERROR_SUCCESS;
1758 }
1759 
1760 /**
1761   @brief Assemble the point block diagonal of a square linear CeedOperator
1762 
1763   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1764 
1765   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1766 
1767   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1768 
1769   @param[in]  op        CeedOperator to assemble CeedQFunction
1770   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1771 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1772 component in].
1773   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1774 
1775   @return An error code: 0 - success, otherwise - failure
1776 
1777   @ref User
1778 **/
1779 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1780   bool is_composite;
1781   CeedCall(CeedOperatorCheckReady(op));
1782   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1783 
1784   CeedSize input_size = 0, output_size = 0;
1785   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1786   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1787 
1788   // Early exit for empty operator
1789   if (!is_composite) {
1790     CeedInt num_elem = 0;
1791 
1792     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1793     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1794   }
1795 
1796   if (op->LinearAssembleAddPointBlockDiagonal) {
1797     // Backend version
1798     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1799     return CEED_ERROR_SUCCESS;
1800   } else {
1801     // Operator fallback
1802     CeedOperator op_fallback;
1803 
1804     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1805     if (op_fallback) {
1806       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1807       return CEED_ERROR_SUCCESS;
1808     }
1809   }
1810   // Default interface implementation
1811   if (is_composite) {
1812     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1813   } else {
1814     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1815   }
1816 
1817   return CEED_ERROR_SUCCESS;
1818 }
1819 
1820 /**
1821    @brief Fully assemble the nonzero pattern of a linear operator.
1822 
1823    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1824 
1825    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1826 matrix in entry (i, j).
1827   Note that the (i, j) pairs are not unique and may repeat.
1828   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1829 
1830    This will generally be slow unless your operator is low-order.
1831 
1832    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1833 
1834    @param[in]  op          CeedOperator to assemble
1835    @param[out] num_entries Number of entries in coordinate nonzero pattern
1836    @param[out] rows        Row number for each entry
1837    @param[out] cols        Column number for each entry
1838 
1839    @ref User
1840 **/
1841 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1842   CeedInt       num_suboperators;
1843   CeedSize      single_entries;
1844   CeedOperator *sub_operators;
1845   bool          is_composite;
1846   CeedCall(CeedOperatorCheckReady(op));
1847   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1848 
1849   if (op->LinearAssembleSymbolic) {
1850     // Backend version
1851     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1852     return CEED_ERROR_SUCCESS;
1853   } else {
1854     // Operator fallback
1855     CeedOperator op_fallback;
1856 
1857     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1858     if (op_fallback) {
1859       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1860       return CEED_ERROR_SUCCESS;
1861     }
1862   }
1863 
1864   // Default interface implementation
1865 
1866   // count entries and allocate rows, cols arrays
1867   *num_entries = 0;
1868   if (is_composite) {
1869     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1870     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1871     for (CeedInt k = 0; k < num_suboperators; ++k) {
1872       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1873       *num_entries += single_entries;
1874     }
1875   } else {
1876     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1877     *num_entries += single_entries;
1878   }
1879   CeedCall(CeedCalloc(*num_entries, rows));
1880   CeedCall(CeedCalloc(*num_entries, cols));
1881 
1882   // assemble nonzero locations
1883   CeedInt offset = 0;
1884   if (is_composite) {
1885     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1886     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1887     for (CeedInt k = 0; k < num_suboperators; ++k) {
1888       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1889       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1890       offset += single_entries;
1891     }
1892   } else {
1893     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1894   }
1895 
1896   return CEED_ERROR_SUCCESS;
1897 }
1898 
1899 /**
1900    @brief Fully assemble the nonzero entries of a linear operator.
1901 
1902    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1903 
1904    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1905 matrix in entry (i, j).
1906   Note that the (i, j) pairs are not unique and may repeat.
1907   This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1908 
1909    This will generally be slow unless your operator is low-order.
1910 
1911    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1912 
1913    @param[in]  op     CeedOperator to assemble
1914    @param[out] values Values to assemble into matrix
1915 
1916    @ref User
1917 **/
1918 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1919   CeedInt       num_suboperators;
1920   CeedSize      single_entries = 0;
1921   CeedOperator *sub_operators;
1922   bool          is_composite;
1923   CeedCall(CeedOperatorCheckReady(op));
1924   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1925 
1926   // Early exit for empty operator
1927   if (!is_composite) {
1928     CeedInt num_elem = 0;
1929 
1930     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1931     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1932   }
1933 
1934   if (op->LinearAssemble) {
1935     // Backend version
1936     CeedCall(op->LinearAssemble(op, values));
1937     return CEED_ERROR_SUCCESS;
1938   } else {
1939     // Operator fallback
1940     CeedOperator op_fallback;
1941 
1942     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1943     if (op_fallback) {
1944       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
1945       return CEED_ERROR_SUCCESS;
1946     }
1947   }
1948 
1949   // Default interface implementation
1950   CeedInt offset = 0;
1951   CeedCall(CeedVectorSetValue(values, 0.0));
1952   if (is_composite) {
1953     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1954     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1955     for (CeedInt k = 0; k < num_suboperators; k++) {
1956       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
1957       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1958       offset += single_entries;
1959     }
1960   } else {
1961     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
1962   }
1963 
1964   return CEED_ERROR_SUCCESS;
1965 }
1966 
1967 /**
1968   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
1969 
1970   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1971 
1972   @param[in]  op               Composite CeedOperator
1973   @param[in]  num_skip_indices Number of suboperators to skip
1974   @param[in]  skip_indices     Array of indices of suboperators to skip
1975   @param[out] mult             Vector to store multiplicity (of size l_size)
1976 
1977   @return An error code: 0 - success, otherwise - failure
1978 
1979   @ref User
1980 **/
1981 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
1982   CeedCall(CeedOperatorCheckReady(op));
1983 
1984   Ceed                ceed;
1985   CeedInt             num_suboperators;
1986   CeedSize            l_vec_len;
1987   CeedScalar         *mult_array;
1988   CeedVector          ones_l_vec;
1989   CeedElemRestriction elem_rstr;
1990   CeedOperator       *sub_operators;
1991 
1992   CeedCall(CeedOperatorGetCeed(op, &ceed));
1993 
1994   // Zero mult vector
1995   CeedCall(CeedVectorSetValue(mult, 0.0));
1996 
1997   // Get suboperators
1998   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1999   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2000   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2001 
2002   // Work vector
2003   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2004   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2005   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2006   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2007 
2008   // Compute multiplicity across suboperators
2009   for (CeedInt i = 0; i < num_suboperators; i++) {
2010     const CeedScalar *sub_mult_array;
2011     CeedVector        sub_mult_l_vec, ones_e_vec;
2012 
2013     // -- Check for suboperator to skip
2014     for (CeedInt j = 0; j < num_skip_indices; j++) {
2015       if (skip_indices[j] == i) continue;
2016     }
2017 
2018     // -- Sub operator multiplicity
2019     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2020     CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2021     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2022     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2023     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2024     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2025     // ---- Flag every node present in the current suboperator
2026     for (CeedInt j = 0; j < l_vec_len; j++) {
2027       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2028     }
2029     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2030     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2031     CeedCall(CeedVectorDestroy(&ones_e_vec));
2032   }
2033   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2034   CeedCall(CeedVectorDestroy(&ones_l_vec));
2035 
2036   return CEED_ERROR_SUCCESS;
2037 }
2038 
2039 /**
2040   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2041 grid interpolation
2042 
2043   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2044 
2045   @param[in]  op_fine      Fine grid operator
2046   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2047   @param[in]  rstr_coarse  Coarse grid restriction
2048   @param[in]  basis_coarse Coarse grid active vector basis
2049   @param[out] op_coarse    Coarse grid operator
2050   @param[out] op_prolong   Coarse to fine operator, or NULL
2051   @param[out] op_restrict  Fine to coarse operator, or NULL
2052 
2053   @return An error code: 0 - success, otherwise - failure
2054 
2055   @ref User
2056 **/
2057 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2058                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2059   CeedCall(CeedOperatorCheckReady(op_fine));
2060 
2061   // Build prolongation matrix, if required
2062   CeedBasis basis_c_to_f = NULL;
2063   if (op_prolong || op_restrict) {
2064     CeedBasis basis_fine;
2065     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2066     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2067   }
2068 
2069   // Core code
2070   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2071 
2072   return CEED_ERROR_SUCCESS;
2073 }
2074 
2075 /**
2076   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2077 
2078   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2079 
2080   @param[in]  op_fine       Fine grid operator
2081   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2082   @param[in]  rstr_coarse   Coarse grid restriction
2083   @param[in]  basis_coarse  Coarse grid active vector basis
2084   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2085   @param[out] op_coarse     Coarse grid operator
2086   @param[out] op_prolong    Coarse to fine operator, or NULL
2087   @param[out] op_restrict   Fine to coarse operator, or NULL
2088 
2089   @return An error code: 0 - success, otherwise - failure
2090 
2091   @ref User
2092 **/
2093 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2094                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2095                                              CeedOperator *op_restrict) {
2096   CeedCall(CeedOperatorCheckReady(op_fine));
2097   Ceed ceed;
2098   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2099 
2100   // Check for compatible quadrature spaces
2101   CeedBasis basis_fine;
2102   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2103   CeedInt Q_f, Q_c;
2104   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2105   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2106   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2107 
2108   // Create coarse to fine basis, if required
2109   CeedBasis basis_c_to_f = NULL;
2110   if (op_prolong || op_restrict) {
2111     // Check if interpolation matrix is provided
2112     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2113               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2114     CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2115     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2116     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2117     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2118     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2119     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2120     CeedScalar *q_ref, *q_weight, *grad;
2121     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2122     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2123     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2124     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2125     CeedCall(CeedFree(&q_ref));
2126     CeedCall(CeedFree(&q_weight));
2127     CeedCall(CeedFree(&grad));
2128   }
2129 
2130   // Core code
2131   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2132   return CEED_ERROR_SUCCESS;
2133 }
2134 
2135 /**
2136   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2137 
2138   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2139 
2140   @param[in]  op_fine       Fine grid operator
2141   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2142   @param[in]  rstr_coarse   Coarse grid restriction
2143   @param[in]  basis_coarse  Coarse grid active vector basis
2144   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2145   @param[out] op_coarse     Coarse grid operator
2146   @param[out] op_prolong    Coarse to fine operator, or NULL
2147   @param[out] op_restrict   Fine to coarse operator, or NULL
2148 
2149   @return An error code: 0 - success, otherwise - failure
2150 
2151   @ref User
2152 **/
2153 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2154                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2155                                        CeedOperator *op_restrict) {
2156   CeedCall(CeedOperatorCheckReady(op_fine));
2157   Ceed ceed;
2158   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2159 
2160   // Check for compatible quadrature spaces
2161   CeedBasis basis_fine;
2162   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2163   CeedInt Q_f, Q_c;
2164   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2165   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2166   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2167 
2168   // Coarse to fine basis
2169   CeedBasis basis_c_to_f = NULL;
2170   if (op_prolong || op_restrict) {
2171     // Check if interpolation matrix is provided
2172     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2173               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2174     CeedElemTopology topo;
2175     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2176     CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2177     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2178     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2179     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2180     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2181     CeedScalar *q_ref, *q_weight, *grad;
2182     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2183     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2184     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2185     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2186     CeedCall(CeedFree(&q_ref));
2187     CeedCall(CeedFree(&q_weight));
2188     CeedCall(CeedFree(&grad));
2189   }
2190 
2191   // Core code
2192   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2193   return CEED_ERROR_SUCCESS;
2194 }
2195 
2196 /**
2197   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2198 
2199   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2200   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2201   The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T
2202 \hat S V\f$.
2203   The CeedOperator must be linear and non-composite.
2204   The associated CeedQFunction must therefore also be linear.
2205 
2206   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2207 
2208   @param[in]  op      CeedOperator to create element inverses
2209   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2210   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2211 
2212   @return An error code: 0 - success, otherwise - failure
2213 
2214   @ref User
2215 **/
2216 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2217   CeedCall(CeedOperatorCheckReady(op));
2218 
2219   if (op->CreateFDMElementInverse) {
2220     // Backend version
2221     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2222     return CEED_ERROR_SUCCESS;
2223   } else {
2224     // Operator fallback
2225     CeedOperator op_fallback;
2226 
2227     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2228     if (op_fallback) {
2229       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2230       return CEED_ERROR_SUCCESS;
2231     }
2232   }
2233 
2234   // Default interface implementation
2235   Ceed ceed, ceed_parent;
2236   CeedCall(CeedOperatorGetCeed(op, &ceed));
2237   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2238   ceed_parent = ceed_parent ? ceed_parent : ceed;
2239   CeedQFunction qf;
2240   CeedCall(CeedOperatorGetQFunction(op, &qf));
2241 
2242   // Determine active input basis
2243   bool                interp = false, grad = false;
2244   CeedBasis           basis = NULL;
2245   CeedElemRestriction rstr  = NULL;
2246   CeedOperatorField  *op_fields;
2247   CeedQFunctionField *qf_fields;
2248   CeedInt             num_input_fields;
2249   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2250   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2251   for (CeedInt i = 0; i < num_input_fields; i++) {
2252     CeedVector vec;
2253     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2254     if (vec == CEED_VECTOR_ACTIVE) {
2255       CeedEvalMode eval_mode;
2256       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2257       interp = interp || eval_mode == CEED_EVAL_INTERP;
2258       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2259       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2260       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2261     }
2262   }
2263   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2264   CeedSize l_size = 1;
2265   CeedInt  P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2266   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2267   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2268   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2269   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2270   CeedCall(CeedBasisGetDimension(basis, &dim));
2271   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2272   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2273   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2274 
2275   // Build and diagonalize 1D Mass and Laplacian
2276   bool is_tensor_basis;
2277   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2278   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2279   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2280   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2281   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2282   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2283   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2284   CeedCall(CeedCalloc(P_1d, &lambda));
2285   // -- Build matrices
2286   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2287   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2288   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2289   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2290   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2291 
2292   // -- Diagonalize
2293   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2294   CeedCall(CeedFree(&mass));
2295   CeedCall(CeedFree(&laplace));
2296   for (CeedInt i = 0; i < P_1d; i++) {
2297     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2298   }
2299   CeedCall(CeedFree(&x));
2300 
2301   // Assemble QFunction
2302   CeedVector          assembled = NULL;
2303   CeedElemRestriction rstr_qf   = NULL;
2304   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2305   CeedInt layout[3];
2306   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2307   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2308   CeedScalar max_norm = 0;
2309   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2310 
2311   // Calculate element averages
2312   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2313   CeedScalar       *elem_avg;
2314   const CeedScalar *assembled_array, *q_weight_array;
2315   CeedVector        q_weight;
2316   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2317   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2318   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2319   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2320   CeedCall(CeedCalloc(num_elem, &elem_avg));
2321   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2322   for (CeedInt e = 0; e < num_elem; e++) {
2323     CeedInt count = 0;
2324     for (CeedInt q = 0; q < num_qpts; q++) {
2325       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2326         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2327           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2328           count++;
2329         }
2330       }
2331     }
2332     if (count) {
2333       elem_avg[e] /= count;
2334     } else {
2335       elem_avg[e] = 1.0;
2336     }
2337   }
2338   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2339   CeedCall(CeedVectorDestroy(&assembled));
2340   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2341   CeedCall(CeedVectorDestroy(&q_weight));
2342 
2343   // Build FDM diagonal
2344   CeedVector  q_data;
2345   CeedScalar *q_data_array, *fdm_diagonal;
2346   CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2347   const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2348   for (CeedInt c = 0; c < num_comp; c++) {
2349     for (CeedInt n = 0; n < num_nodes; n++) {
2350       if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2351       if (grad) {
2352         for (CeedInt d = 0; d < dim; d++) {
2353           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2354           fdm_diagonal[c * num_nodes + n] += lambda[i];
2355         }
2356       }
2357       if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2358     }
2359   }
2360   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2361   CeedCall(CeedVectorSetValue(q_data, 0.0));
2362   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2363   for (CeedInt e = 0; e < num_elem; e++) {
2364     for (CeedInt c = 0; c < num_comp; c++) {
2365       for (CeedInt n = 0; n < num_nodes; n++) q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2366     }
2367   }
2368   CeedCall(CeedFree(&elem_avg));
2369   CeedCall(CeedFree(&fdm_diagonal));
2370   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2371 
2372   // Setup FDM operator
2373   // -- Basis
2374   CeedBasis   fdm_basis;
2375   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2376   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2377   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2378   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2379   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2380   CeedCall(CeedFree(&fdm_interp));
2381   CeedCall(CeedFree(&grad_dummy));
2382   CeedCall(CeedFree(&q_ref_dummy));
2383   CeedCall(CeedFree(&q_weight_dummy));
2384   CeedCall(CeedFree(&lambda));
2385 
2386   // -- Restriction
2387   CeedElemRestriction rstr_qd_i;
2388   CeedInt             strides[3] = {1, num_nodes, num_nodes * num_comp};
2389   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i));
2390   // -- QFunction
2391   CeedQFunction qf_fdm;
2392   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2393   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2394   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2395   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2396   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2397   // -- QFunction context
2398   CeedInt *num_comp_data;
2399   CeedCall(CeedCalloc(1, &num_comp_data));
2400   num_comp_data[0] = num_comp;
2401   CeedQFunctionContext ctx_fdm;
2402   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2403   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2404   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2405   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2406   // -- Operator
2407   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2408   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2409   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2410   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2411 
2412   // Cleanup
2413   CeedCall(CeedVectorDestroy(&q_data));
2414   CeedCall(CeedBasisDestroy(&fdm_basis));
2415   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2416   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2417 
2418   return CEED_ERROR_SUCCESS;
2419 }
2420 
2421 /// @}
2422