xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision febe297216863906e4e765a068e88b36cde1d584)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = NULL;
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
128     // Cleanup
129     CeedCall(CeedQFunctionDestroy(&qf_fallback));
130     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
131     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
132   }
133   CeedCall(CeedOperatorSetName(op_fallback, op->name));
134   CeedCall(CeedOperatorCheckReady(op_fallback));
135   op->op_fallback = op_fallback;
136 
137   return CEED_ERROR_SUCCESS;
138 }
139 
140 /**
141   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
142 
143   @param[in]  op          CeedOperator to retrieve fallback for
144   @param[out] op_fallback Fallback CeedOperator
145 
146   @return An error code: 0 - success, otherwise - failure
147 
148   @ref Developer
149 **/
150 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
151   // Create if needed
152   if (!op->op_fallback) {
153     CeedCall(CeedOperatorCreateFallback(op));
154   }
155   if (op->op_fallback) {
156     bool is_debug;
157 
158     CeedCall(CeedIsDebug(op->ceed, &is_debug));
159     if (is_debug) {
160       Ceed        ceed, ceed_fallback;
161       const char *resource, *resource_fallback;
162 
163       CeedCall(CeedOperatorGetCeed(op, &ceed));
164       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
165       CeedCall(CeedGetResource(ceed, &resource));
166       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
167 
168       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n");
169       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
170                 op->op_fallback);
171     }
172   }
173   *op_fallback = op->op_fallback;
174 
175   return CEED_ERROR_SUCCESS;
176 }
177 
178 /**
179   @brief Select correct basis matrix pointer based on CeedEvalMode
180 
181   @param[in]  basis     CeedBasis from which to get the basis matrix
182   @param[in]  eval_mode Current basis evaluation mode
183   @param[in]  identity  Pointer to identity matrix
184   @param[out] basis_ptr Basis pointer to set
185 
186   @ref Developer
187 **/
188 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
189   switch (eval_mode) {
190     case CEED_EVAL_NONE:
191       *basis_ptr = identity;
192       break;
193     case CEED_EVAL_INTERP:
194       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
195       break;
196     case CEED_EVAL_GRAD:
197       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
198       break;
199     case CEED_EVAL_DIV:
200       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
201       break;
202     case CEED_EVAL_CURL:
203       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
204       break;
205     case CEED_EVAL_WEIGHT:
206       break;  // Caught by QF Assembly
207   }
208   assert(*basis_ptr != NULL);
209 
210   return CEED_ERROR_SUCCESS;
211 }
212 
213 /**
214   @brief Create point block restriction for active operator field
215 
216   @param[in]  rstr            Original CeedElemRestriction for active field
217   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
218 
219   @return An error code: 0 - success, otherwise - failure
220 
221   @ref Developer
222 **/
223 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
224   Ceed ceed;
225   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
226   const CeedInt *offsets;
227   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
228 
229   // Expand offsets
230   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
231   CeedSize l_size;
232   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
233   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
234   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
235   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
236   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
237   CeedInt shift = num_comp;
238   if (comp_stride != 1) shift *= num_comp;
239   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
240   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
241     pointblock_offsets[i] = offsets[i] * shift;
242   }
243 
244   // Create new restriction
245   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
246                                      pointblock_offsets, pointblock_rstr));
247 
248   // Cleanup
249   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
250 
251   return CEED_ERROR_SUCCESS;
252 }
253 
254 /**
255   @brief Core logic for assembling operator diagonal or point block diagonal
256 
257   @param[in]  op            CeedOperator to assemble point block diagonal
258   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
259   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
260   @param[out] assembled     CeedVector to store assembled diagonal
261 
262   @return An error code: 0 - success, otherwise - failure
263 
264   @ref Developer
265 **/
266 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
267   Ceed ceed;
268   CeedCall(CeedOperatorGetCeed(op, &ceed));
269 
270   // Assemble QFunction
271   CeedQFunction       qf;
272   const CeedScalar   *assembled_qf_array;
273   CeedVector          assembled_qf        = NULL;
274   CeedElemRestriction assembled_elem_rstr = NULL;
275   CeedInt             num_input_fields, num_output_fields;
276   CeedInt             layout[3];
277 
278   CeedCall(CeedOperatorGetQFunction(op, &qf));
279   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
280   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
281   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout));
282   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
283   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
284 
285   // Get assembly data
286   CeedOperatorAssemblyData data;
287   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
288   CeedInt                 *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
289   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
290   CeedBasis               *active_bases;
291   CeedElemRestriction     *active_elem_rstrs;
292   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
293   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
294                                                 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components));
295   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL));
296   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs));
297 
298   // Loop over all active bases
299   for (CeedInt b = 0; b < num_active_bases; b++) {
300     // Assemble point block diagonal restriction, if needed
301     CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b];
302 
303     if (is_pointblock) {
304       CeedElemRestriction point_block_elem_rstr;
305 
306       CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr));
307       diag_elem_rstr = point_block_elem_rstr;
308     }
309 
310     // Create diagonal vector
311     CeedVector elem_diag;
312     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
313 
314     // Assemble element operator diagonals
315     CeedScalar *elem_diag_array;
316     CeedInt     num_elem, num_nodes, num_qpts, num_components;
317 
318     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
319     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
320     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
321     CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes));
322     CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components));
323     CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts));
324 
325     // Construct identity matrix for basis if required
326     bool        has_eval_none = false;
327     CeedScalar *identity      = NULL;
328     for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) {
329       has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE);
330     }
331     for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) {
332       has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE);
333     }
334     if (has_eval_none) {
335       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
336       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
337     }
338 
339     // Compute the diagonal of B^T D B
340     // Each element
341     for (CeedSize e = 0; e < num_elem; e++) {
342       // Each basis eval mode pair
343       CeedInt      d_out              = 0, q_comp_out;
344       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
345       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) {
346         const CeedScalar *B_t = NULL;
347         CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t);
348         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out));
349         if (q_comp_out > 1) {
350           if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
351           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
352         }
353         eval_mode_out_prev = eval_modes_out[b][e_out];
354 
355         CeedInt      d_in              = 0, q_comp_in;
356         CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
357         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) {
358           const CeedScalar *B = NULL;
359           CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B);
360           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in));
361           if (q_comp_in > 1) {
362             if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
363             else B = &B[(++d_in) * num_qpts * num_nodes];
364           }
365           eval_mode_in_prev = eval_modes_in[b][e_in];
366 
367           // Each component
368           for (CeedInt c_out = 0; c_out < num_components; c_out++) {
369             // Each qpt/node pair
370             for (CeedInt q = 0; q < num_qpts; q++) {
371               if (is_pointblock) {
372                 // Point Block Diagonal
373                 for (CeedInt c_in = 0; c_in < num_components; c_in++) {
374                   const CeedSize c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
375                   const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
376                   for (CeedInt n = 0; n < num_nodes; n++) {
377                     elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] +=
378                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
379                   }
380                 }
381               } else {
382                 // Diagonal Only
383                 const CeedInt    c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
384                 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
385                 for (CeedInt n = 0; n < num_nodes; n++) {
386                   elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
387                 }
388               }
389             }
390           }
391         }
392       }
393     }
394     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
395 
396     // Assemble local operator diagonal
397     CeedCall(CeedElemRestrictionApplyUnsigned(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
398 
399     // Cleanup
400     if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
401     CeedCall(CeedVectorDestroy(&elem_diag));
402     CeedCall(CeedFree(&identity));
403   }
404   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
405   CeedCall(CeedVectorDestroy(&assembled_qf));
406 
407   return CEED_ERROR_SUCCESS;
408 }
409 
410 /**
411   @brief Core logic for assembling composite operator diagonal
412 
413   @param[in]  op            CeedOperator to assemble point block diagonal
414   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
415   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
416   @param[out] assembled     CeedVector to store assembled diagonal
417 
418   @return An error code: 0 - success, otherwise - failure
419 
420   @ref Developer
421 **/
422 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
423                                                                  CeedVector assembled) {
424   CeedInt       num_sub;
425   CeedOperator *suboperators;
426   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
427   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
428   for (CeedInt i = 0; i < num_sub; i++) {
429     if (is_pointblock) {
430       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
431     } else {
432       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
433     }
434   }
435   return CEED_ERROR_SUCCESS;
436 }
437 
438 /**
439   @brief Build nonzero pattern for non-composite operator
440 
441   Users should generally use CeedOperatorLinearAssembleSymbolic()
442 
443   Note: For operators using oriented element restrictions, entries in rows or cols may be negative indicating the assembled value at this nonzero
444 should be negated
445 
446   @param[in]  op     CeedOperator to assemble nonzero pattern
447   @param[in]  offset Offset for number of entries
448   @param[out] rows   Row number for each entry
449   @param[out] cols   Column number for each entry
450 
451   @return An error code: 0 - success, otherwise - failure
452 
453   @ref Developer
454 **/
455 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
456   Ceed ceed;
457   bool is_composite;
458   CeedCall(CeedOperatorGetCeed(op, &ceed));
459   CeedCall(CeedOperatorIsComposite(op, &is_composite));
460 
461   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
462 
463   CeedSize num_nodes;
464   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
465   CeedElemRestriction rstr_in;
466   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
467   CeedInt num_elem, elem_size, num_comp;
468   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
469   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
470   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
471   CeedInt layout_er[3];
472   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
473 
474   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
475 
476   // Determine elem_dof relation
477   CeedVector index_vec;
478   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
479   CeedScalar *array;
480   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
481   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
482   CeedCall(CeedVectorRestoreArray(index_vec, &array));
483   CeedVector elem_dof;
484   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
485   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
486   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
487   const CeedScalar *elem_dof_a;
488   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
489   CeedCall(CeedVectorDestroy(&index_vec));
490 
491   // Determine i, j locations for element matrices
492   CeedSize count = 0;
493   for (CeedInt e = 0; e < num_elem; e++) {
494     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
495       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
496         for (CeedInt i = 0; i < elem_size; i++) {
497           for (CeedInt j = 0; j < elem_size; j++) {
498             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
499             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
500 
501             const CeedInt row = elem_dof_a[elem_dof_index_row];
502             const CeedInt col = elem_dof_a[elem_dof_index_col];
503 
504             rows[offset + count] = row;
505             cols[offset + count] = col;
506             count++;
507           }
508         }
509       }
510     }
511   }
512   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
513   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
514   CeedCall(CeedVectorDestroy(&elem_dof));
515 
516   return CEED_ERROR_SUCCESS;
517 }
518 
519 /**
520   @brief Assemble nonzero entries for non-composite operator
521 
522   Users should generally use CeedOperatorLinearAssemble()
523 
524   @param[in]  op     CeedOperator to assemble
525   @param[in]  offset Offset for number of entries
526   @param[out] values Values to assemble into matrix
527 
528   @return An error code: 0 - success, otherwise - failure
529 
530   @ref Developer
531 **/
532 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
533   Ceed ceed;
534   bool is_composite;
535   CeedCall(CeedOperatorGetCeed(op, &ceed));
536   CeedCall(CeedOperatorIsComposite(op, &is_composite));
537 
538   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
539 
540   // Early exit for empty operator
541   {
542     CeedInt num_elem = 0;
543 
544     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
545     if (num_elem == 0) return CEED_ERROR_SUCCESS;
546   }
547 
548   if (op->LinearAssembleSingle) {
549     // Backend version
550     CeedCall(op->LinearAssembleSingle(op, offset, values));
551     return CEED_ERROR_SUCCESS;
552   } else {
553     // Operator fallback
554     CeedOperator op_fallback;
555 
556     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
557     if (op_fallback) {
558       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
559       return CEED_ERROR_SUCCESS;
560     }
561   }
562 
563   // Assemble QFunction
564   CeedQFunction qf;
565   CeedCall(CeedOperatorGetQFunction(op, &qf));
566   CeedVector          assembled_qf = NULL;
567   CeedElemRestriction rstr_q       = NULL;
568   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
569   CeedSize qf_length;
570   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
571 
572   CeedInt            num_input_fields, num_output_fields;
573   CeedOperatorField *input_fields;
574   CeedOperatorField *output_fields;
575   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
576 
577   // Get assembly data
578   CeedOperatorAssemblyData data;
579   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
580   const CeedEvalMode **eval_modes_in, **eval_modes_out;
581   CeedInt             *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
582   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out,
583                                                 &eval_modes_out, NULL, NULL));
584   CeedBasis *bases;
585   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL));
586   CeedBasis basis_in = bases[0];
587 
588   CeedCheck(num_active_bases == 1, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases");
589   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
590 
591   CeedElemRestriction active_rstr;
592   CeedInt             num_elem, elem_size, num_qpts, num_comp;
593   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
594   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
595   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
596   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
597   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
598 
599   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
600 
601   // loop over elements and put in data structure
602   const CeedScalar *assembled_qf_array;
603   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
604 
605   CeedInt layout_qf[3];
606   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
607   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
608 
609   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
610   const CeedScalar **B_mats_in, **B_mats_out;
611   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out));
612   const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0];
613   CeedScalar        BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]];
614   CeedScalar        elem_mat[elem_size * elem_size];
615   CeedSize          count = 0;
616   CeedScalar       *vals;
617   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
618   for (CeedSize e = 0; e < num_elem; e++) {
619     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
620       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
621         // Compute B^T*D
622         for (CeedSize n = 0; n < elem_size; n++) {
623           for (CeedSize q = 0; q < num_qpts; q++) {
624             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
625               const CeedSize btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in);
626               CeedScalar     sum       = 0.0;
627               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
628                 const CeedSize b_out_index     = (num_eval_modes_out[0] * q + e_out) * elem_size + n;
629                 const CeedSize eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out;
630                 const CeedSize qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
631                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
632               }
633               BTD_mat[btd_index] = sum;
634             }
635           }
636         }
637         // form element matrix itself (for each block component)
638         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0]));
639 
640         // put element matrix in coordinate data structure
641         for (CeedInt i = 0; i < elem_size; i++) {
642           for (CeedInt j = 0; j < elem_size; j++) {
643             vals[offset + count] = elem_mat[i * elem_size + j];
644             count++;
645           }
646         }
647       }
648     }
649   }
650   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries");
651   CeedCall(CeedVectorRestoreArray(values, &vals));
652 
653   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
654   CeedCall(CeedVectorDestroy(&assembled_qf));
655 
656   return CEED_ERROR_SUCCESS;
657 }
658 
659 /**
660   @brief Count number of entries for assembled CeedOperator
661 
662   @param[in]  op          CeedOperator to assemble
663   @param[out] num_entries Number of entries in assembled representation
664 
665   @return An error code: 0 - success, otherwise - failure
666 
667   @ref Utility
668 **/
669 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) {
670   bool                is_composite;
671   CeedElemRestriction rstr;
672   CeedInt             num_elem, elem_size, num_comp;
673 
674   CeedCall(CeedOperatorIsComposite(op, &is_composite));
675   CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
676   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
677   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
678   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
679   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
680   *num_entries = (CeedSize)elem_size * num_comp * elem_size * num_comp * num_elem;
681 
682   return CEED_ERROR_SUCCESS;
683 }
684 
685 /**
686   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
687 
688   @param[in]  op_fine      Fine grid operator
689   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
690   @param[in]  rstr_coarse  Coarse grid restriction
691   @param[in]  basis_coarse Coarse grid active vector basis
692   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
693   @param[out] op_coarse    Coarse grid operator
694   @param[out] op_prolong   Coarse to fine operator, or NULL
695   @param[out] op_restrict  Fine to coarse operator, or NULL
696 
697   @return An error code: 0 - success, otherwise - failure
698 
699   @ref Developer
700 **/
701 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
702                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
703   Ceed                ceed;
704   CeedVector          mult_vec         = NULL;
705   CeedElemRestriction rstr_p_mult_fine = NULL;
706   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
707 
708   // Check for composite operator
709   bool is_composite;
710   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
711   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
712 
713   // Coarse Grid
714   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
715   CeedElemRestriction rstr_fine = NULL;
716   // -- Clone input fields
717   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
718     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
719       rstr_fine = op_fine->input_fields[i]->elem_rstr;
720       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
721     } else {
722       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
723                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
724     }
725   }
726   // -- Clone output fields
727   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
728     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
729       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
730     } else {
731       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
732                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
733     }
734   }
735   // -- Clone QFunctionAssemblyData
736   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
737 
738   // Multiplicity vector
739   if (op_restrict || op_prolong) {
740     CeedVector mult_e_vec;
741 
742     CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine));
743     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
744     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
745     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
746     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
747     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
748     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
749     CeedCall(CeedVectorDestroy(&mult_e_vec));
750     CeedCall(CeedVectorReciprocal(mult_vec));
751   }
752 
753   // Clone name
754   bool   has_name = op_fine->name;
755   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
756   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
757 
758   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
759   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
760             "Prolongation or restriction operator creation requires coarse-to-fine basis");
761 
762   // Restriction/Prolongation Operators
763   CeedInt num_comp;
764   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
765 
766   // Restriction
767   if (op_restrict) {
768     CeedInt             *num_comp_r_data;
769     CeedQFunction        qf_restrict;
770     CeedQFunctionContext ctx_r;
771 
772     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
773     CeedCall(CeedCalloc(1, &num_comp_r_data));
774     num_comp_r_data[0] = num_comp;
775     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
776     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
777     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
778     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
779     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
780     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
781     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
782     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
783 
784     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
785     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
786     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec));
787     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
788 
789     // Set name
790     char *restriction_name;
791     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
792     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
793     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
794     CeedCall(CeedFree(&restriction_name));
795 
796     // Check
797     CeedCall(CeedOperatorCheckReady(*op_restrict));
798 
799     // Cleanup
800     CeedCall(CeedQFunctionDestroy(&qf_restrict));
801   }
802 
803   // Prolongation
804   if (op_prolong) {
805     CeedInt             *num_comp_p_data;
806     CeedQFunction        qf_prolong;
807     CeedQFunctionContext ctx_p;
808 
809     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
810     CeedCall(CeedCalloc(1, &num_comp_p_data));
811     num_comp_p_data[0] = num_comp;
812     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
813     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
814     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
815     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
816     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
817     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
818     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
819     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
820 
821     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
822     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
823     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec));
824     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
825 
826     // Set name
827     char *prolongation_name;
828     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
829     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
830     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
831     CeedCall(CeedFree(&prolongation_name));
832 
833     // Check
834     CeedCall(CeedOperatorCheckReady(*op_prolong));
835 
836     // Cleanup
837     CeedCall(CeedQFunctionDestroy(&qf_prolong));
838   }
839 
840   // Check
841   CeedCall(CeedOperatorCheckReady(*op_coarse));
842 
843   // Cleanup
844   CeedCall(CeedVectorDestroy(&mult_vec));
845   CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine));
846   CeedCall(CeedBasisDestroy(&basis_c_to_f));
847 
848   return CEED_ERROR_SUCCESS;
849 }
850 
851 /**
852   @brief Build 1D mass matrix and Laplacian with perturbation
853 
854   @param[in]  interp_1d   Interpolation matrix in one dimension
855   @param[in]  grad_1d     Gradient matrix in one dimension
856   @param[in]  q_weight_1d Quadrature weights in one dimension
857   @param[in]  P_1d        Number of basis nodes in one dimension
858   @param[in]  Q_1d        Number of quadrature points in one dimension
859   @param[in]  dim         Dimension of basis
860   @param[out] mass        Assembled mass matrix in one dimension
861   @param[out] laplace     Assembled perturbed Laplacian in one dimension
862 
863   @return An error code: 0 - success, otherwise - failure
864 
865   @ref Developer
866 **/
867 CeedPragmaOptimizeOff
868 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
869                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
870   for (CeedInt i = 0; i < P_1d; i++) {
871     for (CeedInt j = 0; j < P_1d; j++) {
872       CeedScalar sum = 0.0;
873       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
874       mass[i + j * P_1d] = sum;
875     }
876   }
877   // -- Laplacian
878   for (CeedInt i = 0; i < P_1d; i++) {
879     for (CeedInt j = 0; j < P_1d; j++) {
880       CeedScalar sum = 0.0;
881       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
882       laplace[i + j * P_1d] = sum;
883     }
884   }
885   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
886   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
887   return CEED_ERROR_SUCCESS;
888 }
889 CeedPragmaOptimizeOn
890 
891 /// @}
892 
893 /// ----------------------------------------------------------------------------
894 /// CeedOperator Backend API
895 /// ----------------------------------------------------------------------------
896 /// @addtogroup CeedOperatorBackend
897 /// @{
898 
899 /**
900   @brief Create object holding CeedQFunction assembly data for CeedOperator
901 
902   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
903   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
904 
905   @return An error code: 0 - success, otherwise - failure
906 
907   @ref Backend
908 **/
909 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
910   CeedCall(CeedCalloc(1, data));
911   (*data)->ref_count = 1;
912   (*data)->ceed      = ceed;
913   CeedCall(CeedReference(ceed));
914 
915   return CEED_ERROR_SUCCESS;
916 }
917 
918 /**
919   @brief Increment the reference counter for a CeedQFunctionAssemblyData
920 
921   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
922 
923   @return An error code: 0 - success, otherwise - failure
924 
925   @ref Backend
926 **/
927 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
928   data->ref_count++;
929   return CEED_ERROR_SUCCESS;
930 }
931 
932 /**
933   @brief Set re-use of CeedQFunctionAssemblyData
934 
935   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
936   @param[in]     reuse_data Boolean flag indicating data re-use
937 
938   @return An error code: 0 - success, otherwise - failure
939 
940   @ref Backend
941 **/
942 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
943   data->reuse_data        = reuse_data;
944   data->needs_data_update = true;
945   return CEED_ERROR_SUCCESS;
946 }
947 
948 /**
949   @brief Mark QFunctionAssemblyData as stale
950 
951   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
952   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
953 
954   @return An error code: 0 - success, otherwise - failure
955 
956   @ref Backend
957 **/
958 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
959   data->needs_data_update = needs_data_update;
960   return CEED_ERROR_SUCCESS;
961 }
962 
963 /**
964   @brief Determine if QFunctionAssemblyData needs update
965 
966   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
967   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
968 
969   @return An error code: 0 - success, otherwise - failure
970 
971   @ref Backend
972 **/
973 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
974   *is_update_needed = !data->reuse_data || data->needs_data_update;
975   return CEED_ERROR_SUCCESS;
976 }
977 
978 /**
979   @brief Copy the pointer to a CeedQFunctionAssemblyData.
980 
981   Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
982 
983   Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
984         CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
985         CeedQFunctionAssemblyData.
986 
987   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
988   @param[in,out] data_copy Variable to store copied reference
989 
990   @return An error code: 0 - success, otherwise - failure
991 
992   @ref Backend
993 **/
994 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
995   CeedCall(CeedQFunctionAssemblyDataReference(data));
996   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
997   *data_copy = data;
998   return CEED_ERROR_SUCCESS;
999 }
1000 
1001 /**
1002   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1003 
1004   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1005   @param[out] is_setup Boolean flag for setup status
1006 
1007   @return An error code: 0 - success, otherwise - failure
1008 
1009   @ref Backend
1010 **/
1011 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1012   *is_setup = data->is_setup;
1013   return CEED_ERROR_SUCCESS;
1014 }
1015 
1016 /**
1017   @brief Set internal objects for CeedQFunctionAssemblyData
1018 
1019   @param[in,out] data CeedQFunctionAssemblyData to set objects
1020   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1021   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1022 
1023   @return An error code: 0 - success, otherwise - failure
1024 
1025   @ref Backend
1026 **/
1027 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1028   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1029   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1030 
1031   data->is_setup = true;
1032   return CEED_ERROR_SUCCESS;
1033 }
1034 
1035 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1036   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1037 
1038   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1039   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1040 
1041   return CEED_ERROR_SUCCESS;
1042 }
1043 
1044 /**
1045   @brief Destroy CeedQFunctionAssemblyData
1046 
1047   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1048 
1049   @return An error code: 0 - success, otherwise - failure
1050 
1051   @ref Backend
1052 **/
1053 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1054   if (!*data || --(*data)->ref_count > 0) {
1055     *data = NULL;
1056     return CEED_ERROR_SUCCESS;
1057   }
1058   CeedCall(CeedDestroy(&(*data)->ceed));
1059   CeedCall(CeedVectorDestroy(&(*data)->vec));
1060   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1061 
1062   CeedCall(CeedFree(data));
1063   return CEED_ERROR_SUCCESS;
1064 }
1065 
1066 /**
1067   @brief Get CeedOperatorAssemblyData
1068 
1069   @param[in]  op   CeedOperator to assemble
1070   @param[out] data CeedQFunctionAssemblyData
1071 
1072   @return An error code: 0 - success, otherwise - failure
1073 
1074   @ref Backend
1075 **/
1076 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1077   if (!op->op_assembled) {
1078     CeedOperatorAssemblyData data;
1079 
1080     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1081     op->op_assembled = data;
1082   }
1083   *data = op->op_assembled;
1084 
1085   return CEED_ERROR_SUCCESS;
1086 }
1087 
1088 /**
1089   @brief Create object holding CeedOperator assembly data.
1090 
1091   The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1092   An array with references to the corresponding active CeedElemRestrictions is also stored.
1093   For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1094   The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1095 CeedEvalMode.
1096   The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1097   Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1098 
1099   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1100   @param[in]  op   CeedOperator to be assembled
1101   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1102 
1103   @return An error code: 0 - success, otherwise - failure
1104 
1105   @ref Backend
1106 **/
1107 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1108   CeedInt num_active_bases = 0;
1109 
1110   // Allocate
1111   CeedCall(CeedCalloc(1, data));
1112   (*data)->ceed = ceed;
1113   CeedCall(CeedReference(ceed));
1114 
1115   // Build OperatorAssembly data
1116   CeedQFunction       qf;
1117   CeedQFunctionField *qf_fields;
1118   CeedOperatorField  *op_fields;
1119   CeedInt             num_input_fields;
1120   CeedCall(CeedOperatorGetQFunction(op, &qf));
1121   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1122   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1123 
1124   // Determine active input basis
1125   CeedInt       *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0;
1126   CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL;
1127   CeedSize     **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1128   for (CeedInt i = 0; i < num_input_fields; i++) {
1129     CeedVector vec;
1130     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1131     if (vec == CEED_VECTOR_ACTIVE) {
1132       CeedBasis    basis_in = NULL;
1133       CeedEvalMode eval_mode;
1134       CeedInt      index = -1, dim, num_comp, q_comp;
1135       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1136       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1137       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1138       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1139       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1140       for (CeedInt i = 0; i < num_active_bases; i++) {
1141         if ((*data)->active_bases[i] == basis_in) index = i;
1142       }
1143       if (index == -1) {
1144         CeedElemRestriction elem_rstr_in;
1145         index = num_active_bases;
1146         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1147         (*data)->active_bases[num_active_bases] = NULL;
1148         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases]));
1149         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1150         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1151         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1152         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases]));
1153         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1154         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1155         num_eval_modes_in[index]  = 0;
1156         num_eval_modes_out[index] = 0;
1157         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1158         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1159         eval_modes_in[index]  = NULL;
1160         eval_modes_out[index] = NULL;
1161         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1162         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1163         eval_mode_offsets_in[index]  = NULL;
1164         eval_mode_offsets_out[index] = NULL;
1165         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1166         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1167         (*data)->assembled_bases_in[index]  = NULL;
1168         (*data)->assembled_bases_out[index] = NULL;
1169         num_active_bases++;
1170       }
1171       if (eval_mode != CEED_EVAL_WEIGHT) {
1172         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1173         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1174         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1175         for (CeedInt d = 0; d < q_comp; d++) {
1176           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1177           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1178           offset += num_comp;
1179         }
1180         num_eval_modes_in[index] += q_comp;
1181       }
1182     }
1183   }
1184   (*data)->num_eval_modes_in    = num_eval_modes_in;
1185   (*data)->eval_modes_in        = eval_modes_in;
1186   (*data)->eval_mode_offsets_in = eval_mode_offsets_in;
1187 
1188   // Determine active output basis
1189   CeedInt num_output_fields;
1190   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1191   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1192   offset = 0;
1193   for (CeedInt i = 0; i < num_output_fields; i++) {
1194     CeedVector vec;
1195     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1196     if (vec == CEED_VECTOR_ACTIVE) {
1197       CeedBasis    basis_out = NULL;
1198       CeedEvalMode eval_mode;
1199       CeedInt      index = -1, dim, num_comp, q_comp;
1200       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1201       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1202       CeedCall(CeedBasisGetDimension(basis_out, &dim));
1203       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1204       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1205       for (CeedInt i = 0; i < num_active_bases; i++) {
1206         if ((*data)->active_bases[i] == basis_out) index = i;
1207       }
1208       if (index == -1) {
1209         CeedElemRestriction elem_rstr_out;
1210 
1211         index = num_active_bases;
1212         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1213         (*data)->active_bases[num_active_bases] = NULL;
1214         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases]));
1215         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1216         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1217         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1218         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases]));
1219         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1220         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1221         num_eval_modes_in[index]  = 0;
1222         num_eval_modes_out[index] = 0;
1223         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1224         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1225         eval_modes_in[index]  = NULL;
1226         eval_modes_out[index] = NULL;
1227         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1228         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1229         eval_mode_offsets_in[index]  = NULL;
1230         eval_mode_offsets_out[index] = NULL;
1231         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1232         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1233         (*data)->assembled_bases_in[index]  = NULL;
1234         (*data)->assembled_bases_out[index] = NULL;
1235         num_active_bases++;
1236       }
1237       if (eval_mode != CEED_EVAL_WEIGHT) {
1238         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1239         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1240         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1241         for (CeedInt d = 0; d < q_comp; d++) {
1242           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1243           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1244           offset += num_comp;
1245         }
1246         num_eval_modes_out[index] += q_comp;
1247       }
1248     }
1249   }
1250   (*data)->num_output_components = offset;
1251   (*data)->num_eval_modes_out    = num_eval_modes_out;
1252   (*data)->eval_modes_out        = eval_modes_out;
1253   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1254   (*data)->num_active_bases      = num_active_bases;
1255 
1256   return CEED_ERROR_SUCCESS;
1257 }
1258 
1259 /**
1260   @brief Get CeedOperator CeedEvalModes for assembly.
1261 
1262   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1263 
1264   @param[in]  data                  CeedOperatorAssemblyData
1265   @param[out] num_active_bases      Total number of active bases
1266   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1267                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1268   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1269   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1270   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1271   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1272   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1273   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1274                                       including contributions of all active bases
1275 
1276   @return An error code: 0 - success, otherwise - failure
1277 
1278 
1279   @ref Backend
1280 **/
1281 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in,
1282                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out,
1283                                          const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) {
1284   if (num_active_bases) *num_active_bases = data->num_active_bases;
1285   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1286   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1287   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1288   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1289   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1290   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1291   if (num_output_components) *num_output_components = data->num_output_components;
1292 
1293   return CEED_ERROR_SUCCESS;
1294 }
1295 
1296 /**
1297   @brief Get CeedOperator CeedBasis data for assembly.
1298 
1299   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1300 
1301   @param[in]  data                CeedOperatorAssemblyData
1302   @param[out] num_active_bases    Number of active bases, or NULL
1303   @param[out] active_bases        Pointer to hold active CeedBasis, or NULL
1304   @param[out] assembled_bases_in  Pointer to hold assembled active input B, or NULL
1305   @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL
1306 
1307   @return An error code: 0 - success, otherwise - failure
1308 
1309   @ref Backend
1310 **/
1311 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases,
1312                                      const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) {
1313   // Assemble B_in, B_out if needed
1314   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1315     CeedInt num_qpts;
1316 
1317     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1318     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1319       CeedInt     num_nodes;
1320       CeedScalar *B_in = NULL, *identity = NULL;
1321       bool        has_eval_none = false;
1322 
1323       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1324       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1325 
1326       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1327         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1328       }
1329       if (has_eval_none) {
1330         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1331         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1332           identity[i * num_nodes + i] = 1.0;
1333         }
1334       }
1335 
1336       for (CeedInt q = 0; q < num_qpts; q++) {
1337         for (CeedInt n = 0; n < num_nodes; n++) {
1338           CeedInt      d_in              = 0, q_comp_in;
1339           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1340           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1341             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1342             const CeedScalar *B  = NULL;
1343             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B);
1344             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in));
1345             if (q_comp_in > 1) {
1346               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1347               else B = &B[(++d_in) * num_qpts * num_nodes];
1348             }
1349             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1350             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1351           }
1352         }
1353       }
1354       if (identity) CeedCall(CeedFree(identity));
1355       data->assembled_bases_in[b] = B_in;
1356     }
1357   }
1358 
1359   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1360     CeedInt num_qpts;
1361 
1362     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1363     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1364       CeedInt     num_nodes;
1365       bool        has_eval_none = false;
1366       CeedScalar *B_out = NULL, *identity = NULL;
1367 
1368       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1369       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1370 
1371       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1372         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1373       }
1374       if (has_eval_none) {
1375         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1376         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1377           identity[i * num_nodes + i] = 1.0;
1378         }
1379       }
1380 
1381       for (CeedInt q = 0; q < num_qpts; q++) {
1382         for (CeedInt n = 0; n < num_nodes; n++) {
1383           CeedInt      d_out              = 0, q_comp_out;
1384           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1385           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1386             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1387             const CeedScalar *B  = NULL;
1388             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B);
1389             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out));
1390             if (q_comp_out > 1) {
1391               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1392               else B = &B[(++d_out) * num_qpts * num_nodes];
1393             }
1394             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1395             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1396           }
1397         }
1398       }
1399       if (identity) CeedCall(CeedFree(identity));
1400       data->assembled_bases_out[b] = B_out;
1401     }
1402   }
1403 
1404   // Pass out assembled data
1405   if (active_bases) *active_bases = data->active_bases;
1406   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1407   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1408 
1409   return CEED_ERROR_SUCCESS;
1410 }
1411 
1412 /**
1413   @brief Get CeedOperator CeedBasis data for assembly.
1414 
1415   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1416 
1417   @param[in]  data                  CeedOperatorAssemblyData
1418   @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL
1419   @param[out] active_elem_rstrs     Pointer to hold active CeedElemRestrictions, or NULL
1420 
1421   @return An error code: 0 - success, otherwise - failure
1422 
1423   @ref Backend
1424 **/
1425 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs,
1426                                                 CeedElemRestriction **active_elem_rstrs) {
1427   if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases;
1428   if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs;
1429 
1430   return CEED_ERROR_SUCCESS;
1431 }
1432 
1433 /**
1434   @brief Destroy CeedOperatorAssemblyData
1435 
1436   @param[in,out] data CeedOperatorAssemblyData to destroy
1437 
1438   @return An error code: 0 - success, otherwise - failure
1439 
1440   @ref Backend
1441 **/
1442 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1443   if (!*data) {
1444     *data = NULL;
1445     return CEED_ERROR_SUCCESS;
1446   }
1447   CeedCall(CeedDestroy(&(*data)->ceed));
1448   for (CeedInt b = 0; b < (*data)->num_active_bases; b++) {
1449     CeedCall(CeedBasisDestroy(&(*data)->active_bases[b]));
1450     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b]));
1451     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1452     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1453     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1454     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1455     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1456     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1457   }
1458   CeedCall(CeedFree(&(*data)->active_bases));
1459   CeedCall(CeedFree(&(*data)->active_elem_rstrs));
1460   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1461   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1462   CeedCall(CeedFree(&(*data)->eval_modes_in));
1463   CeedCall(CeedFree(&(*data)->eval_modes_out));
1464   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1465   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1466   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1467   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1468 
1469   CeedCall(CeedFree(data));
1470   return CEED_ERROR_SUCCESS;
1471 }
1472 
1473 /// @}
1474 
1475 /// ----------------------------------------------------------------------------
1476 /// CeedOperator Public API
1477 /// ----------------------------------------------------------------------------
1478 /// @addtogroup CeedOperatorUser
1479 /// @{
1480 
1481 /**
1482   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1483 
1484   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1485   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices
1486 representing the action of the CeedQFunction for a corresponding quadrature point on an element.
1487 
1488   Inputs and outputs are in the order provided by the user when adding CeedOperator fields.
1489   For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction
1490 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1491 
1492   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1493 
1494   @param[in]  op        CeedOperator to assemble CeedQFunction
1495   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1496   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1497   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1498 
1499   @return An error code: 0 - success, otherwise - failure
1500 
1501   @ref User
1502 **/
1503 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1504   CeedCall(CeedOperatorCheckReady(op));
1505 
1506   if (op->LinearAssembleQFunction) {
1507     // Backend version
1508     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1509   } else {
1510     // Operator fallback
1511     CeedOperator op_fallback;
1512 
1513     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1514     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1515     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1516   }
1517   return CEED_ERROR_SUCCESS;
1518 }
1519 
1520 /**
1521   @brief Assemble CeedQFunction and store result internally.
1522 
1523   Return copied references of stored data to the caller.
1524   Caller is responsible for ownership and destruction of the copied references.
1525   See also @ref CeedOperatorLinearAssembleQFunction
1526 
1527   Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers.
1528         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
1529 
1530   @param[in]  op        CeedOperator to assemble CeedQFunction
1531   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1532   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1533   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1534 
1535   @return An error code: 0 - success, otherwise - failure
1536 
1537   @ref User
1538 **/
1539 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1540   CeedCall(CeedOperatorCheckReady(op));
1541 
1542   if (op->LinearAssembleQFunctionUpdate) {
1543     // Backend version
1544     bool                qf_assembled_is_setup;
1545     CeedVector          assembled_vec  = NULL;
1546     CeedElemRestriction assembled_rstr = NULL;
1547 
1548     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1549     if (qf_assembled_is_setup) {
1550       bool update_needed;
1551 
1552       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1553       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1554       if (update_needed) CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1555     } else {
1556       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1557       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1558     }
1559     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1560 
1561     // Copy reference from internally held copy
1562     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1563     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1564     CeedCall(CeedVectorDestroy(&assembled_vec));
1565     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1566   } else {
1567     // Operator fallback
1568     CeedOperator op_fallback;
1569 
1570     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1571     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1572     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1573   }
1574 
1575   return CEED_ERROR_SUCCESS;
1576 }
1577 
1578 /**
1579   @brief Assemble the diagonal of a square linear CeedOperator
1580 
1581   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1582 
1583   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1584 
1585   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1586 
1587   @param[in]  op        CeedOperator to assemble CeedQFunction
1588   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1589   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1590 
1591   @return An error code: 0 - success, otherwise - failure
1592 
1593   @ref User
1594 **/
1595 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1596   bool is_composite;
1597   CeedCall(CeedOperatorCheckReady(op));
1598   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1599 
1600   CeedSize input_size = 0, output_size = 0;
1601   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1602   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1603 
1604   // Early exit for empty operator
1605   if (!is_composite) {
1606     CeedInt num_elem = 0;
1607 
1608     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1609     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1610   }
1611 
1612   if (op->LinearAssembleDiagonal) {
1613     // Backend version
1614     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1615     return CEED_ERROR_SUCCESS;
1616   } else if (op->LinearAssembleAddDiagonal) {
1617     // Backend version with zeroing first
1618     CeedCall(CeedVectorSetValue(assembled, 0.0));
1619     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1620     return CEED_ERROR_SUCCESS;
1621   } else {
1622     // Operator fallback
1623     CeedOperator op_fallback;
1624 
1625     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1626     if (op_fallback) {
1627       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1628       return CEED_ERROR_SUCCESS;
1629     }
1630   }
1631   // Default interface implementation
1632   CeedCall(CeedVectorSetValue(assembled, 0.0));
1633   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1634 
1635   return CEED_ERROR_SUCCESS;
1636 }
1637 
1638 /**
1639   @brief Assemble the diagonal of a square linear CeedOperator
1640 
1641   This sums into a CeedVector the diagonal of a linear CeedOperator.
1642 
1643   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1644 
1645   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1646 
1647   @param[in]  op        CeedOperator to assemble CeedQFunction
1648   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1649   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1650 
1651   @return An error code: 0 - success, otherwise - failure
1652 
1653   @ref User
1654 **/
1655 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1656   bool is_composite;
1657   CeedCall(CeedOperatorCheckReady(op));
1658   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1659 
1660   CeedSize input_size = 0, output_size = 0;
1661   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1662   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1663 
1664   // Early exit for empty operator
1665   if (!is_composite) {
1666     CeedInt num_elem = 0;
1667 
1668     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1669     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1670   }
1671 
1672   if (op->LinearAssembleAddDiagonal) {
1673     // Backend version
1674     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1675     return CEED_ERROR_SUCCESS;
1676   } else {
1677     // Operator fallback
1678     CeedOperator op_fallback;
1679 
1680     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1681     if (op_fallback) {
1682       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1683       return CEED_ERROR_SUCCESS;
1684     }
1685   }
1686   // Default interface implementation
1687   if (is_composite) {
1688     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1689   } else {
1690     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1691   }
1692 
1693   return CEED_ERROR_SUCCESS;
1694 }
1695 
1696 /**
1697   @brief Assemble the point block diagonal of a square linear CeedOperator
1698 
1699   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1700 
1701   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1702 
1703   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1704 
1705   @param[in]  op        CeedOperator to assemble CeedQFunction
1706   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1707 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1708 component in].
1709   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1710 
1711   @return An error code: 0 - success, otherwise - failure
1712 
1713   @ref User
1714 **/
1715 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1716   bool is_composite;
1717   CeedCall(CeedOperatorCheckReady(op));
1718   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1719 
1720   CeedSize input_size = 0, output_size = 0;
1721   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1722   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1723 
1724   // Early exit for empty operator
1725   if (!is_composite) {
1726     CeedInt num_elem = 0;
1727 
1728     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1729     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1730   }
1731 
1732   if (op->LinearAssemblePointBlockDiagonal) {
1733     // Backend version
1734     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1735     return CEED_ERROR_SUCCESS;
1736   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1737     // Backend version with zeroing first
1738     CeedCall(CeedVectorSetValue(assembled, 0.0));
1739     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1740     return CEED_ERROR_SUCCESS;
1741   } else {
1742     // Operator fallback
1743     CeedOperator op_fallback;
1744 
1745     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1746     if (op_fallback) {
1747       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1748       return CEED_ERROR_SUCCESS;
1749     }
1750   }
1751   // Default interface implementation
1752   CeedCall(CeedVectorSetValue(assembled, 0.0));
1753   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1754 
1755   return CEED_ERROR_SUCCESS;
1756 }
1757 
1758 /**
1759   @brief Assemble the point block diagonal of a square linear CeedOperator
1760 
1761   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1762 
1763   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1764 
1765   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1766 
1767   @param[in]  op        CeedOperator to assemble CeedQFunction
1768   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1769 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1770 component in].
1771   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1772 
1773   @return An error code: 0 - success, otherwise - failure
1774 
1775   @ref User
1776 **/
1777 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1778   bool is_composite;
1779   CeedCall(CeedOperatorCheckReady(op));
1780   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1781 
1782   CeedSize input_size = 0, output_size = 0;
1783   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1784   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1785 
1786   // Early exit for empty operator
1787   if (!is_composite) {
1788     CeedInt num_elem = 0;
1789 
1790     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1791     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1792   }
1793 
1794   if (op->LinearAssembleAddPointBlockDiagonal) {
1795     // Backend version
1796     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1797     return CEED_ERROR_SUCCESS;
1798   } else {
1799     // Operator fallback
1800     CeedOperator op_fallback;
1801 
1802     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1803     if (op_fallback) {
1804       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1805       return CEED_ERROR_SUCCESS;
1806     }
1807   }
1808   // Default interface implementation
1809   if (is_composite) {
1810     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1811   } else {
1812     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1813   }
1814 
1815   return CEED_ERROR_SUCCESS;
1816 }
1817 
1818 /**
1819    @brief Fully assemble the nonzero pattern of a linear operator.
1820 
1821    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1822 
1823    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1824 matrix in entry (i, j).
1825   Note that the (i, j) pairs are not unique and may repeat.
1826   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1827 
1828    This will generally be slow unless your operator is low-order.
1829 
1830    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1831 
1832    @param[in]  op          CeedOperator to assemble
1833    @param[out] num_entries Number of entries in coordinate nonzero pattern
1834    @param[out] rows        Row number for each entry
1835    @param[out] cols        Column number for each entry
1836 
1837    @ref User
1838 **/
1839 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1840   CeedInt       num_suboperators;
1841   CeedSize      single_entries;
1842   CeedOperator *sub_operators;
1843   bool          is_composite;
1844   CeedCall(CeedOperatorCheckReady(op));
1845   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1846 
1847   if (op->LinearAssembleSymbolic) {
1848     // Backend version
1849     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1850     return CEED_ERROR_SUCCESS;
1851   } else {
1852     // Operator fallback
1853     CeedOperator op_fallback;
1854 
1855     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1856     if (op_fallback) {
1857       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1858       return CEED_ERROR_SUCCESS;
1859     }
1860   }
1861 
1862   // Default interface implementation
1863 
1864   // count entries and allocate rows, cols arrays
1865   *num_entries = 0;
1866   if (is_composite) {
1867     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1868     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1869     for (CeedInt k = 0; k < num_suboperators; ++k) {
1870       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1871       *num_entries += single_entries;
1872     }
1873   } else {
1874     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1875     *num_entries += single_entries;
1876   }
1877   CeedCall(CeedCalloc(*num_entries, rows));
1878   CeedCall(CeedCalloc(*num_entries, cols));
1879 
1880   // assemble nonzero locations
1881   CeedInt offset = 0;
1882   if (is_composite) {
1883     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1884     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1885     for (CeedInt k = 0; k < num_suboperators; ++k) {
1886       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1887       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1888       offset += single_entries;
1889     }
1890   } else {
1891     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1892   }
1893 
1894   return CEED_ERROR_SUCCESS;
1895 }
1896 
1897 /**
1898    @brief Fully assemble the nonzero entries of a linear operator.
1899 
1900    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1901 
1902    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1903 matrix in entry (i, j).
1904   Note that the (i, j) pairs are not unique and may repeat.
1905   This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1906 
1907    This will generally be slow unless your operator is low-order.
1908 
1909    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1910 
1911    @param[in]  op     CeedOperator to assemble
1912    @param[out] values Values to assemble into matrix
1913 
1914    @ref User
1915 **/
1916 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1917   CeedInt       num_suboperators;
1918   CeedSize      single_entries = 0;
1919   CeedOperator *sub_operators;
1920   bool          is_composite;
1921   CeedCall(CeedOperatorCheckReady(op));
1922   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1923 
1924   // Early exit for empty operator
1925   if (!is_composite) {
1926     CeedInt num_elem = 0;
1927 
1928     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1929     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1930   }
1931 
1932   if (op->LinearAssemble) {
1933     // Backend version
1934     CeedCall(op->LinearAssemble(op, values));
1935     return CEED_ERROR_SUCCESS;
1936   } else {
1937     // Operator fallback
1938     CeedOperator op_fallback;
1939 
1940     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1941     if (op_fallback) {
1942       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
1943       return CEED_ERROR_SUCCESS;
1944     }
1945   }
1946 
1947   // Default interface implementation
1948   CeedInt offset = 0;
1949   CeedCall(CeedVectorSetValue(values, 0.0));
1950   if (is_composite) {
1951     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1952     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1953     for (CeedInt k = 0; k < num_suboperators; k++) {
1954       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
1955       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1956       offset += single_entries;
1957     }
1958   } else {
1959     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
1960   }
1961 
1962   return CEED_ERROR_SUCCESS;
1963 }
1964 
1965 /**
1966   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
1967 
1968   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1969 
1970   @param[in]  op               Composite CeedOperator
1971   @param[in]  num_skip_indices Number of suboperators to skip
1972   @param[in]  skip_indices     Array of indices of suboperators to skip
1973   @param[out] mult             Vector to store multiplicity (of size l_size)
1974 
1975   @return An error code: 0 - success, otherwise - failure
1976 
1977   @ref User
1978 **/
1979 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
1980   CeedCall(CeedOperatorCheckReady(op));
1981 
1982   Ceed                ceed;
1983   CeedInt             num_suboperators;
1984   CeedSize            l_vec_len;
1985   CeedScalar         *mult_array;
1986   CeedVector          ones_l_vec;
1987   CeedElemRestriction elem_rstr;
1988   CeedOperator       *sub_operators;
1989 
1990   CeedCall(CeedOperatorGetCeed(op, &ceed));
1991 
1992   // Zero mult vector
1993   CeedCall(CeedVectorSetValue(mult, 0.0));
1994 
1995   // Get suboperators
1996   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1997   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1998   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
1999 
2000   // Work vector
2001   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2002   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2003   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2004   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2005 
2006   // Compute multiplicity across suboperators
2007   for (CeedInt i = 0; i < num_suboperators; i++) {
2008     const CeedScalar *sub_mult_array;
2009     CeedVector        sub_mult_l_vec, ones_e_vec;
2010 
2011     // -- Check for suboperator to skip
2012     for (CeedInt j = 0; j < num_skip_indices; j++) {
2013       if (skip_indices[j] == i) continue;
2014     }
2015 
2016     // -- Sub operator multiplicity
2017     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2018     CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2019     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2020     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2021     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2022     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2023     // ---- Flag every node present in the current suboperator
2024     for (CeedInt j = 0; j < l_vec_len; j++) {
2025       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2026     }
2027     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2028     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2029     CeedCall(CeedVectorDestroy(&ones_e_vec));
2030   }
2031   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2032   CeedCall(CeedVectorDestroy(&ones_l_vec));
2033 
2034   return CEED_ERROR_SUCCESS;
2035 }
2036 
2037 /**
2038   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2039 grid interpolation
2040 
2041   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2042 
2043   @param[in]  op_fine      Fine grid operator
2044   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2045   @param[in]  rstr_coarse  Coarse grid restriction
2046   @param[in]  basis_coarse Coarse grid active vector basis
2047   @param[out] op_coarse    Coarse grid operator
2048   @param[out] op_prolong   Coarse to fine operator, or NULL
2049   @param[out] op_restrict  Fine to coarse operator, or NULL
2050 
2051   @return An error code: 0 - success, otherwise - failure
2052 
2053   @ref User
2054 **/
2055 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2056                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2057   CeedCall(CeedOperatorCheckReady(op_fine));
2058 
2059   // Build prolongation matrix, if required
2060   CeedBasis basis_c_to_f = NULL;
2061   if (op_prolong || op_restrict) {
2062     CeedBasis basis_fine;
2063     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2064     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2065   }
2066 
2067   // Core code
2068   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2069 
2070   return CEED_ERROR_SUCCESS;
2071 }
2072 
2073 /**
2074   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2075 
2076   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2077 
2078   @param[in]  op_fine       Fine grid operator
2079   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2080   @param[in]  rstr_coarse   Coarse grid restriction
2081   @param[in]  basis_coarse  Coarse grid active vector basis
2082   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2083   @param[out] op_coarse     Coarse grid operator
2084   @param[out] op_prolong    Coarse to fine operator, or NULL
2085   @param[out] op_restrict   Fine to coarse operator, or NULL
2086 
2087   @return An error code: 0 - success, otherwise - failure
2088 
2089   @ref User
2090 **/
2091 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2092                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2093                                              CeedOperator *op_restrict) {
2094   CeedCall(CeedOperatorCheckReady(op_fine));
2095   Ceed ceed;
2096   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2097 
2098   // Check for compatible quadrature spaces
2099   CeedBasis basis_fine;
2100   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2101   CeedInt Q_f, Q_c;
2102   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2103   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2104   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2105 
2106   // Create coarse to fine basis, if required
2107   CeedBasis basis_c_to_f = NULL;
2108   if (op_prolong || op_restrict) {
2109     // Check if interpolation matrix is provided
2110     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2111               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2112     CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2113     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2114     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2115     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2116     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2117     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2118     CeedScalar *q_ref, *q_weight, *grad;
2119     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2120     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2121     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2122     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2123     CeedCall(CeedFree(&q_ref));
2124     CeedCall(CeedFree(&q_weight));
2125     CeedCall(CeedFree(&grad));
2126   }
2127 
2128   // Core code
2129   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2130   return CEED_ERROR_SUCCESS;
2131 }
2132 
2133 /**
2134   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2135 
2136   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2137 
2138   @param[in]  op_fine       Fine grid operator
2139   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2140   @param[in]  rstr_coarse   Coarse grid restriction
2141   @param[in]  basis_coarse  Coarse grid active vector basis
2142   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2143   @param[out] op_coarse     Coarse grid operator
2144   @param[out] op_prolong    Coarse to fine operator, or NULL
2145   @param[out] op_restrict   Fine to coarse operator, or NULL
2146 
2147   @return An error code: 0 - success, otherwise - failure
2148 
2149   @ref User
2150 **/
2151 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2152                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2153                                        CeedOperator *op_restrict) {
2154   CeedCall(CeedOperatorCheckReady(op_fine));
2155   Ceed ceed;
2156   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2157 
2158   // Check for compatible quadrature spaces
2159   CeedBasis basis_fine;
2160   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2161   CeedInt Q_f, Q_c;
2162   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2163   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2164   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2165 
2166   // Coarse to fine basis
2167   CeedBasis basis_c_to_f = NULL;
2168   if (op_prolong || op_restrict) {
2169     // Check if interpolation matrix is provided
2170     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2171               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2172     CeedElemTopology topo;
2173     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2174     CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2175     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2176     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2177     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2178     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2179     CeedScalar *q_ref, *q_weight, *grad;
2180     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2181     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2182     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2183     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2184     CeedCall(CeedFree(&q_ref));
2185     CeedCall(CeedFree(&q_weight));
2186     CeedCall(CeedFree(&grad));
2187   }
2188 
2189   // Core code
2190   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2191   return CEED_ERROR_SUCCESS;
2192 }
2193 
2194 /**
2195   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2196 
2197   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2198   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2199   The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T
2200 \hat S V\f$.
2201   The CeedOperator must be linear and non-composite.
2202   The associated CeedQFunction must therefore also be linear.
2203 
2204   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2205 
2206   @param[in]  op      CeedOperator to create element inverses
2207   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2208   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2209 
2210   @return An error code: 0 - success, otherwise - failure
2211 
2212   @ref User
2213 **/
2214 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2215   CeedCall(CeedOperatorCheckReady(op));
2216 
2217   if (op->CreateFDMElementInverse) {
2218     // Backend version
2219     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2220     return CEED_ERROR_SUCCESS;
2221   } else {
2222     // Operator fallback
2223     CeedOperator op_fallback;
2224 
2225     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2226     if (op_fallback) {
2227       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2228       return CEED_ERROR_SUCCESS;
2229     }
2230   }
2231 
2232   // Default interface implementation
2233   Ceed ceed, ceed_parent;
2234   CeedCall(CeedOperatorGetCeed(op, &ceed));
2235   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2236   ceed_parent = ceed_parent ? ceed_parent : ceed;
2237   CeedQFunction qf;
2238   CeedCall(CeedOperatorGetQFunction(op, &qf));
2239 
2240   // Determine active input basis
2241   bool                interp = false, grad = false;
2242   CeedBasis           basis = NULL;
2243   CeedElemRestriction rstr  = NULL;
2244   CeedOperatorField  *op_fields;
2245   CeedQFunctionField *qf_fields;
2246   CeedInt             num_input_fields;
2247   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2248   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2249   for (CeedInt i = 0; i < num_input_fields; i++) {
2250     CeedVector vec;
2251     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2252     if (vec == CEED_VECTOR_ACTIVE) {
2253       CeedEvalMode eval_mode;
2254       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2255       interp = interp || eval_mode == CEED_EVAL_INTERP;
2256       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2257       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2258       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2259     }
2260   }
2261   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2262   CeedSize l_size = 1;
2263   CeedInt  P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2264   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2265   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2266   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2267   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2268   CeedCall(CeedBasisGetDimension(basis, &dim));
2269   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2270   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2271   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2272 
2273   // Build and diagonalize 1D Mass and Laplacian
2274   bool is_tensor_basis;
2275   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2276   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2277   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2278   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2279   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2280   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2281   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2282   CeedCall(CeedCalloc(P_1d, &lambda));
2283   // -- Build matrices
2284   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2285   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2286   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2287   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2288   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2289 
2290   // -- Diagonalize
2291   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2292   CeedCall(CeedFree(&mass));
2293   CeedCall(CeedFree(&laplace));
2294   for (CeedInt i = 0; i < P_1d; i++) {
2295     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2296   }
2297   CeedCall(CeedFree(&x));
2298 
2299   // Assemble QFunction
2300   CeedVector          assembled = NULL;
2301   CeedElemRestriction rstr_qf   = NULL;
2302   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2303   CeedInt layout[3];
2304   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2305   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2306   CeedScalar max_norm = 0;
2307   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2308 
2309   // Calculate element averages
2310   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2311   CeedScalar       *elem_avg;
2312   const CeedScalar *assembled_array, *q_weight_array;
2313   CeedVector        q_weight;
2314   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2315   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2316   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2317   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2318   CeedCall(CeedCalloc(num_elem, &elem_avg));
2319   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2320   for (CeedInt e = 0; e < num_elem; e++) {
2321     CeedInt count = 0;
2322     for (CeedInt q = 0; q < num_qpts; q++) {
2323       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2324         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2325           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2326           count++;
2327         }
2328       }
2329     }
2330     if (count) {
2331       elem_avg[e] /= count;
2332     } else {
2333       elem_avg[e] = 1.0;
2334     }
2335   }
2336   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2337   CeedCall(CeedVectorDestroy(&assembled));
2338   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2339   CeedCall(CeedVectorDestroy(&q_weight));
2340 
2341   // Build FDM diagonal
2342   CeedVector  q_data;
2343   CeedScalar *q_data_array, *fdm_diagonal;
2344   CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2345   const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2346   for (CeedInt c = 0; c < num_comp; c++) {
2347     for (CeedInt n = 0; n < num_nodes; n++) {
2348       if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2349       if (grad) {
2350         for (CeedInt d = 0; d < dim; d++) {
2351           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2352           fdm_diagonal[c * num_nodes + n] += lambda[i];
2353         }
2354       }
2355       if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2356     }
2357   }
2358   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2359   CeedCall(CeedVectorSetValue(q_data, 0.0));
2360   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2361   for (CeedInt e = 0; e < num_elem; e++) {
2362     for (CeedInt c = 0; c < num_comp; c++) {
2363       for (CeedInt n = 0; n < num_nodes; n++) q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2364     }
2365   }
2366   CeedCall(CeedFree(&elem_avg));
2367   CeedCall(CeedFree(&fdm_diagonal));
2368   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2369 
2370   // Setup FDM operator
2371   // -- Basis
2372   CeedBasis   fdm_basis;
2373   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2374   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2375   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2376   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2377   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2378   CeedCall(CeedFree(&fdm_interp));
2379   CeedCall(CeedFree(&grad_dummy));
2380   CeedCall(CeedFree(&q_ref_dummy));
2381   CeedCall(CeedFree(&q_weight_dummy));
2382   CeedCall(CeedFree(&lambda));
2383 
2384   // -- Restriction
2385   CeedElemRestriction rstr_qd_i;
2386   CeedInt             strides[3] = {1, num_nodes, num_nodes * num_comp};
2387   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i));
2388   // -- QFunction
2389   CeedQFunction qf_fdm;
2390   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2391   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2392   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2393   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2394   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2395   // -- QFunction context
2396   CeedInt *num_comp_data;
2397   CeedCall(CeedCalloc(1, &num_comp_data));
2398   num_comp_data[0] = num_comp;
2399   CeedQFunctionContext ctx_fdm;
2400   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2401   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2402   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2403   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2404   // -- Operator
2405   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2406   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2407   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2408   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2409 
2410   // Cleanup
2411   CeedCall(CeedVectorDestroy(&q_data));
2412   CeedCall(CeedBasisDestroy(&fdm_basis));
2413   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2414   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2415 
2416   return CEED_ERROR_SUCCESS;
2417 }
2418 
2419 /// @}
2420