xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision 37eda346557d6c6a68044736ef6477e646c46425)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   char *source_path_with_name = NULL;
39 
40   // Check if NULL qf passed in
41   if (!qf) return CEED_ERROR_SUCCESS;
42 
43   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
44   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
45 
46   if (qf->source_path) {
47     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
48     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
49     memcpy(source_path_with_name, qf->source_path, path_len);
50     memcpy(&source_path_with_name[path_len], ":", 1);
51     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
52   } else {
53     CeedCall(CeedCalloc(1, &source_path_with_name));
54   }
55 
56   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
57   {
58     CeedQFunctionContext ctx;
59 
60     CeedCall(CeedQFunctionGetContext(qf, &ctx));
61     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
62   }
63   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
64     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
65   }
66   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
67     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
68   }
69   CeedCall(CeedFree(&source_path_with_name));
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   Ceed         ceed_fallback;
84   bool         is_composite;
85   CeedOperator op_fallback;
86 
87   // Check not already created
88   if (op->op_fallback) return CEED_ERROR_SUCCESS;
89 
90   // Fallback Ceed
91   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
92   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
93 
94   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
95   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
96 
97   // Clone Op
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114 
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
117     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
118     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
119     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
120       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
121                                     op->input_fields[i]->vec));
122     }
123     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
124       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
125                                     op->output_fields[i]->vec));
126     }
127     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
128     // Cleanup
129     CeedCall(CeedQFunctionDestroy(&qf_fallback));
130     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
131     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
132   }
133   CeedCall(CeedOperatorSetName(op_fallback, op->name));
134   CeedCall(CeedOperatorCheckReady(op_fallback));
135   // Note: No ref-counting here so we don't get caught in a reference loop.
136   //       The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback.
137   op->op_fallback                 = op_fallback;
138   op_fallback->op_fallback_parent = op;
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
144 
145   @param[in]  op          CeedOperator to retrieve fallback for
146   @param[out] op_fallback Fallback CeedOperator
147 
148   @return An error code: 0 - success, otherwise - failure
149 
150   @ref Developer
151 **/
152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
153   // Create if needed
154   if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op));
155   if (op->op_fallback) {
156     bool is_debug;
157 
158     CeedCall(CeedIsDebug(op->ceed, &is_debug));
159     if (is_debug) {
160       Ceed        ceed, ceed_fallback;
161       const char *resource, *resource_fallback;
162 
163       CeedCall(CeedOperatorGetCeed(op, &ceed));
164       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
165       CeedCall(CeedGetResource(ceed, &resource));
166       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
167 
168       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n");
169       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
170                 op->op_fallback);
171     }
172   }
173   *op_fallback = op->op_fallback;
174   return CEED_ERROR_SUCCESS;
175 }
176 
177 /**
178   @brief Get the parent CeedOperator for a fallback CeedOperator
179 
180   @param[in]  op     CeedOperator context
181   @param[out] parent Variable to store parent CeedOperator context
182 
183   @return An error code: 0 - success, otherwise - failure
184 
185   @ref Developer
186 **/
187 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) {
188   *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL;
189   return CEED_ERROR_SUCCESS;
190 }
191 
192 /**
193   @brief Get the Ceed context of the parent CeedOperator for a fallback CeedOperator
194 
195   @param[in]  op     CeedOperator context
196   @param[out] parent Variable to store parent Ceed context
197 
198   @return An error code: 0 - success, otherwise - failure
199 
200   @ref Developer
201 **/
202 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) {
203   *parent = op->op_fallback_parent ? op->op_fallback_parent->ceed : op->ceed;
204   return CEED_ERROR_SUCCESS;
205 }
206 
207 /**
208   @brief Select correct basis matrix pointer based on CeedEvalMode
209 
210   @param[in]  basis     CeedBasis from which to get the basis matrix
211   @param[in]  eval_mode Current basis evaluation mode
212   @param[in]  identity  Pointer to identity matrix
213   @param[out] basis_ptr Basis pointer to set
214 
215   @ref Developer
216 **/
217 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
218   switch (eval_mode) {
219     case CEED_EVAL_NONE:
220       *basis_ptr = identity;
221       break;
222     case CEED_EVAL_INTERP:
223       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
224       break;
225     case CEED_EVAL_GRAD:
226       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
227       break;
228     case CEED_EVAL_DIV:
229       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
230       break;
231     case CEED_EVAL_CURL:
232       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
233       break;
234     case CEED_EVAL_WEIGHT:
235       break;  // Caught by QF Assembly
236   }
237   assert(*basis_ptr != NULL);
238   return CEED_ERROR_SUCCESS;
239 }
240 
241 /**
242   @brief Core logic for assembling operator diagonal or point block diagonal
243 
244   @param[in]  op             CeedOperator to assemble point block diagonal
245   @param[in]  request        Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
246   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
247   @param[out] assembled      CeedVector to store assembled diagonal
248 
249   @return An error code: 0 - success, otherwise - failure
250 
251   @ref Developer
252 **/
253 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_point_block, CeedVector assembled) {
254   Ceed ceed;
255   bool is_composite;
256 
257   CeedCall(CeedOperatorGetCeed(op, &ceed));
258   CeedCall(CeedOperatorIsComposite(op, &is_composite));
259   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
260 
261   // Assemble QFunction
262   CeedInt             layout_qf[3];
263   const CeedScalar   *assembled_qf_array;
264   CeedVector          assembled_qf        = NULL;
265   CeedElemRestriction assembled_elem_rstr = NULL;
266 
267   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
268   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf));
269   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
270   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
271 
272   // Get assembly data
273   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
274   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
275   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
276   CeedBasis               *active_bases_in, *active_bases_out;
277   CeedElemRestriction     *active_elem_rstrs_in, *active_elem_rstrs_out;
278   CeedOperatorAssemblyData data;
279 
280   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
281   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
282                                                 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out,
283                                                 &num_output_components));
284   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL));
285   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out));
286 
287   // Loop over all active bases (find matching input/output pairs)
288   for (CeedInt b = 0; b < CeedIntMin(num_active_bases_in, num_active_bases_out); b++) {
289     CeedInt             b_in, b_out, num_elem, num_nodes, num_qpts, num_comp;
290     bool                has_eval_none = false;
291     CeedScalar         *elem_diag_array, *identity = NULL;
292     CeedVector          elem_diag;
293     CeedElemRestriction diag_elem_rstr;
294 
295     if (num_active_bases_in <= num_active_bases_out) {
296       b_in = b;
297       for (b_out = 0; b_out < num_active_bases_out; b_out++) {
298         if (active_bases_in[b_in] == active_bases_out[b_out]) {
299           break;
300         }
301       }
302       if (b_out == num_active_bases_out) {
303         continue;
304       }  // No matching output basis found
305     } else {
306       b_out = b;
307       for (b_in = 0; b_in < num_active_bases_in; b_in++) {
308         if (active_bases_in[b_in] == active_bases_out[b_out]) {
309           break;
310         }
311       }
312       if (b_in == num_active_bases_in) {
313         continue;
314       }  // No matching output basis found
315     }
316     CeedCheck(active_elem_rstrs_in[b_in] == active_elem_rstrs_out[b_out], ceed, CEED_ERROR_UNSUPPORTED,
317               "Cannot assemble operator diagonal with different input and output active element restrictions");
318 
319     // Assemble point block diagonal restriction, if needed
320     if (is_point_block) {
321       CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b_in], &diag_elem_rstr));
322     } else {
323       CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b_in], &diag_elem_rstr));
324     }
325 
326     // Create diagonal vector
327     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
328 
329     // Assemble element operator diagonals
330     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
331     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
332     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
333     CeedCall(CeedBasisGetNumNodes(active_bases_in[b_in], &num_nodes));
334     CeedCall(CeedBasisGetNumComponents(active_bases_in[b_in], &num_comp));
335     if (active_bases_in[b_in] == CEED_BASIS_NONE) num_qpts = num_nodes;
336     else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b_in], &num_qpts));
337 
338     // Construct identity matrix for basis if required
339     for (CeedInt i = 0; i < num_eval_modes_in[b_in]; i++) {
340       has_eval_none = has_eval_none || (eval_modes_in[b_in][i] == CEED_EVAL_NONE);
341     }
342     for (CeedInt i = 0; i < num_eval_modes_out[b_out]; i++) {
343       has_eval_none = has_eval_none || (eval_modes_out[b_out][i] == CEED_EVAL_NONE);
344     }
345     if (has_eval_none) {
346       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
347       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
348     }
349 
350     // Compute the diagonal of B^T D B
351     // Each element
352     for (CeedSize e = 0; e < num_elem; e++) {
353       // Each basis eval mode pair
354       CeedInt      d_out              = 0, q_comp_out;
355       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
356 
357       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b_out]; e_out++) {
358         CeedInt           d_in              = 0, q_comp_in;
359         const CeedScalar *B_t               = NULL;
360         CeedEvalMode      eval_mode_in_prev = CEED_EVAL_NONE;
361 
362         CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b_out], eval_modes_out[b_out][e_out], identity, &B_t));
363         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b_out], eval_modes_out[b_out][e_out], &q_comp_out));
364         if (q_comp_out > 1) {
365           if (e_out == 0 || eval_modes_out[b_out][e_out] != eval_mode_out_prev) d_out = 0;
366           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
367         }
368         eval_mode_out_prev = eval_modes_out[b_out][e_out];
369 
370         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b_in]; e_in++) {
371           const CeedScalar *B = NULL;
372 
373           CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b_in], eval_modes_in[b_in][e_in], identity, &B));
374           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b_in], eval_modes_in[b_in][e_in], &q_comp_in));
375           if (q_comp_in > 1) {
376             if (e_in == 0 || eval_modes_in[b_in][e_in] != eval_mode_in_prev) d_in = 0;
377             else B = &B[(++d_in) * num_qpts * num_nodes];
378           }
379           eval_mode_in_prev = eval_modes_in[b_in][e_in];
380 
381           // Each component
382           for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
383             // Each qpt/node pair
384             for (CeedInt q = 0; q < num_qpts; q++) {
385               if (is_point_block) {
386                 // Point Block Diagonal
387                 for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
388                   const CeedSize c_offset =
389                       (eval_mode_offsets_in[b_in][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
390                   const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
391 
392                   for (CeedInt n = 0; n < num_nodes; n++) {
393                     elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
394                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
395                   }
396                 }
397               } else {
398                 // Diagonal Only
399                 const CeedInt c_offset =
400                     (eval_mode_offsets_in[b_in][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
401                 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
402 
403                 for (CeedInt n = 0; n < num_nodes; n++) {
404                   elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
405                 }
406               }
407             }
408           }
409         }
410       }
411     }
412     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
413 
414     // Assemble local operator diagonal
415     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
416 
417     // Cleanup
418     CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
419     CeedCall(CeedVectorDestroy(&elem_diag));
420     CeedCall(CeedFree(&identity));
421   }
422   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
423   CeedCall(CeedVectorDestroy(&assembled_qf));
424   return CEED_ERROR_SUCCESS;
425 }
426 
427 /**
428   @brief Core logic for assembling composite operator diagonal
429 
430   @param[in]  op             CeedOperator to assemble point block diagonal
431   @param[in]  request        Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
432   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
433   @param[out] assembled      CeedVector to store assembled diagonal
434 
435   @return An error code: 0 - success, otherwise - failure
436 
437   @ref Developer
438 **/
439 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block,
440                                                                  CeedVector assembled) {
441   CeedInt       num_sub;
442   CeedOperator *suboperators;
443 
444   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
445   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
446   for (CeedInt i = 0; i < num_sub; i++) {
447     if (is_point_block) {
448       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
449     } else {
450       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
451     }
452   }
453   return CEED_ERROR_SUCCESS;
454 }
455 
456 /**
457   @brief Build nonzero pattern for non-composite operator
458 
459   Users should generally use CeedOperatorLinearAssembleSymbolic()
460 
461   @param[in]  op     CeedOperator to assemble nonzero pattern
462   @param[in]  offset Offset for number of entries
463   @param[out] rows   Row number for each entry
464   @param[out] cols   Column number for each entry
465 
466   @return An error code: 0 - success, otherwise - failure
467 
468   @ref Developer
469 **/
470 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
471   Ceed                ceed;
472   bool                is_composite;
473   CeedSize            num_nodes_in, num_nodes_out, count = 0;
474   CeedInt             num_elem_in, elem_size_in, num_comp_in, layout_er_in[3];
475   CeedInt             num_elem_out, elem_size_out, num_comp_out, layout_er_out[3], local_num_entries;
476   CeedScalar         *array;
477   const CeedScalar   *elem_dof_a_in, *elem_dof_a_out;
478   CeedVector          index_vec_in, index_vec_out, elem_dof_in, elem_dof_out;
479   CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out;
480 
481   CeedCall(CeedOperatorGetCeed(op, &ceed));
482   CeedCall(CeedOperatorIsComposite(op, &is_composite));
483   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
484 
485   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out));
486   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
487   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
488   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
489   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
490   CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, &layout_er_in));
491 
492   // Determine elem_dof relation for input
493   CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in));
494   CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array));
495   for (CeedInt i = 0; i < num_nodes_in; i++) array[i] = i;
496   CeedCall(CeedVectorRestoreArray(index_vec_in, &array));
497   CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in));
498   CeedCall(CeedVectorSetValue(elem_dof_in, 0.0));
499   CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in));
500   CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE));
501   CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in));
502   CeedCall(CeedVectorDestroy(&index_vec_in));
503   CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in));
504 
505   if (elem_rstr_in != elem_rstr_out) {
506     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
507     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
508               "Active input and output operator restrictions must have the same number of elements");
509     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
510     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
511     CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, &layout_er_out));
512 
513     // Determine elem_dof relation for output
514     CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out));
515     CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array));
516     for (CeedInt i = 0; i < num_nodes_out; i++) array[i] = i;
517     CeedCall(CeedVectorRestoreArray(index_vec_out, &array));
518     CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out));
519     CeedCall(CeedVectorSetValue(elem_dof_out, 0.0));
520     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out));
521     CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE));
522     CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out));
523     CeedCall(CeedVectorDestroy(&index_vec_out));
524     CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out));
525   } else {
526     num_elem_out     = num_elem_in;
527     elem_size_out    = elem_size_in;
528     num_comp_out     = num_comp_in;
529     layout_er_out[0] = layout_er_in[0];
530     layout_er_out[1] = layout_er_in[1];
531     layout_er_out[2] = layout_er_in[2];
532     elem_dof_a_out   = elem_dof_a_in;
533   }
534   local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
535 
536   // Determine i, j locations for element matrices
537   for (CeedInt e = 0; e < num_elem_in; e++) {
538     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
539       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
540         for (CeedInt i = 0; i < elem_size_out; i++) {
541           for (CeedInt j = 0; j < elem_size_in; j++) {
542             const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2];
543             const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2];
544             const CeedInt row                = elem_dof_a_out[elem_dof_index_row];
545             const CeedInt col                = elem_dof_a_in[elem_dof_index_col];
546 
547             rows[offset + count] = row;
548             cols[offset + count] = col;
549             count++;
550           }
551         }
552       }
553     }
554   }
555   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
556   CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in));
557   CeedCall(CeedVectorDestroy(&elem_dof_in));
558   if (elem_rstr_in != elem_rstr_out) {
559     CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out));
560     CeedCall(CeedVectorDestroy(&elem_dof_out));
561   }
562   return CEED_ERROR_SUCCESS;
563 }
564 
565 /**
566   @brief Assemble nonzero entries for non-composite operator
567 
568   Users should generally use CeedOperatorLinearAssemble()
569 
570   @param[in]  op     CeedOperator to assemble
571   @param[in]  offset Offset for number of entries
572   @param[out] values Values to assemble into matrix
573 
574   @return An error code: 0 - success, otherwise - failure
575 
576   @ref Developer
577 **/
578 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
579   Ceed ceed;
580   bool is_composite;
581 
582   CeedCall(CeedOperatorGetCeed(op, &ceed));
583   CeedCall(CeedOperatorIsComposite(op, &is_composite));
584   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
585 
586   // Early exit for empty operator
587   {
588     CeedInt num_elem = 0;
589 
590     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
591     if (num_elem == 0) return CEED_ERROR_SUCCESS;
592   }
593 
594   if (op->LinearAssembleSingle) {
595     // Backend version
596     CeedCall(op->LinearAssembleSingle(op, offset, values));
597     return CEED_ERROR_SUCCESS;
598   } else {
599     // Operator fallback
600     CeedOperator op_fallback;
601 
602     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
603     if (op_fallback) {
604       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
605       return CEED_ERROR_SUCCESS;
606     }
607   }
608 
609   // Assemble QFunction
610   CeedInt             layout_qf[3];
611   const CeedScalar   *assembled_qf_array;
612   CeedVector          assembled_qf        = NULL;
613   CeedElemRestriction assembled_elem_rstr = NULL;
614 
615   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE));
616   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf));
617   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
618   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
619 
620   // Get assembly data
621   CeedInt                  num_elem_in, elem_size_in, num_comp_in, num_qpts_in;
622   CeedInt                  num_elem_out, elem_size_out, num_comp_out, num_qpts_out, local_num_entries;
623   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
624   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
625   CeedBasis               *active_bases_in, *active_bases_out, basis_in, basis_out;
626   const CeedScalar       **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out;
627   CeedElemRestriction      elem_rstr_in, elem_rstr_out;
628   CeedRestrictionType      elem_rstr_type_in, elem_rstr_type_out;
629   const bool              *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL;
630   const CeedInt8          *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL;
631   CeedOperatorAssemblyData data;
632 
633   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
634   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out,
635                                                 &num_eval_modes_out, &eval_modes_out, NULL, NULL));
636 
637   CeedCheck(num_active_bases_in == num_active_bases_out && num_active_bases_in == 1, ceed, CEED_ERROR_UNSUPPORTED,
638             "Cannot assemble operator with multiple active bases");
639   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator without inputs/outputs");
640 
641   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out));
642   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
643   basis_in  = active_bases_in[0];
644   basis_out = active_bases_out[0];
645   B_mat_in  = B_mats_in[0];
646   B_mat_out = B_mats_out[0];
647 
648   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
649   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
650   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
651   if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in;
652   else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in));
653 
654   CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in));
655   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
656     CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in));
657   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
658     CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in));
659   }
660 
661   if (elem_rstr_in != elem_rstr_out) {
662     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
663     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
664               "Active input and output operator restrictions must have the same number of elements");
665     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
666     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
667     if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out;
668     else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out));
669     CeedCheck(num_qpts_in == num_qpts_out, ceed, CEED_ERROR_UNSUPPORTED,
670               "Active input and output bases must have the same number of quadrature points");
671 
672     CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out));
673     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
674       CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out));
675     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
676       CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out));
677     }
678   } else {
679     num_elem_out  = num_elem_in;
680     elem_size_out = elem_size_in;
681     num_comp_out  = num_comp_in;
682     num_qpts_out  = num_qpts_in;
683 
684     elem_rstr_orients_out      = elem_rstr_orients_in;
685     elem_rstr_curl_orients_out = elem_rstr_curl_orients_in;
686   }
687   local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
688 
689   // Loop over elements and put in data structure
690   // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
691   CeedSize    count = 0;
692   CeedScalar *vals, *BTD_mat = NULL, *elem_mat = NULL, *elem_mat_b = NULL;
693 
694   CeedCall(CeedCalloc(elem_size_out * num_qpts_in * num_eval_modes_in[0], &BTD_mat));
695   CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat));
696   if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b));
697 
698   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
699   for (CeedSize e = 0; e < num_elem_in; e++) {
700     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
701       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
702         // Compute B^T*D
703         for (CeedSize n = 0; n < elem_size_out; n++) {
704           for (CeedSize q = 0; q < num_qpts_in; q++) {
705             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
706               const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in;
707               CeedScalar     sum       = 0.0;
708 
709               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
710                 const CeedSize b_out_index     = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n;
711                 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out;
712                 const CeedSize qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
713 
714                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
715               }
716               BTD_mat[btd_index] = sum;
717             }
718           }
719         }
720 
721         // Form element matrix itself (for each block component)
722         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size_out, elem_size_in, num_qpts_in * num_eval_modes_in[0]));
723 
724         // Transform the element matrix if required
725         if (elem_rstr_orients_out) {
726           const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out];
727 
728           for (CeedInt i = 0; i < elem_size_out; i++) {
729             const double orient = elem_orients[i] ? -1.0 : 1.0;
730 
731             for (CeedInt j = 0; j < elem_size_in; j++) {
732               elem_mat[i * elem_size_in + j] *= orient;
733             }
734           }
735         } else if (elem_rstr_curl_orients_out) {
736           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out];
737 
738           // T^T*(B^T*D*B)
739           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
740           for (CeedInt i = 0; i < elem_size_out; i++) {
741             for (CeedInt j = 0; j < elem_size_in; j++) {
742               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] +
743                                                (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) +
744                                                (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0);
745             }
746           }
747         }
748         if (elem_rstr_orients_in) {
749           const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in];
750 
751           for (CeedInt i = 0; i < elem_size_out; i++) {
752             for (CeedInt j = 0; j < elem_size_in; j++) {
753               elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0;
754             }
755           }
756         } else if (elem_rstr_curl_orients_in) {
757           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in];
758 
759           // (B^T*D*B)*T
760           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
761           for (CeedInt i = 0; i < elem_size_out; i++) {
762             for (CeedInt j = 0; j < elem_size_in; j++) {
763               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] +
764                                                (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) +
765                                                (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0);
766             }
767           }
768         }
769 
770         // Put element matrix in coordinate data structure
771         for (CeedInt i = 0; i < elem_size_out; i++) {
772           for (CeedInt j = 0; j < elem_size_in; j++) {
773             vals[offset + count] = elem_mat[i * elem_size_in + j];
774             count++;
775           }
776         }
777       }
778     }
779   }
780   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries");
781   CeedCall(CeedVectorRestoreArray(values, &vals));
782 
783   // Cleanup
784   CeedCall(CeedFree(&BTD_mat));
785   CeedCall(CeedFree(&elem_mat));
786   CeedCall(CeedFree(&elem_mat_b));
787   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
788     CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in));
789   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
790     CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in));
791   }
792   if (elem_rstr_in != elem_rstr_out) {
793     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
794       CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out));
795     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
796       CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out));
797     }
798   }
799   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
800   CeedCall(CeedVectorDestroy(&assembled_qf));
801   return CEED_ERROR_SUCCESS;
802 }
803 
804 /**
805   @brief Count number of entries for assembled CeedOperator
806 
807   @param[in]  op          CeedOperator to assemble
808   @param[out] num_entries Number of entries in assembled representation
809 
810   @return An error code: 0 - success, otherwise - failure
811 
812   @ref Utility
813 **/
814 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) {
815   bool                is_composite;
816   CeedInt             num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out;
817   CeedElemRestriction rstr_in, rstr_out;
818 
819   CeedCall(CeedOperatorIsComposite(op, &is_composite));
820   CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
821 
822   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out));
823   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in));
824   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in));
825   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in));
826   if (rstr_in != rstr_out) {
827     CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out));
828     CeedCheck(num_elem_in == num_elem_out, op->ceed, CEED_ERROR_UNSUPPORTED,
829               "Active input and output operator restrictions must have the same number of elements");
830     CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out));
831     CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out));
832   } else {
833     num_elem_out  = num_elem_in;
834     elem_size_out = elem_size_in;
835     num_comp_out  = num_comp_in;
836   }
837   *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in;
838   return CEED_ERROR_SUCCESS;
839 }
840 
841 /**
842   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
843 
844   @param[in]  op_fine      Fine grid operator
845   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
846   @param[in]  rstr_coarse  Coarse grid restriction
847   @param[in]  basis_coarse Coarse grid active vector basis
848   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
849   @param[out] op_coarse    Coarse grid operator
850   @param[out] op_prolong   Coarse to fine operator, or NULL
851   @param[out] op_restrict  Fine to coarse operator, or NULL
852 
853   @return An error code: 0 - success, otherwise - failure
854 
855   @ref Developer
856 **/
857 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
858                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
859   bool                is_composite;
860   Ceed                ceed;
861   CeedInt             num_comp;
862   CeedVector          mult_vec         = NULL;
863   CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL;
864 
865   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
866 
867   // Check for composite operator
868   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
869   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
870 
871   // Coarse Grid
872   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
873   // -- Clone input fields
874   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
875     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
876       rstr_fine = op_fine->input_fields[i]->elem_rstr;
877       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
878     } else {
879       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
880                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
881     }
882   }
883   // -- Clone output fields
884   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
885     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
886       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
887     } else {
888       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
889                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
890     }
891   }
892   // -- Clone QFunctionAssemblyData
893   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
894 
895   // Multiplicity vector
896   if (op_restrict || op_prolong) {
897     CeedVector          mult_e_vec;
898     CeedRestrictionType rstr_type;
899 
900     CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type));
901     CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED,
902               "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported");
903     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
904     CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine));
905     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
906     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
907     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
908     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
909     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
910     CeedCall(CeedVectorDestroy(&mult_e_vec));
911     CeedCall(CeedVectorReciprocal(mult_vec));
912   }
913 
914   // Clone name
915   bool   has_name = op_fine->name;
916   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
917   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
918 
919   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
920   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
921             "Prolongation or restriction operator creation requires coarse-to-fine basis");
922 
923   // Restriction/Prolongation Operators
924   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
925 
926   // Restriction
927   if (op_restrict) {
928     CeedInt             *num_comp_r_data;
929     CeedQFunctionContext ctx_r;
930     CeedQFunction        qf_restrict;
931 
932     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
933     CeedCall(CeedCalloc(1, &num_comp_r_data));
934     num_comp_r_data[0] = num_comp;
935     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
936     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
937     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
938     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
939     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
940     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
941     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
942     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
943 
944     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
945     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
946     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
947     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
948 
949     // Set name
950     char *restriction_name;
951 
952     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
953     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
954     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
955     CeedCall(CeedFree(&restriction_name));
956 
957     // Check
958     CeedCall(CeedOperatorCheckReady(*op_restrict));
959 
960     // Cleanup
961     CeedCall(CeedQFunctionDestroy(&qf_restrict));
962   }
963 
964   // Prolongation
965   if (op_prolong) {
966     CeedInt             *num_comp_p_data;
967     CeedQFunctionContext ctx_p;
968     CeedQFunction        qf_prolong;
969 
970     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
971     CeedCall(CeedCalloc(1, &num_comp_p_data));
972     num_comp_p_data[0] = num_comp;
973     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
974     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
975     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
976     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
977     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
978     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
979     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
980     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
981 
982     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
983     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
984     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
985     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
986 
987     // Set name
988     char *prolongation_name;
989 
990     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
991     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
992     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
993     CeedCall(CeedFree(&prolongation_name));
994 
995     // Check
996     CeedCall(CeedOperatorCheckReady(*op_prolong));
997 
998     // Cleanup
999     CeedCall(CeedQFunctionDestroy(&qf_prolong));
1000   }
1001 
1002   // Check
1003   CeedCall(CeedOperatorCheckReady(*op_coarse));
1004 
1005   // Cleanup
1006   CeedCall(CeedVectorDestroy(&mult_vec));
1007   CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine));
1008   CeedCall(CeedBasisDestroy(&basis_c_to_f));
1009   return CEED_ERROR_SUCCESS;
1010 }
1011 
1012 /**
1013   @brief Build 1D mass matrix and Laplacian with perturbation
1014 
1015   @param[in]  interp_1d   Interpolation matrix in one dimension
1016   @param[in]  grad_1d     Gradient matrix in one dimension
1017   @param[in]  q_weight_1d Quadrature weights in one dimension
1018   @param[in]  P_1d        Number of basis nodes in one dimension
1019   @param[in]  Q_1d        Number of quadrature points in one dimension
1020   @param[in]  dim         Dimension of basis
1021   @param[out] mass        Assembled mass matrix in one dimension
1022   @param[out] laplace     Assembled perturbed Laplacian in one dimension
1023 
1024   @return An error code: 0 - success, otherwise - failure
1025 
1026   @ref Developer
1027 **/
1028 CeedPragmaOptimizeOff
1029 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
1030                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
1031   for (CeedInt i = 0; i < P_1d; i++) {
1032     for (CeedInt j = 0; j < P_1d; j++) {
1033       CeedScalar sum = 0.0;
1034       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
1035       mass[i + j * P_1d] = sum;
1036     }
1037   }
1038   // -- Laplacian
1039   for (CeedInt i = 0; i < P_1d; i++) {
1040     for (CeedInt j = 0; j < P_1d; j++) {
1041       CeedScalar sum = 0.0;
1042 
1043       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
1044       laplace[i + j * P_1d] = sum;
1045     }
1046   }
1047   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
1048   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
1049   return CEED_ERROR_SUCCESS;
1050 }
1051 CeedPragmaOptimizeOn
1052 
1053 /// @}
1054 
1055 /// ----------------------------------------------------------------------------
1056 /// CeedOperator Backend API
1057 /// ----------------------------------------------------------------------------
1058 /// @addtogroup CeedOperatorBackend
1059 /// @{
1060 
1061 /**
1062   @brief Create point block restriction for active operator field
1063 
1064   @param[in]  rstr             Original CeedElemRestriction for active field
1065   @param[out] point_block_rstr Address of the variable where the newly created CeedElemRestriction will be stored
1066 
1067   @return An error code: 0 - success, otherwise - failure
1068 
1069   @ref Backend
1070 **/
1071 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) {
1072   Ceed           ceed;
1073   CeedInt        num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets;
1074   CeedSize       l_size;
1075   const CeedInt *offsets;
1076 
1077   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
1078   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
1079 
1080   // Expand offsets
1081   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
1082   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
1083   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
1084   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
1085   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
1086   shift = num_comp;
1087   if (comp_stride != 1) shift *= num_comp;
1088   CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets));
1089   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
1090     point_block_offsets[i] = offsets[i] * shift;
1091   }
1092 
1093   // Create new restriction
1094   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
1095                                      point_block_offsets, point_block_rstr));
1096 
1097   // Cleanup
1098   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
1099   return CEED_ERROR_SUCCESS;
1100 }
1101 
1102 /**
1103   @brief Create object holding CeedQFunction assembly data for CeedOperator
1104 
1105   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
1106   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
1107 
1108   @return An error code: 0 - success, otherwise - failure
1109 
1110   @ref Backend
1111 **/
1112 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
1113   CeedCall(CeedCalloc(1, data));
1114   (*data)->ref_count = 1;
1115   (*data)->ceed      = ceed;
1116   CeedCall(CeedReference(ceed));
1117   return CEED_ERROR_SUCCESS;
1118 }
1119 
1120 /**
1121   @brief Increment the reference counter for a CeedQFunctionAssemblyData
1122 
1123   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
1124 
1125   @return An error code: 0 - success, otherwise - failure
1126 
1127   @ref Backend
1128 **/
1129 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
1130   data->ref_count++;
1131   return CEED_ERROR_SUCCESS;
1132 }
1133 
1134 /**
1135   @brief Set re-use of CeedQFunctionAssemblyData
1136 
1137   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
1138   @param[in]     reuse_data Boolean flag indicating data re-use
1139 
1140   @return An error code: 0 - success, otherwise - failure
1141 
1142   @ref Backend
1143 **/
1144 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
1145   data->reuse_data        = reuse_data;
1146   data->needs_data_update = true;
1147   return CEED_ERROR_SUCCESS;
1148 }
1149 
1150 /**
1151   @brief Mark QFunctionAssemblyData as stale
1152 
1153   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
1154   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
1155 
1156   @return An error code: 0 - success, otherwise - failure
1157 
1158   @ref Backend
1159 **/
1160 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
1161   data->needs_data_update = needs_data_update;
1162   return CEED_ERROR_SUCCESS;
1163 }
1164 
1165 /**
1166   @brief Determine if QFunctionAssemblyData needs update
1167 
1168   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
1169   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1170 
1171   @return An error code: 0 - success, otherwise - failure
1172 
1173   @ref Backend
1174 **/
1175 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
1176   *is_update_needed = !data->reuse_data || data->needs_data_update;
1177   return CEED_ERROR_SUCCESS;
1178 }
1179 
1180 /**
1181   @brief Copy the pointer to a CeedQFunctionAssemblyData.
1182 
1183   Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
1184 
1185   Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
1186         CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
1187         CeedQFunctionAssemblyData.
1188 
1189   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
1190   @param[in,out] data_copy Variable to store copied reference
1191 
1192   @return An error code: 0 - success, otherwise - failure
1193 
1194   @ref Backend
1195 **/
1196 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1197   CeedCall(CeedQFunctionAssemblyDataReference(data));
1198   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1199   *data_copy = data;
1200   return CEED_ERROR_SUCCESS;
1201 }
1202 
1203 /**
1204   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1205 
1206   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1207   @param[out] is_setup Boolean flag for setup status
1208 
1209   @return An error code: 0 - success, otherwise - failure
1210 
1211   @ref Backend
1212 **/
1213 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1214   *is_setup = data->is_setup;
1215   return CEED_ERROR_SUCCESS;
1216 }
1217 
1218 /**
1219   @brief Set internal objects for CeedQFunctionAssemblyData
1220 
1221   @param[in,out] data CeedQFunctionAssemblyData to set objects
1222   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1223   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1224 
1225   @return An error code: 0 - success, otherwise - failure
1226 
1227   @ref Backend
1228 **/
1229 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1230   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1231   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1232 
1233   data->is_setup = true;
1234   return CEED_ERROR_SUCCESS;
1235 }
1236 
1237 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1238   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1239 
1240   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1241   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1242   return CEED_ERROR_SUCCESS;
1243 }
1244 
1245 /**
1246   @brief Destroy CeedQFunctionAssemblyData
1247 
1248   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1249 
1250   @return An error code: 0 - success, otherwise - failure
1251 
1252   @ref Backend
1253 **/
1254 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1255   if (!*data || --(*data)->ref_count > 0) {
1256     *data = NULL;
1257     return CEED_ERROR_SUCCESS;
1258   }
1259   CeedCall(CeedDestroy(&(*data)->ceed));
1260   CeedCall(CeedVectorDestroy(&(*data)->vec));
1261   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1262 
1263   CeedCall(CeedFree(data));
1264   return CEED_ERROR_SUCCESS;
1265 }
1266 
1267 /**
1268   @brief Get CeedOperatorAssemblyData
1269 
1270   @param[in]  op   CeedOperator to assemble
1271   @param[out] data CeedQFunctionAssemblyData
1272 
1273   @return An error code: 0 - success, otherwise - failure
1274 
1275   @ref Backend
1276 **/
1277 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1278   if (!op->op_assembled) {
1279     CeedOperatorAssemblyData data;
1280 
1281     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1282     op->op_assembled = data;
1283   }
1284   *data = op->op_assembled;
1285   return CEED_ERROR_SUCCESS;
1286 }
1287 
1288 /**
1289   @brief Create object holding CeedOperator assembly data.
1290 
1291   The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1292   An array with references to the corresponding active CeedElemRestrictions is also stored.
1293   For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1294   The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1295 CeedEvalMode.
1296   The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1297   Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1298 
1299   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1300   @param[in]  op   CeedOperator to be assembled
1301   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1302 
1303   @return An error code: 0 - success, otherwise - failure
1304 
1305   @ref Backend
1306 **/
1307 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1308   CeedInt             num_active_bases_in = 0, num_active_bases_out = 0, offset = 0;
1309   CeedInt             num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL;
1310   CeedSize          **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1311   CeedEvalMode      **eval_modes_in = NULL, **eval_modes_out = NULL;
1312   CeedQFunctionField *qf_fields;
1313   CeedQFunction       qf;
1314   CeedOperatorField  *op_fields;
1315   bool                is_composite;
1316 
1317   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1318   CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators.");
1319 
1320   // Allocate
1321   CeedCall(CeedCalloc(1, data));
1322   (*data)->ceed = ceed;
1323   CeedCall(CeedReference(ceed));
1324 
1325   // Build OperatorAssembly data
1326   CeedCall(CeedOperatorGetQFunction(op, &qf));
1327   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1328   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1329 
1330   // Determine active input basis
1331   for (CeedInt i = 0; i < num_input_fields; i++) {
1332     CeedVector vec;
1333 
1334     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1335     if (vec == CEED_VECTOR_ACTIVE) {
1336       CeedInt      index = -1, num_comp, q_comp;
1337       CeedEvalMode eval_mode;
1338       CeedBasis    basis_in = NULL;
1339 
1340       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1341       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1342       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1343       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1344       for (CeedInt i = 0; i < num_active_bases_in; i++) {
1345         if ((*data)->active_bases_in[i] == basis_in) index = i;
1346       }
1347       if (index == -1) {
1348         CeedElemRestriction elem_rstr_in;
1349 
1350         index = num_active_bases_in;
1351         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in));
1352         (*data)->active_bases_in[num_active_bases_in] = NULL;
1353         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in]));
1354         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in));
1355         (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL;
1356         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1357         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in]));
1358         CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in));
1359         num_eval_modes_in[index] = 0;
1360         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in));
1361         eval_modes_in[index] = NULL;
1362         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in));
1363         eval_mode_offsets_in[index] = NULL;
1364         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in));
1365         (*data)->assembled_bases_in[index] = NULL;
1366         num_active_bases_in++;
1367       }
1368       if (eval_mode != CEED_EVAL_WEIGHT) {
1369         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1370         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1371         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1372         for (CeedInt d = 0; d < q_comp; d++) {
1373           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1374           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1375           offset += num_comp;
1376         }
1377         num_eval_modes_in[index] += q_comp;
1378       }
1379     }
1380   }
1381 
1382   // Determine active output basis
1383   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1384   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1385   offset = 0;
1386   for (CeedInt i = 0; i < num_output_fields; i++) {
1387     CeedVector vec;
1388 
1389     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1390     if (vec == CEED_VECTOR_ACTIVE) {
1391       CeedInt      index = -1, num_comp, q_comp;
1392       CeedEvalMode eval_mode;
1393       CeedBasis    basis_out = NULL;
1394 
1395       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1396       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1397       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1398       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1399       for (CeedInt i = 0; i < num_active_bases_out; i++) {
1400         if ((*data)->active_bases_out[i] == basis_out) index = i;
1401       }
1402       if (index == -1) {
1403         CeedElemRestriction elem_rstr_out;
1404 
1405         index = num_active_bases_out;
1406         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out));
1407         (*data)->active_bases_out[num_active_bases_out] = NULL;
1408         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out]));
1409         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out));
1410         (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL;
1411         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1412         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out]));
1413         CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out));
1414         num_eval_modes_out[index] = 0;
1415         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out));
1416         eval_modes_out[index] = NULL;
1417         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out));
1418         eval_mode_offsets_out[index] = NULL;
1419         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out));
1420         (*data)->assembled_bases_out[index] = NULL;
1421         num_active_bases_out++;
1422       }
1423       if (eval_mode != CEED_EVAL_WEIGHT) {
1424         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1425         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1426         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1427         for (CeedInt d = 0; d < q_comp; d++) {
1428           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1429           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1430           offset += num_comp;
1431         }
1432         num_eval_modes_out[index] += q_comp;
1433       }
1434     }
1435   }
1436   (*data)->num_active_bases_in   = num_active_bases_in;
1437   (*data)->num_eval_modes_in     = num_eval_modes_in;
1438   (*data)->eval_modes_in         = eval_modes_in;
1439   (*data)->eval_mode_offsets_in  = eval_mode_offsets_in;
1440   (*data)->num_active_bases_out  = num_active_bases_out;
1441   (*data)->num_eval_modes_out    = num_eval_modes_out;
1442   (*data)->eval_modes_out        = eval_modes_out;
1443   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1444   (*data)->num_output_components = offset;
1445   return CEED_ERROR_SUCCESS;
1446 }
1447 
1448 /**
1449   @brief Get CeedOperator CeedEvalModes for assembly.
1450 
1451   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1452 
1453   @param[in]  data                  CeedOperatorAssemblyData
1454   @param[out] num_active_bases_in   Total number of active bases for input
1455   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1456                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1457   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1458   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1459   @param[out] num_active_bases_out  Total number of active bases for output
1460   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1461   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1462   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1463   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1464                                       including contributions of all active bases
1465 
1466   @return An error code: 0 - success, otherwise - failure
1467 
1468   @ref Backend
1469 **/
1470 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in,
1471                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out,
1472                                          CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out,
1473                                          CeedSize *num_output_components) {
1474   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1475   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1476   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1477   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1478   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1479   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1480   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1481   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1482   if (num_output_components) *num_output_components = data->num_output_components;
1483   return CEED_ERROR_SUCCESS;
1484 }
1485 
1486 /**
1487   @brief Get CeedOperator CeedBasis data for assembly.
1488 
1489   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1490 
1491   @param[in]  data                 CeedOperatorAssemblyData
1492   @param[out] num_active_bases_in  Number of active input bases, or NULL
1493   @param[out] active_bases_in      Pointer to hold active input CeedBasis, or NULL
1494   @param[out] assembled_bases_in   Pointer to hold assembled active input B, or NULL
1495   @param[out] num_active_bases_out Number of active output bases, or NULL
1496   @param[out] active_bases_out     Pointer to hold active output CeedBasis, or NULL
1497   @param[out] assembled_bases_out  Pointer to hold assembled active output B, or NULL
1498 
1499   @return An error code: 0 - success, otherwise - failure
1500 
1501   @ref Backend
1502 **/
1503 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in,
1504                                      const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out,
1505                                      const CeedScalar ***assembled_bases_out) {
1506   // Assemble B_in, B_out if needed
1507   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1508     CeedInt num_qpts;
1509 
1510     if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts));
1511     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts));
1512     for (CeedInt b = 0; b < data->num_active_bases_in; b++) {
1513       bool        has_eval_none = false;
1514       CeedInt     num_nodes;
1515       CeedScalar *B_in = NULL, *identity = NULL;
1516 
1517       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes));
1518       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1519 
1520       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1521         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1522       }
1523       if (has_eval_none) {
1524         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1525         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1526           identity[i * num_nodes + i] = 1.0;
1527         }
1528       }
1529 
1530       for (CeedInt q = 0; q < num_qpts; q++) {
1531         for (CeedInt n = 0; n < num_nodes; n++) {
1532           CeedInt      d_in              = 0, q_comp_in;
1533           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1534 
1535           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1536             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1537             const CeedScalar *B  = NULL;
1538 
1539             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B));
1540             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in));
1541             if (q_comp_in > 1) {
1542               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1543               else B = &B[(++d_in) * num_qpts * num_nodes];
1544             }
1545             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1546             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1547           }
1548         }
1549       }
1550       if (identity) CeedCall(CeedFree(&identity));
1551       data->assembled_bases_in[b] = B_in;
1552     }
1553   }
1554 
1555   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1556     CeedInt num_qpts;
1557 
1558     if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts));
1559     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts));
1560     for (CeedInt b = 0; b < data->num_active_bases_out; b++) {
1561       bool        has_eval_none = false;
1562       CeedInt     num_nodes;
1563       CeedScalar *B_out = NULL, *identity = NULL;
1564 
1565       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes));
1566       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1567 
1568       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1569         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1570       }
1571       if (has_eval_none) {
1572         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1573         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1574           identity[i * num_nodes + i] = 1.0;
1575         }
1576       }
1577 
1578       for (CeedInt q = 0; q < num_qpts; q++) {
1579         for (CeedInt n = 0; n < num_nodes; n++) {
1580           CeedInt      d_out              = 0, q_comp_out;
1581           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1582 
1583           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1584             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1585             const CeedScalar *B  = NULL;
1586 
1587             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B));
1588             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out));
1589             if (q_comp_out > 1) {
1590               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1591               else B = &B[(++d_out) * num_qpts * num_nodes];
1592             }
1593             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1594             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1595           }
1596         }
1597       }
1598       if (identity) CeedCall(CeedFree(&identity));
1599       data->assembled_bases_out[b] = B_out;
1600     }
1601   }
1602 
1603   // Pass out assembled data
1604   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1605   if (active_bases_in) *active_bases_in = data->active_bases_in;
1606   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1607   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1608   if (active_bases_out) *active_bases_out = data->active_bases_out;
1609   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1610   return CEED_ERROR_SUCCESS;
1611 }
1612 
1613 /**
1614   @brief Get CeedOperator CeedBasis data for assembly.
1615 
1616   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1617 
1618   @param[in]  data                      CeedOperatorAssemblyData
1619   @param[out] num_active_elem_rstrs_in  Number of active input element restrictions, or NULL
1620   @param[out] active_elem_rstrs_in      Pointer to hold active input CeedElemRestrictions, or NULL
1621   @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or NULL
1622   @param[out] active_elem_rstrs_out     Pointer to hold active output CeedElemRestrictions, or NULL
1623 
1624   @return An error code: 0 - success, otherwise - failure
1625 
1626   @ref Backend
1627 **/
1628 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in,
1629                                                 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out,
1630                                                 CeedElemRestriction **active_elem_rstrs_out) {
1631   if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in;
1632   if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in;
1633   if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out;
1634   if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out;
1635   return CEED_ERROR_SUCCESS;
1636 }
1637 
1638 /**
1639   @brief Destroy CeedOperatorAssemblyData
1640 
1641   @param[in,out] data CeedOperatorAssemblyData to destroy
1642 
1643   @return An error code: 0 - success, otherwise - failure
1644 
1645   @ref Backend
1646 **/
1647 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1648   if (!*data) {
1649     *data = NULL;
1650     return CEED_ERROR_SUCCESS;
1651   }
1652   CeedCall(CeedDestroy(&(*data)->ceed));
1653   for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) {
1654     CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b]));
1655     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b]));
1656     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1657     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1658     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1659   }
1660   for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) {
1661     CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b]));
1662     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b]));
1663     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1664     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1665     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1666   }
1667   CeedCall(CeedFree(&(*data)->active_bases_in));
1668   CeedCall(CeedFree(&(*data)->active_bases_out));
1669   CeedCall(CeedFree(&(*data)->active_elem_rstrs_in));
1670   CeedCall(CeedFree(&(*data)->active_elem_rstrs_out));
1671   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1672   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1673   CeedCall(CeedFree(&(*data)->eval_modes_in));
1674   CeedCall(CeedFree(&(*data)->eval_modes_out));
1675   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1676   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1677   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1678   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1679 
1680   CeedCall(CeedFree(data));
1681   return CEED_ERROR_SUCCESS;
1682 }
1683 
1684 /// @}
1685 
1686 /// ----------------------------------------------------------------------------
1687 /// CeedOperator Public API
1688 /// ----------------------------------------------------------------------------
1689 /// @addtogroup CeedOperatorUser
1690 /// @{
1691 
1692 /**
1693   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1694 
1695   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1696   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices
1697 representing the action of the CeedQFunction for a corresponding quadrature point on an element.
1698 
1699   Inputs and outputs are in the order provided by the user when adding CeedOperator fields.
1700   For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction
1701 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1702 
1703   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1704 
1705   @param[in]  op        CeedOperator to assemble CeedQFunction
1706   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1707   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1708   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1709 
1710   @return An error code: 0 - success, otherwise - failure
1711 
1712   @ref User
1713 **/
1714 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1715   CeedCall(CeedOperatorCheckReady(op));
1716 
1717   if (op->LinearAssembleQFunction) {
1718     // Backend version
1719     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1720   } else {
1721     // Operator fallback
1722     CeedOperator op_fallback;
1723 
1724     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1725     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1726     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1727   }
1728   return CEED_ERROR_SUCCESS;
1729 }
1730 
1731 /**
1732   @brief Assemble CeedQFunction and store result internally.
1733 
1734   Return copied references of stored data to the caller.
1735   Caller is responsible for ownership and destruction of the copied references.
1736   See also @ref CeedOperatorLinearAssembleQFunction
1737 
1738   Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers.
1739         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
1740 
1741   @param[in]  op        CeedOperator to assemble CeedQFunction
1742   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1743   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1744   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1745 
1746   @return An error code: 0 - success, otherwise - failure
1747 
1748   @ref User
1749 **/
1750 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1751   int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL;
1752   CeedOperator op_assemble                                                                           = NULL;
1753   CeedOperator op_fallback_parent                                                                    = NULL;
1754 
1755   CeedCall(CeedOperatorCheckReady(op));
1756 
1757   // Determine if fallback parent or operator has implementation
1758   CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent));
1759   if (op_fallback_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) {
1760     // -- Backend version for op fallback parent is faster, if it exists
1761     LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate;
1762     op_assemble                   = op_fallback_parent;
1763   } else if (op->LinearAssembleQFunctionUpdate) {
1764     // -- Backend version for op
1765     LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate;
1766     op_assemble                   = op;
1767   }
1768 
1769   // Assemble QFunction
1770   if (LinearAssembleQFunctionUpdate) {
1771     // Backend or fallback parent version
1772     bool                qf_assembled_is_setup;
1773     CeedVector          assembled_vec  = NULL;
1774     CeedElemRestriction assembled_rstr = NULL;
1775 
1776     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1777     if (qf_assembled_is_setup) {
1778       bool update_needed;
1779 
1780       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1781       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1782       if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request));
1783     } else {
1784       CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request));
1785       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1786     }
1787     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1788 
1789     // Copy reference from internally held copy
1790     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1791     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1792     CeedCall(CeedVectorDestroy(&assembled_vec));
1793     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1794   } else {
1795     // Operator fallback
1796     CeedOperator op_fallback;
1797 
1798     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1799     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1800     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1801   }
1802   return CEED_ERROR_SUCCESS;
1803 }
1804 
1805 /**
1806   @brief Assemble the diagonal of a square linear CeedOperator
1807 
1808   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1809 
1810   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1811 
1812   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1813 
1814   @param[in]  op        CeedOperator to assemble CeedQFunction
1815   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1816   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1817 
1818   @return An error code: 0 - success, otherwise - failure
1819 
1820   @ref User
1821 **/
1822 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1823   bool     is_composite;
1824   CeedSize input_size = 0, output_size = 0;
1825 
1826   CeedCall(CeedOperatorCheckReady(op));
1827   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1828 
1829   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1830   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1831 
1832   // Early exit for empty operator
1833   if (!is_composite) {
1834     CeedInt num_elem = 0;
1835 
1836     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1837     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1838   }
1839 
1840   if (op->LinearAssembleDiagonal) {
1841     // Backend version
1842     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1843     return CEED_ERROR_SUCCESS;
1844   } else if (op->LinearAssembleAddDiagonal) {
1845     // Backend version with zeroing first
1846     CeedCall(CeedVectorSetValue(assembled, 0.0));
1847     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1848     return CEED_ERROR_SUCCESS;
1849   } else {
1850     // Operator fallback
1851     CeedOperator op_fallback;
1852 
1853     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1854     if (op_fallback) {
1855       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1856       return CEED_ERROR_SUCCESS;
1857     }
1858   }
1859   // Default interface implementation
1860   CeedCall(CeedVectorSetValue(assembled, 0.0));
1861   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1862   return CEED_ERROR_SUCCESS;
1863 }
1864 
1865 /**
1866   @brief Assemble the diagonal of a square linear CeedOperator
1867 
1868   This sums into a CeedVector the diagonal of a linear CeedOperator.
1869 
1870   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1871 
1872   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1873 
1874   @param[in]  op        CeedOperator to assemble CeedQFunction
1875   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1876   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1877 
1878   @return An error code: 0 - success, otherwise - failure
1879 
1880   @ref User
1881 **/
1882 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1883   bool     is_composite;
1884   CeedSize input_size = 0, output_size = 0;
1885 
1886   CeedCall(CeedOperatorCheckReady(op));
1887   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1888 
1889   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1890   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1891 
1892   // Early exit for empty operator
1893   if (!is_composite) {
1894     CeedInt num_elem = 0;
1895 
1896     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1897     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1898   }
1899 
1900   if (op->LinearAssembleAddDiagonal) {
1901     // Backend version
1902     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1903     return CEED_ERROR_SUCCESS;
1904   } else {
1905     // Operator fallback
1906     CeedOperator op_fallback;
1907 
1908     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1909     if (op_fallback) {
1910       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1911       return CEED_ERROR_SUCCESS;
1912     }
1913   }
1914   // Default interface implementation
1915   if (is_composite) {
1916     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1917   } else {
1918     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1919   }
1920   return CEED_ERROR_SUCCESS;
1921 }
1922 
1923 /**
1924    @brief Fully assemble the point-block diagonal pattern of a linear operator.
1925 
1926    Expected to be used in conjunction with CeedOperatorLinearAssemblePointBlockDiagonal().
1927 
1928    The assembly routines use coordinate format, with `num_entries` tuples of the form (i, j, value) which indicate that value should be added to the
1929 matrix in entry (i, j).
1930   Note that the (i, j) pairs are unique.
1931   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in
1932 the same ordering.
1933 
1934    This will generally be slow unless your operator is low-order.
1935 
1936    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1937 
1938    @param[in]  op          CeedOperator to assemble
1939    @param[out] num_entries Number of entries in coordinate nonzero pattern
1940    @param[out] rows        Row number for each entry
1941    @param[out] cols        Column number for each entry
1942 
1943    @ref User
1944 **/
1945 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1946   Ceed          ceed;
1947   bool          is_composite;
1948   CeedInt       num_active_components, num_sub_operators;
1949   CeedOperator *sub_operators;
1950 
1951   CeedCall(CeedOperatorGetCeed(op, &ceed));
1952   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1953 
1954   CeedSize input_size = 0, output_size = 0;
1955   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1956   CeedCheck(input_size == output_size, ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1957 
1958   if (is_composite) {
1959     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_operators));
1960     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1961   } else {
1962     sub_operators     = &op;
1963     num_sub_operators = 1;
1964   }
1965 
1966   // Verify operator can be assembled correctly
1967   {
1968     CeedOperatorAssemblyData data;
1969     CeedInt                  num_active_elem_rstrs, comp_stride;
1970     CeedElemRestriction     *active_elem_rstrs;
1971 
1972     // Get initial values to check against
1973     CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data));
1974     CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
1975     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride));
1976     CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components));
1977 
1978     // Verify that all active element restrictions have same component stride and number of components
1979     for (CeedInt k = 0; k < num_sub_operators; k++) {
1980       CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data));
1981       CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
1982       for (CeedInt i = 0; i < num_active_elem_rstrs; i++) {
1983         CeedInt comp_stride_sub, num_active_components_sub;
1984 
1985         CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub));
1986         CeedCheck(comp_stride == comp_stride_sub, ceed, CEED_ERROR_DIMENSION,
1987                   "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub);
1988         CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub));
1989         CeedCheck(num_active_components == num_active_components_sub, ceed, CEED_ERROR_INCOMPATIBLE,
1990                   "All suboperators must have the same number of output components");
1991       }
1992     }
1993   }
1994   *num_entries = input_size * num_active_components;
1995   CeedCall(CeedCalloc(*num_entries, rows));
1996   CeedCall(CeedCalloc(*num_entries, cols));
1997 
1998   for (CeedInt o = 0; o < num_sub_operators; o++) {
1999     CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr;
2000     CeedInt             comp_stride, num_elem, elem_size;
2001     const CeedInt      *offsets, *point_block_offsets;
2002 
2003     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr));
2004     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride));
2005     CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem));
2006     CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size));
2007     CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets));
2008 
2009     CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr));
2010     CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets));
2011 
2012     for (CeedSize i = 0; i < num_elem * elem_size; i++) {
2013       for (CeedInt c_out = 0; c_out < num_active_components; c_out++) {
2014         for (CeedInt c_in = 0; c_in < num_active_components; c_in++) {
2015           (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride;
2016           (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride;
2017         }
2018       }
2019     }
2020 
2021     CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets));
2022     CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets));
2023     CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr));
2024   }
2025   return CEED_ERROR_SUCCESS;
2026 }
2027 
2028 /**
2029   @brief Assemble the point block diagonal of a square linear CeedOperator
2030 
2031   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
2032 
2033   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
2034 
2035   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2036 
2037   @param[in]  op        CeedOperator to assemble CeedQFunction
2038   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
2039 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
2040 component in].
2041   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2042 
2043   @return An error code: 0 - success, otherwise - failure
2044 
2045   @ref User
2046 **/
2047 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2048   bool     is_composite;
2049   CeedSize input_size = 0, output_size = 0;
2050 
2051   CeedCall(CeedOperatorCheckReady(op));
2052   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2053 
2054   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2055   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
2056 
2057   // Early exit for empty operator
2058   if (!is_composite) {
2059     CeedInt num_elem = 0;
2060 
2061     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2062     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2063   }
2064 
2065   if (op->LinearAssemblePointBlockDiagonal) {
2066     // Backend version
2067     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
2068     return CEED_ERROR_SUCCESS;
2069   } else if (op->LinearAssembleAddPointBlockDiagonal) {
2070     // Backend version with zeroing first
2071     CeedCall(CeedVectorSetValue(assembled, 0.0));
2072     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2073     return CEED_ERROR_SUCCESS;
2074   } else {
2075     // Operator fallback
2076     CeedOperator op_fallback;
2077 
2078     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2079     if (op_fallback) {
2080       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
2081       return CEED_ERROR_SUCCESS;
2082     }
2083   }
2084   // Default interface implementation
2085   CeedCall(CeedVectorSetValue(assembled, 0.0));
2086   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2087   return CEED_ERROR_SUCCESS;
2088 }
2089 
2090 /**
2091   @brief Assemble the point block diagonal of a square linear CeedOperator
2092 
2093   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
2094 
2095   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
2096 
2097   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2098 
2099   @param[in]  op        CeedOperator to assemble CeedQFunction
2100   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
2101 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
2102 component in].
2103   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2104 
2105   @return An error code: 0 - success, otherwise - failure
2106 
2107   @ref User
2108 **/
2109 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2110   bool     is_composite;
2111   CeedSize input_size = 0, output_size = 0;
2112 
2113   CeedCall(CeedOperatorCheckReady(op));
2114   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2115 
2116   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2117   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
2118 
2119   // Early exit for empty operator
2120   if (!is_composite) {
2121     CeedInt num_elem = 0;
2122 
2123     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2124     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2125   }
2126 
2127   if (op->LinearAssembleAddPointBlockDiagonal) {
2128     // Backend version
2129     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
2130     return CEED_ERROR_SUCCESS;
2131   } else {
2132     // Operator fallback
2133     CeedOperator op_fallback;
2134 
2135     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2136     if (op_fallback) {
2137       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
2138       return CEED_ERROR_SUCCESS;
2139     }
2140   }
2141   // Default interface implementation
2142   if (is_composite) {
2143     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
2144   } else {
2145     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
2146   }
2147   return CEED_ERROR_SUCCESS;
2148 }
2149 
2150 /**
2151    @brief Fully assemble the nonzero pattern of a linear operator.
2152 
2153    Expected to be used in conjunction with CeedOperatorLinearAssemble().
2154 
2155    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
2156 matrix in entry (i, j).
2157   Note that the (i, j) pairs are not unique and may repeat.
2158   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
2159 
2160    This will generally be slow unless your operator is low-order.
2161 
2162    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2163 
2164    @param[in]  op          CeedOperator to assemble
2165    @param[out] num_entries Number of entries in coordinate nonzero pattern
2166    @param[out] rows        Row number for each entry
2167    @param[out] cols        Column number for each entry
2168 
2169    @ref User
2170 **/
2171 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2172   bool          is_composite;
2173   CeedInt       num_suboperators, offset = 0;
2174   CeedSize      single_entries;
2175   CeedOperator *sub_operators;
2176 
2177   CeedCall(CeedOperatorCheckReady(op));
2178   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2179 
2180   if (op->LinearAssembleSymbolic) {
2181     // Backend version
2182     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
2183     return CEED_ERROR_SUCCESS;
2184   } else {
2185     // Operator fallback
2186     CeedOperator op_fallback;
2187 
2188     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2189     if (op_fallback) {
2190       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
2191       return CEED_ERROR_SUCCESS;
2192     }
2193   }
2194 
2195   // Default interface implementation
2196 
2197   // Count entries and allocate rows, cols arrays
2198   *num_entries = 0;
2199   if (is_composite) {
2200     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2201     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2202     for (CeedInt k = 0; k < num_suboperators; ++k) {
2203       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2204       *num_entries += single_entries;
2205     }
2206   } else {
2207     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
2208     *num_entries += single_entries;
2209   }
2210   CeedCall(CeedCalloc(*num_entries, rows));
2211   CeedCall(CeedCalloc(*num_entries, cols));
2212 
2213   // Assemble nonzero locations
2214   if (is_composite) {
2215     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2216     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2217     for (CeedInt k = 0; k < num_suboperators; ++k) {
2218       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
2219       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2220       offset += single_entries;
2221     }
2222   } else {
2223     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
2224   }
2225   return CEED_ERROR_SUCCESS;
2226 }
2227 
2228 /**
2229    @brief Fully assemble the nonzero entries of a linear operator.
2230 
2231    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
2232 
2233    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
2234 matrix in entry (i, j).
2235   Note that the (i, j) pairs are not unique and may repeat.
2236   This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
2237 
2238    This will generally be slow unless your operator is low-order.
2239 
2240    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2241 
2242    @param[in]  op     CeedOperator to assemble
2243    @param[out] values Values to assemble into matrix
2244 
2245    @ref User
2246 **/
2247 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
2248   bool          is_composite;
2249   CeedInt       num_suboperators, offset = 0;
2250   CeedSize      single_entries = 0;
2251   CeedOperator *sub_operators;
2252 
2253   CeedCall(CeedOperatorCheckReady(op));
2254   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2255 
2256   // Early exit for empty operator
2257   if (!is_composite) {
2258     CeedInt num_elem = 0;
2259 
2260     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2261     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2262   }
2263 
2264   if (op->LinearAssemble) {
2265     // Backend version
2266     CeedCall(op->LinearAssemble(op, values));
2267     return CEED_ERROR_SUCCESS;
2268   } else {
2269     // Operator fallback
2270     CeedOperator op_fallback;
2271 
2272     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2273     if (op_fallback) {
2274       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2275       return CEED_ERROR_SUCCESS;
2276     }
2277   }
2278 
2279   // Default interface implementation
2280   CeedCall(CeedVectorSetValue(values, 0.0));
2281   if (is_composite) {
2282     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2283     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2284     for (CeedInt k = 0; k < num_suboperators; k++) {
2285       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2286       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2287       offset += single_entries;
2288     }
2289   } else {
2290     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2291   }
2292   return CEED_ERROR_SUCCESS;
2293 }
2294 
2295 /**
2296   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
2297 
2298   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2299 
2300   @param[in]  op               Composite CeedOperator
2301   @param[in]  num_skip_indices Number of suboperators to skip
2302   @param[in]  skip_indices     Array of indices of suboperators to skip
2303   @param[out] mult             Vector to store multiplicity (of size l_size)
2304 
2305   @return An error code: 0 - success, otherwise - failure
2306 
2307   @ref User
2308 **/
2309 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2310   Ceed                ceed;
2311   CeedInt             num_suboperators;
2312   CeedSize            l_vec_len;
2313   CeedScalar         *mult_array;
2314   CeedVector          ones_l_vec;
2315   CeedElemRestriction elem_rstr, mult_elem_rstr;
2316   CeedOperator       *sub_operators;
2317 
2318   CeedCall(CeedOperatorCheckReady(op));
2319 
2320   CeedCall(CeedOperatorGetCeed(op, &ceed));
2321 
2322   // Zero mult vector
2323   CeedCall(CeedVectorSetValue(mult, 0.0));
2324 
2325   // Get suboperators
2326   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2327   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2328   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2329 
2330   // Work vector
2331   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2332   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2333   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2334   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2335 
2336   // Compute multiplicity across suboperators
2337   for (CeedInt i = 0; i < num_suboperators; i++) {
2338     const CeedScalar *sub_mult_array;
2339     CeedVector        sub_mult_l_vec, ones_e_vec;
2340 
2341     // -- Check for suboperator to skip
2342     for (CeedInt j = 0; j < num_skip_indices; j++) {
2343       if (skip_indices[j] == i) continue;
2344     }
2345 
2346     // -- Sub operator multiplicity
2347     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2348     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr));
2349     CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2350     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2351     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2352     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2353     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2354     // ---- Flag every node present in the current suboperator
2355     for (CeedInt j = 0; j < l_vec_len; j++) {
2356       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2357     }
2358     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2359     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2360     CeedCall(CeedVectorDestroy(&ones_e_vec));
2361     CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr));
2362   }
2363   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2364   CeedCall(CeedVectorDestroy(&ones_l_vec));
2365   return CEED_ERROR_SUCCESS;
2366 }
2367 
2368 /**
2369   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2370 grid interpolation
2371 
2372   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2373 
2374   @param[in]  op_fine      Fine grid operator
2375   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2376   @param[in]  rstr_coarse  Coarse grid restriction
2377   @param[in]  basis_coarse Coarse grid active vector basis
2378   @param[out] op_coarse    Coarse grid operator
2379   @param[out] op_prolong   Coarse to fine operator, or NULL
2380   @param[out] op_restrict  Fine to coarse operator, or NULL
2381 
2382   @return An error code: 0 - success, otherwise - failure
2383 
2384   @ref User
2385 **/
2386 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2387                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2388   CeedBasis basis_c_to_f = NULL;
2389 
2390   CeedCall(CeedOperatorCheckReady(op_fine));
2391 
2392   // Build prolongation matrix, if required
2393   if (op_prolong || op_restrict) {
2394     CeedBasis basis_fine;
2395 
2396     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2397     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2398   }
2399 
2400   // Core code
2401   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2402   return CEED_ERROR_SUCCESS;
2403 }
2404 
2405 /**
2406   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2407 
2408   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2409 
2410   @param[in]  op_fine       Fine grid operator
2411   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2412   @param[in]  rstr_coarse   Coarse grid restriction
2413   @param[in]  basis_coarse  Coarse grid active vector basis
2414   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2415   @param[out] op_coarse     Coarse grid operator
2416   @param[out] op_prolong    Coarse to fine operator, or NULL
2417   @param[out] op_restrict   Fine to coarse operator, or NULL
2418 
2419   @return An error code: 0 - success, otherwise - failure
2420 
2421   @ref User
2422 **/
2423 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2424                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2425                                              CeedOperator *op_restrict) {
2426   Ceed      ceed;
2427   CeedInt   Q_f, Q_c;
2428   CeedBasis basis_fine, basis_c_to_f = NULL;
2429 
2430   CeedCall(CeedOperatorCheckReady(op_fine));
2431   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2432 
2433   // Check for compatible quadrature spaces
2434   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2435   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2436   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2437   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2438 
2439   // Create coarse to fine basis, if required
2440   if (op_prolong || op_restrict) {
2441     CeedInt     dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2442     CeedScalar *q_ref, *q_weight, *grad;
2443 
2444     // Check if interpolation matrix is provided
2445     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2446               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2447     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2448     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2449     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2450     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2451     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2452     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2453     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2454     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2455     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2456     CeedCall(CeedFree(&q_ref));
2457     CeedCall(CeedFree(&q_weight));
2458     CeedCall(CeedFree(&grad));
2459   }
2460 
2461   // Core code
2462   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2463   return CEED_ERROR_SUCCESS;
2464 }
2465 
2466 /**
2467   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2468 
2469   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2470 
2471   @param[in]  op_fine       Fine grid operator
2472   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2473   @param[in]  rstr_coarse   Coarse grid restriction
2474   @param[in]  basis_coarse  Coarse grid active vector basis
2475   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2476   @param[out] op_coarse     Coarse grid operator
2477   @param[out] op_prolong    Coarse to fine operator, or NULL
2478   @param[out] op_restrict   Fine to coarse operator, or NULL
2479 
2480   @return An error code: 0 - success, otherwise - failure
2481 
2482   @ref User
2483 **/
2484 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2485                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2486                                        CeedOperator *op_restrict) {
2487   Ceed      ceed;
2488   CeedInt   Q_f, Q_c;
2489   CeedBasis basis_fine, basis_c_to_f = NULL;
2490 
2491   CeedCall(CeedOperatorCheckReady(op_fine));
2492   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2493 
2494   // Check for compatible quadrature spaces
2495   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2496   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2497   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2498   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2499 
2500   // Coarse to fine basis
2501   if (op_prolong || op_restrict) {
2502     CeedInt          dim, num_comp, num_nodes_c, num_nodes_f;
2503     CeedScalar      *q_ref, *q_weight, *grad;
2504     CeedElemTopology topo;
2505 
2506     // Check if interpolation matrix is provided
2507     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2508               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2509     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2510     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2511     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2512     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2513     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2514     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2515     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2516     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2517     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2518     CeedCall(CeedFree(&q_ref));
2519     CeedCall(CeedFree(&q_weight));
2520     CeedCall(CeedFree(&grad));
2521   }
2522 
2523   // Core code
2524   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2525   return CEED_ERROR_SUCCESS;
2526 }
2527 
2528 /**
2529   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2530 
2531   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2532   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2533   The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T
2534 \hat S V\f$.
2535   The CeedOperator must be linear and non-composite.
2536   The associated CeedQFunction must therefore also be linear.
2537 
2538   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2539 
2540   @param[in]  op      CeedOperator to create element inverses
2541   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2542   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2543 
2544   @return An error code: 0 - success, otherwise - failure
2545 
2546   @ref User
2547 **/
2548 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2549   Ceed                 ceed, ceed_parent;
2550   bool                 interp = false, grad = false, is_tensor_basis = true;
2551   CeedInt              num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2552   CeedSize             l_size = 1;
2553   CeedScalar          *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg;
2554   const CeedScalar    *interp_1d, *grad_1d, *q_weight_1d;
2555   CeedVector           q_data;
2556   CeedElemRestriction  rstr  = NULL, rstr_qd_i;
2557   CeedBasis            basis = NULL, fdm_basis;
2558   CeedQFunctionContext ctx_fdm;
2559   CeedQFunctionField  *qf_fields;
2560   CeedQFunction        qf, qf_fdm;
2561   CeedOperatorField   *op_fields;
2562 
2563   CeedCall(CeedOperatorCheckReady(op));
2564 
2565   if (op->CreateFDMElementInverse) {
2566     // Backend version
2567     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2568     return CEED_ERROR_SUCCESS;
2569   } else {
2570     // Operator fallback
2571     CeedOperator op_fallback;
2572 
2573     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2574     if (op_fallback) {
2575       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2576       return CEED_ERROR_SUCCESS;
2577     }
2578   }
2579 
2580   // Default interface implementation
2581   CeedCall(CeedOperatorGetCeed(op, &ceed));
2582   CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent));
2583   CeedCall(CeedOperatorGetQFunction(op, &qf));
2584 
2585   // Determine active input basis
2586   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2587   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2588   for (CeedInt i = 0; i < num_input_fields; i++) {
2589     CeedVector vec;
2590 
2591     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2592     if (vec == CEED_VECTOR_ACTIVE) {
2593       CeedEvalMode eval_mode;
2594 
2595       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2596       interp = interp || eval_mode == CEED_EVAL_INTERP;
2597       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2598       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2599       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2600     }
2601   }
2602   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2603   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2604   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2605   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2606   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2607   CeedCall(CeedBasisGetDimension(basis, &dim));
2608   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2609   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2610   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2611 
2612   // Build and diagonalize 1D Mass and Laplacian
2613   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2614   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2615   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2616   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2617   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2618   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2619   CeedCall(CeedCalloc(P_1d, &lambda));
2620   // -- Build matrices
2621   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2622   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2623   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2624   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2625 
2626   // -- Diagonalize
2627   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2628   CeedCall(CeedFree(&mass));
2629   CeedCall(CeedFree(&laplace));
2630   for (CeedInt i = 0; i < P_1d; i++) {
2631     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2632   }
2633   CeedCall(CeedFree(&x));
2634 
2635   {
2636     CeedInt             layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2637     CeedScalar          max_norm = 0;
2638     const CeedScalar   *assembled_array, *q_weight_array;
2639     CeedVector          assembled = NULL, q_weight;
2640     CeedElemRestriction rstr_qf   = NULL;
2641 
2642     // Assemble QFunction
2643     CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2644     CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2645     CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2646     CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2647 
2648     // Calculate element averages
2649     CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2650     CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2651     CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2652     CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2653     CeedCall(CeedCalloc(num_elem, &elem_avg));
2654     const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2655 
2656     for (CeedInt e = 0; e < num_elem; e++) {
2657       CeedInt count = 0;
2658 
2659       for (CeedInt q = 0; q < num_qpts; q++) {
2660         for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2661           if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2662             elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2663             count++;
2664           }
2665         }
2666       }
2667       if (count) {
2668         elem_avg[e] /= count;
2669       } else {
2670         elem_avg[e] = 1.0;
2671       }
2672     }
2673     CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2674     CeedCall(CeedVectorDestroy(&assembled));
2675     CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2676     CeedCall(CeedVectorDestroy(&q_weight));
2677   }
2678 
2679   // Build FDM diagonal
2680   {
2681     CeedScalar *q_data_array, *fdm_diagonal;
2682 
2683     CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2684     const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2685     for (CeedInt c = 0; c < num_comp; c++) {
2686       for (CeedInt n = 0; n < num_nodes; n++) {
2687         if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2688         if (grad) {
2689           for (CeedInt d = 0; d < dim; d++) {
2690             CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2691             fdm_diagonal[c * num_nodes + n] += lambda[i];
2692           }
2693         }
2694         if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2695       }
2696     }
2697     CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2698     CeedCall(CeedVectorSetValue(q_data, 0.0));
2699     CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2700     for (CeedInt e = 0; e < num_elem; e++) {
2701       for (CeedInt c = 0; c < num_comp; c++) {
2702         for (CeedInt n = 0; n < num_nodes; n++)
2703           q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2704       }
2705     }
2706     CeedCall(CeedFree(&elem_avg));
2707     CeedCall(CeedFree(&fdm_diagonal));
2708     CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2709   }
2710 
2711   // Setup FDM operator
2712   // -- Basis
2713   {
2714     CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2715 
2716     CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2717     CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2718     CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2719     CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2720     CeedCall(CeedFree(&fdm_interp));
2721     CeedCall(CeedFree(&grad_dummy));
2722     CeedCall(CeedFree(&q_ref_dummy));
2723     CeedCall(CeedFree(&q_weight_dummy));
2724     CeedCall(CeedFree(&lambda));
2725   }
2726 
2727   // -- Restriction
2728   {
2729     CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp};
2730     CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i));
2731   }
2732 
2733   // -- QFunction
2734   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2735   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2736   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2737   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2738   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2739 
2740   // -- QFunction context
2741   {
2742     CeedInt *num_comp_data;
2743 
2744     CeedCall(CeedCalloc(1, &num_comp_data));
2745     num_comp_data[0] = num_comp;
2746     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2747     CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2748   }
2749   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2750   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2751 
2752   // -- Operator
2753   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2754   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2755   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data));
2756   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2757 
2758   // Cleanup
2759   CeedCall(CeedVectorDestroy(&q_data));
2760   CeedCall(CeedBasisDestroy(&fdm_basis));
2761   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2762   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2763   return CEED_ERROR_SUCCESS;
2764 }
2765 
2766 /// @}
2767