xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision c6b97828840c5bbe0e9b412e40a21facc8a9e5fd)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   char *source_path_with_name = NULL;
39 
40   // Check if NULL qf passed in
41   if (!qf) return CEED_ERROR_SUCCESS;
42 
43   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
44   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
45 
46   if (qf->source_path) {
47     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
48     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
49     memcpy(source_path_with_name, qf->source_path, path_len);
50     memcpy(&source_path_with_name[path_len], ":", 1);
51     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
52   } else {
53     CeedCall(CeedCalloc(1, &source_path_with_name));
54   }
55 
56   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
57   {
58     CeedQFunctionContext ctx;
59 
60     CeedCall(CeedQFunctionGetContext(qf, &ctx));
61     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
62   }
63   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
64     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
65   }
66   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
67     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
68   }
69   CeedCall(CeedFree(&source_path_with_name));
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   Ceed         ceed_fallback;
84   bool         is_composite;
85   CeedOperator op_fallback;
86 
87   // Check not already created
88   if (op->op_fallback) return CEED_ERROR_SUCCESS;
89 
90   // Fallback Ceed
91   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
92   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
93 
94   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
95   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
96 
97   // Clone Op
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114 
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
117     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
118     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
119     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
120       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
121                                     op->input_fields[i]->vec));
122     }
123     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
124       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
125                                     op->output_fields[i]->vec));
126     }
127     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
128     if (op_fallback->num_qpts == 0) CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     // Cleanup
130     CeedCall(CeedQFunctionDestroy(&qf_fallback));
131     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
133   }
134   CeedCall(CeedOperatorSetName(op_fallback, op->name));
135   CeedCall(CeedOperatorCheckReady(op_fallback));
136   // Note: No ref-counting here so we don't get caught in a reference loop.
137   //       The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback.
138   op->op_fallback                 = op_fallback;
139   op_fallback->op_fallback_parent = op;
140   return CEED_ERROR_SUCCESS;
141 }
142 
143 /**
144   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
145 
146   @param[in]  op          CeedOperator to retrieve fallback for
147   @param[out] op_fallback Fallback CeedOperator
148 
149   @return An error code: 0 - success, otherwise - failure
150 
151   @ref Developer
152 **/
153 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
154   // Create if needed
155   if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op));
156   if (op->op_fallback) {
157     bool is_debug;
158 
159     CeedCall(CeedIsDebug(op->ceed, &is_debug));
160     if (is_debug) {
161       Ceed        ceed, ceed_fallback;
162       const char *resource, *resource_fallback;
163 
164       CeedCall(CeedOperatorGetCeed(op, &ceed));
165       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
166       CeedCall(CeedGetResource(ceed, &resource));
167       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
168 
169       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n");
170       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
171                 op->op_fallback);
172     }
173   }
174   *op_fallback = op->op_fallback;
175   return CEED_ERROR_SUCCESS;
176 }
177 
178 /**
179   @brief Get the parent CeedOperator for a fallback CeedOperator
180 
181   @param[in]  op     CeedOperator context
182   @param[out] parent Variable to store parent CeedOperator context
183 
184   @return An error code: 0 - success, otherwise - failure
185 
186   @ref Developer
187 **/
188 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) {
189   *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL;
190   return CEED_ERROR_SUCCESS;
191 }
192 
193 /**
194   @brief Get the Ceed context of the parent CeedOperator for a fallback CeedOperator
195 
196   @param[in]  op     CeedOperator context
197   @param[out] parent Variable to store parent Ceed context
198 
199   @return An error code: 0 - success, otherwise - failure
200 
201   @ref Developer
202 **/
203 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) {
204   *parent = op->op_fallback_parent ? op->op_fallback_parent->ceed : op->ceed;
205   return CEED_ERROR_SUCCESS;
206 }
207 
208 /**
209   @brief Select correct basis matrix pointer based on CeedEvalMode
210 
211   @param[in]  basis     CeedBasis from which to get the basis matrix
212   @param[in]  eval_mode Current basis evaluation mode
213   @param[in]  identity  Pointer to identity matrix
214   @param[out] basis_ptr Basis pointer to set
215 
216   @ref Developer
217 **/
218 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
219   switch (eval_mode) {
220     case CEED_EVAL_NONE:
221       *basis_ptr = identity;
222       break;
223     case CEED_EVAL_INTERP:
224       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
225       break;
226     case CEED_EVAL_GRAD:
227       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
228       break;
229     case CEED_EVAL_DIV:
230       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
231       break;
232     case CEED_EVAL_CURL:
233       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
234       break;
235     case CEED_EVAL_WEIGHT:
236       break;  // Caught by QF Assembly
237   }
238   assert(*basis_ptr != NULL);
239   return CEED_ERROR_SUCCESS;
240 }
241 
242 /**
243   @brief Core logic for assembling operator diagonal or point block diagonal
244 
245   @param[in]  op             CeedOperator to assemble point block diagonal
246   @param[in]  request        Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
247   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
248   @param[out] assembled      CeedVector to store assembled diagonal
249 
250   @return An error code: 0 - success, otherwise - failure
251 
252   @ref Developer
253 **/
254 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_point_block, CeedVector assembled) {
255   Ceed ceed;
256   bool is_composite;
257 
258   CeedCall(CeedOperatorGetCeed(op, &ceed));
259   CeedCall(CeedOperatorIsComposite(op, &is_composite));
260   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
261 
262   // Assemble QFunction
263   CeedInt             layout_qf[3];
264   const CeedScalar   *assembled_qf_array;
265   CeedVector          assembled_qf        = NULL;
266   CeedElemRestriction assembled_elem_rstr = NULL;
267 
268   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
269   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf));
270   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
271   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
272 
273   // Get assembly data
274   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
275   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
276   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
277   CeedBasis               *active_bases_in, *active_bases_out;
278   CeedElemRestriction     *active_elem_rstrs_in, *active_elem_rstrs_out;
279   CeedOperatorAssemblyData data;
280 
281   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
282   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
283                                                 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out,
284                                                 &num_output_components));
285   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL));
286   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out));
287 
288   // Loop over all active bases (find matching input/output pairs)
289   for (CeedInt b = 0; b < CeedIntMin(num_active_bases_in, num_active_bases_out); b++) {
290     CeedInt             b_in, b_out, num_elem, num_nodes, num_qpts, num_comp;
291     bool                has_eval_none = false;
292     CeedScalar         *elem_diag_array, *identity = NULL;
293     CeedVector          elem_diag;
294     CeedElemRestriction diag_elem_rstr;
295 
296     if (num_active_bases_in <= num_active_bases_out) {
297       b_in = b;
298       for (b_out = 0; b_out < num_active_bases_out; b_out++) {
299         if (active_bases_in[b_in] == active_bases_out[b_out]) {
300           break;
301         }
302       }
303       if (b_out == num_active_bases_out) {
304         continue;
305       }  // No matching output basis found
306     } else {
307       b_out = b;
308       for (b_in = 0; b_in < num_active_bases_in; b_in++) {
309         if (active_bases_in[b_in] == active_bases_out[b_out]) {
310           break;
311         }
312       }
313       if (b_in == num_active_bases_in) {
314         continue;
315       }  // No matching output basis found
316     }
317     CeedCheck(active_elem_rstrs_in[b_in] == active_elem_rstrs_out[b_out], ceed, CEED_ERROR_UNSUPPORTED,
318               "Cannot assemble operator diagonal with different input and output active element restrictions");
319 
320     // Assemble point block diagonal restriction, if needed
321     if (is_point_block) {
322       CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b_in], &diag_elem_rstr));
323     } else {
324       CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b_in], &diag_elem_rstr));
325     }
326 
327     // Create diagonal vector
328     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
329 
330     // Assemble element operator diagonals
331     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
332     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
333     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
334     CeedCall(CeedBasisGetNumNodes(active_bases_in[b_in], &num_nodes));
335     CeedCall(CeedBasisGetNumComponents(active_bases_in[b_in], &num_comp));
336     if (active_bases_in[b_in] == CEED_BASIS_NONE) num_qpts = num_nodes;
337     else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b_in], &num_qpts));
338 
339     // Construct identity matrix for basis if required
340     for (CeedInt i = 0; i < num_eval_modes_in[b_in]; i++) {
341       has_eval_none = has_eval_none || (eval_modes_in[b_in][i] == CEED_EVAL_NONE);
342     }
343     for (CeedInt i = 0; i < num_eval_modes_out[b_out]; i++) {
344       has_eval_none = has_eval_none || (eval_modes_out[b_out][i] == CEED_EVAL_NONE);
345     }
346     if (has_eval_none) {
347       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
348       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
349     }
350 
351     // Compute the diagonal of B^T D B
352     // Each element
353     for (CeedSize e = 0; e < num_elem; e++) {
354       // Each basis eval mode pair
355       CeedInt      d_out              = 0, q_comp_out;
356       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
357 
358       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b_out]; e_out++) {
359         CeedInt           d_in              = 0, q_comp_in;
360         const CeedScalar *B_t               = NULL;
361         CeedEvalMode      eval_mode_in_prev = CEED_EVAL_NONE;
362 
363         CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b_out], eval_modes_out[b_out][e_out], identity, &B_t));
364         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b_out], eval_modes_out[b_out][e_out], &q_comp_out));
365         if (q_comp_out > 1) {
366           if (e_out == 0 || eval_modes_out[b_out][e_out] != eval_mode_out_prev) d_out = 0;
367           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
368         }
369         eval_mode_out_prev = eval_modes_out[b_out][e_out];
370 
371         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b_in]; e_in++) {
372           const CeedScalar *B = NULL;
373 
374           CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b_in], eval_modes_in[b_in][e_in], identity, &B));
375           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b_in], eval_modes_in[b_in][e_in], &q_comp_in));
376           if (q_comp_in > 1) {
377             if (e_in == 0 || eval_modes_in[b_in][e_in] != eval_mode_in_prev) d_in = 0;
378             else B = &B[(++d_in) * num_qpts * num_nodes];
379           }
380           eval_mode_in_prev = eval_modes_in[b_in][e_in];
381 
382           // Each component
383           for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
384             // Each qpt/node pair
385             for (CeedInt q = 0; q < num_qpts; q++) {
386               if (is_point_block) {
387                 // Point Block Diagonal
388                 for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
389                   const CeedSize c_offset =
390                       (eval_mode_offsets_in[b_in][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
391                   const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
392 
393                   for (CeedInt n = 0; n < num_nodes; n++) {
394                     elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
395                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
396                   }
397                 }
398               } else {
399                 // Diagonal Only
400                 const CeedInt c_offset =
401                     (eval_mode_offsets_in[b_in][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
402                 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
403 
404                 for (CeedInt n = 0; n < num_nodes; n++) {
405                   elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
406                 }
407               }
408             }
409           }
410         }
411       }
412     }
413     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
414 
415     // Assemble local operator diagonal
416     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
417 
418     // Cleanup
419     CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
420     CeedCall(CeedVectorDestroy(&elem_diag));
421     CeedCall(CeedFree(&identity));
422   }
423   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
424   CeedCall(CeedVectorDestroy(&assembled_qf));
425   return CEED_ERROR_SUCCESS;
426 }
427 
428 /**
429   @brief Core logic for assembling composite operator diagonal
430 
431   @param[in]  op             CeedOperator to assemble point block diagonal
432   @param[in]  request        Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
433   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
434   @param[out] assembled      CeedVector to store assembled diagonal
435 
436   @return An error code: 0 - success, otherwise - failure
437 
438   @ref Developer
439 **/
440 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block,
441                                                                  CeedVector assembled) {
442   CeedInt       num_sub;
443   CeedOperator *suboperators;
444 
445   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
446   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
447   for (CeedInt i = 0; i < num_sub; i++) {
448     if (is_point_block) {
449       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
450     } else {
451       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
452     }
453   }
454   return CEED_ERROR_SUCCESS;
455 }
456 
457 /**
458   @brief Build nonzero pattern for non-composite operator
459 
460   Users should generally use CeedOperatorLinearAssembleSymbolic()
461 
462   @param[in]  op     CeedOperator to assemble nonzero pattern
463   @param[in]  offset Offset for number of entries
464   @param[out] rows   Row number for each entry
465   @param[out] cols   Column number for each entry
466 
467   @return An error code: 0 - success, otherwise - failure
468 
469   @ref Developer
470 **/
471 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
472   Ceed                ceed;
473   bool                is_composite;
474   CeedSize            num_nodes_in, num_nodes_out, count = 0;
475   CeedInt             num_elem_in, elem_size_in, num_comp_in, layout_er_in[3];
476   CeedInt             num_elem_out, elem_size_out, num_comp_out, layout_er_out[3], local_num_entries;
477   CeedScalar         *array;
478   const CeedScalar   *elem_dof_a_in, *elem_dof_a_out;
479   CeedVector          index_vec_in, index_vec_out, elem_dof_in, elem_dof_out;
480   CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out;
481 
482   CeedCall(CeedOperatorGetCeed(op, &ceed));
483   CeedCall(CeedOperatorIsComposite(op, &is_composite));
484   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
485 
486   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out));
487   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
488   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
489   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
490   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
491   CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, &layout_er_in));
492 
493   // Determine elem_dof relation for input
494   CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in));
495   CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array));
496   for (CeedInt i = 0; i < num_nodes_in; i++) array[i] = i;
497   CeedCall(CeedVectorRestoreArray(index_vec_in, &array));
498   CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in));
499   CeedCall(CeedVectorSetValue(elem_dof_in, 0.0));
500   CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in));
501   CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE));
502   CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in));
503   CeedCall(CeedVectorDestroy(&index_vec_in));
504   CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in));
505 
506   if (elem_rstr_in != elem_rstr_out) {
507     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
508     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
509               "Active input and output operator restrictions must have the same number of elements");
510     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
511     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
512     CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, &layout_er_out));
513 
514     // Determine elem_dof relation for output
515     CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out));
516     CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array));
517     for (CeedInt i = 0; i < num_nodes_out; i++) array[i] = i;
518     CeedCall(CeedVectorRestoreArray(index_vec_out, &array));
519     CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out));
520     CeedCall(CeedVectorSetValue(elem_dof_out, 0.0));
521     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out));
522     CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE));
523     CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out));
524     CeedCall(CeedVectorDestroy(&index_vec_out));
525     CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out));
526   } else {
527     num_elem_out     = num_elem_in;
528     elem_size_out    = elem_size_in;
529     num_comp_out     = num_comp_in;
530     layout_er_out[0] = layout_er_in[0];
531     layout_er_out[1] = layout_er_in[1];
532     layout_er_out[2] = layout_er_in[2];
533     elem_dof_a_out   = elem_dof_a_in;
534   }
535   local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
536 
537   // Determine i, j locations for element matrices
538   for (CeedInt e = 0; e < num_elem_in; e++) {
539     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
540       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
541         for (CeedInt i = 0; i < elem_size_out; i++) {
542           for (CeedInt j = 0; j < elem_size_in; j++) {
543             const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2];
544             const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2];
545             const CeedInt row                = elem_dof_a_out[elem_dof_index_row];
546             const CeedInt col                = elem_dof_a_in[elem_dof_index_col];
547 
548             rows[offset + count] = row;
549             cols[offset + count] = col;
550             count++;
551           }
552         }
553       }
554     }
555   }
556   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
557   CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in));
558   CeedCall(CeedVectorDestroy(&elem_dof_in));
559   if (elem_rstr_in != elem_rstr_out) {
560     CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out));
561     CeedCall(CeedVectorDestroy(&elem_dof_out));
562   }
563   return CEED_ERROR_SUCCESS;
564 }
565 
566 /**
567   @brief Assemble nonzero entries for non-composite operator
568 
569   Users should generally use CeedOperatorLinearAssemble()
570 
571   @param[in]  op     CeedOperator to assemble
572   @param[in]  offset Offset for number of entries
573   @param[out] values Values to assemble into matrix
574 
575   @return An error code: 0 - success, otherwise - failure
576 
577   @ref Developer
578 **/
579 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
580   Ceed ceed;
581   bool is_composite;
582 
583   CeedCall(CeedOperatorGetCeed(op, &ceed));
584   CeedCall(CeedOperatorIsComposite(op, &is_composite));
585   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
586 
587   // Early exit for empty operator
588   {
589     CeedInt num_elem = 0;
590 
591     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
592     if (num_elem == 0) return CEED_ERROR_SUCCESS;
593   }
594 
595   if (op->LinearAssembleSingle) {
596     // Backend version
597     CeedCall(op->LinearAssembleSingle(op, offset, values));
598     return CEED_ERROR_SUCCESS;
599   } else {
600     // Operator fallback
601     CeedOperator op_fallback;
602 
603     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
604     if (op_fallback) {
605       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
606       return CEED_ERROR_SUCCESS;
607     }
608   }
609 
610   // Assemble QFunction
611   CeedInt             layout_qf[3];
612   const CeedScalar   *assembled_qf_array;
613   CeedVector          assembled_qf        = NULL;
614   CeedElemRestriction assembled_elem_rstr = NULL;
615 
616   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE));
617   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf));
618   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
619   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
620 
621   // Get assembly data
622   CeedInt                  num_elem_in, elem_size_in, num_comp_in, num_qpts_in;
623   CeedInt                  num_elem_out, elem_size_out, num_comp_out, num_qpts_out, local_num_entries;
624   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
625   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
626   CeedBasis               *active_bases_in, *active_bases_out, basis_in, basis_out;
627   const CeedScalar       **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out;
628   CeedElemRestriction      elem_rstr_in, elem_rstr_out;
629   CeedRestrictionType      elem_rstr_type_in, elem_rstr_type_out;
630   const bool              *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL;
631   const CeedInt8          *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL;
632   CeedOperatorAssemblyData data;
633 
634   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
635   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out,
636                                                 &num_eval_modes_out, &eval_modes_out, NULL, NULL));
637 
638   CeedCheck(num_active_bases_in == num_active_bases_out && num_active_bases_in == 1, ceed, CEED_ERROR_UNSUPPORTED,
639             "Cannot assemble operator with multiple active bases");
640   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator without inputs/outputs");
641 
642   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out));
643   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
644   basis_in  = active_bases_in[0];
645   basis_out = active_bases_out[0];
646   B_mat_in  = B_mats_in[0];
647   B_mat_out = B_mats_out[0];
648 
649   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
650   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
651   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
652   if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in;
653   else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in));
654 
655   CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in));
656   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
657     CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in));
658   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
659     CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in));
660   }
661 
662   if (elem_rstr_in != elem_rstr_out) {
663     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
664     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
665               "Active input and output operator restrictions must have the same number of elements");
666     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
667     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
668     if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out;
669     else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out));
670     CeedCheck(num_qpts_in == num_qpts_out, ceed, CEED_ERROR_UNSUPPORTED,
671               "Active input and output bases must have the same number of quadrature points");
672 
673     CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out));
674     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
675       CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out));
676     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
677       CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out));
678     }
679   } else {
680     num_elem_out  = num_elem_in;
681     elem_size_out = elem_size_in;
682     num_comp_out  = num_comp_in;
683     num_qpts_out  = num_qpts_in;
684 
685     elem_rstr_orients_out      = elem_rstr_orients_in;
686     elem_rstr_curl_orients_out = elem_rstr_curl_orients_in;
687   }
688   local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
689 
690   // Loop over elements and put in data structure
691   // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
692   CeedSize    count = 0;
693   CeedScalar *vals, *BTD_mat = NULL, *elem_mat = NULL, *elem_mat_b = NULL;
694 
695   CeedCall(CeedCalloc(elem_size_out * num_qpts_in * num_eval_modes_in[0], &BTD_mat));
696   CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat));
697   if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b));
698 
699   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
700   for (CeedSize e = 0; e < num_elem_in; e++) {
701     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
702       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
703         // Compute B^T*D
704         for (CeedSize n = 0; n < elem_size_out; n++) {
705           for (CeedSize q = 0; q < num_qpts_in; q++) {
706             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
707               const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in;
708               CeedScalar     sum       = 0.0;
709 
710               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
711                 const CeedSize b_out_index     = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n;
712                 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out;
713                 const CeedSize qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
714 
715                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
716               }
717               BTD_mat[btd_index] = sum;
718             }
719           }
720         }
721 
722         // Form element matrix itself (for each block component)
723         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size_out, elem_size_in, num_qpts_in * num_eval_modes_in[0]));
724 
725         // Transform the element matrix if required
726         if (elem_rstr_orients_out) {
727           const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out];
728 
729           for (CeedInt i = 0; i < elem_size_out; i++) {
730             const double orient = elem_orients[i] ? -1.0 : 1.0;
731 
732             for (CeedInt j = 0; j < elem_size_in; j++) {
733               elem_mat[i * elem_size_in + j] *= orient;
734             }
735           }
736         } else if (elem_rstr_curl_orients_out) {
737           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out];
738 
739           // T^T*(B^T*D*B)
740           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
741           for (CeedInt i = 0; i < elem_size_out; i++) {
742             for (CeedInt j = 0; j < elem_size_in; j++) {
743               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] +
744                                                (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) +
745                                                (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0);
746             }
747           }
748         }
749         if (elem_rstr_orients_in) {
750           const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in];
751 
752           for (CeedInt i = 0; i < elem_size_out; i++) {
753             for (CeedInt j = 0; j < elem_size_in; j++) {
754               elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0;
755             }
756           }
757         } else if (elem_rstr_curl_orients_in) {
758           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in];
759 
760           // (B^T*D*B)*T
761           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
762           for (CeedInt i = 0; i < elem_size_out; i++) {
763             for (CeedInt j = 0; j < elem_size_in; j++) {
764               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] +
765                                                (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) +
766                                                (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0);
767             }
768           }
769         }
770 
771         // Put element matrix in coordinate data structure
772         for (CeedInt i = 0; i < elem_size_out; i++) {
773           for (CeedInt j = 0; j < elem_size_in; j++) {
774             vals[offset + count] = elem_mat[i * elem_size_in + j];
775             count++;
776           }
777         }
778       }
779     }
780   }
781   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries");
782   CeedCall(CeedVectorRestoreArray(values, &vals));
783 
784   // Cleanup
785   CeedCall(CeedFree(&BTD_mat));
786   CeedCall(CeedFree(&elem_mat));
787   CeedCall(CeedFree(&elem_mat_b));
788   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
789     CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in));
790   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
791     CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in));
792   }
793   if (elem_rstr_in != elem_rstr_out) {
794     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
795       CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out));
796     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
797       CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out));
798     }
799   }
800   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
801   CeedCall(CeedVectorDestroy(&assembled_qf));
802   return CEED_ERROR_SUCCESS;
803 }
804 
805 /**
806   @brief Count number of entries for assembled CeedOperator
807 
808   @param[in]  op          CeedOperator to assemble
809   @param[out] num_entries Number of entries in assembled representation
810 
811   @return An error code: 0 - success, otherwise - failure
812 
813   @ref Utility
814 **/
815 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) {
816   bool                is_composite;
817   CeedInt             num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out;
818   CeedElemRestriction rstr_in, rstr_out;
819 
820   CeedCall(CeedOperatorIsComposite(op, &is_composite));
821   CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
822 
823   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out));
824   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in));
825   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in));
826   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in));
827   if (rstr_in != rstr_out) {
828     CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out));
829     CeedCheck(num_elem_in == num_elem_out, op->ceed, CEED_ERROR_UNSUPPORTED,
830               "Active input and output operator restrictions must have the same number of elements");
831     CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out));
832     CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out));
833   } else {
834     num_elem_out  = num_elem_in;
835     elem_size_out = elem_size_in;
836     num_comp_out  = num_comp_in;
837   }
838   *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in;
839   return CEED_ERROR_SUCCESS;
840 }
841 
842 /**
843   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
844 
845   @param[in]  op_fine      Fine grid operator
846   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
847   @param[in]  rstr_coarse  Coarse grid restriction
848   @param[in]  basis_coarse Coarse grid active vector basis
849   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
850   @param[out] op_coarse    Coarse grid operator
851   @param[out] op_prolong   Coarse to fine operator, or NULL
852   @param[out] op_restrict  Fine to coarse operator, or NULL
853 
854   @return An error code: 0 - success, otherwise - failure
855 
856   @ref Developer
857 **/
858 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
859                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
860   bool                is_composite;
861   Ceed                ceed;
862   CeedInt             num_comp;
863   CeedVector          mult_vec         = NULL;
864   CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL;
865 
866   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
867 
868   // Check for composite operator
869   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
870   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
871 
872   // Coarse Grid
873   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
874   // -- Clone input fields
875   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
876     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
877       rstr_fine = op_fine->input_fields[i]->elem_rstr;
878       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
879     } else {
880       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
881                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
882     }
883   }
884   // -- Clone output fields
885   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
886     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
887       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
888     } else {
889       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
890                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
891     }
892   }
893   // -- Clone QFunctionAssemblyData
894   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
895 
896   // Multiplicity vector
897   if (op_restrict || op_prolong) {
898     CeedVector          mult_e_vec;
899     CeedRestrictionType rstr_type;
900 
901     CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type));
902     CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED,
903               "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported");
904     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
905     CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine));
906     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
907     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
908     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
909     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
910     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
911     CeedCall(CeedVectorDestroy(&mult_e_vec));
912     CeedCall(CeedVectorReciprocal(mult_vec));
913   }
914 
915   // Clone name
916   bool   has_name = op_fine->name;
917   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
918   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
919 
920   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
921   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
922             "Prolongation or restriction operator creation requires coarse-to-fine basis");
923 
924   // Restriction/Prolongation Operators
925   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
926 
927   // Restriction
928   if (op_restrict) {
929     CeedInt             *num_comp_r_data;
930     CeedQFunctionContext ctx_r;
931     CeedQFunction        qf_restrict;
932 
933     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
934     CeedCall(CeedCalloc(1, &num_comp_r_data));
935     num_comp_r_data[0] = num_comp;
936     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
937     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
938     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
939     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
940     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
941     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
942     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
943     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
944 
945     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
946     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
947     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
948     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
949 
950     // Set name
951     char *restriction_name;
952 
953     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
954     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
955     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
956     CeedCall(CeedFree(&restriction_name));
957 
958     // Check
959     CeedCall(CeedOperatorCheckReady(*op_restrict));
960 
961     // Cleanup
962     CeedCall(CeedQFunctionDestroy(&qf_restrict));
963   }
964 
965   // Prolongation
966   if (op_prolong) {
967     CeedInt             *num_comp_p_data;
968     CeedQFunctionContext ctx_p;
969     CeedQFunction        qf_prolong;
970 
971     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
972     CeedCall(CeedCalloc(1, &num_comp_p_data));
973     num_comp_p_data[0] = num_comp;
974     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
975     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
976     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
977     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
978     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
979     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
980     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
981     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
982 
983     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
984     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
985     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
986     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
987 
988     // Set name
989     char *prolongation_name;
990 
991     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
992     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
993     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
994     CeedCall(CeedFree(&prolongation_name));
995 
996     // Check
997     CeedCall(CeedOperatorCheckReady(*op_prolong));
998 
999     // Cleanup
1000     CeedCall(CeedQFunctionDestroy(&qf_prolong));
1001   }
1002 
1003   // Check
1004   CeedCall(CeedOperatorCheckReady(*op_coarse));
1005 
1006   // Cleanup
1007   CeedCall(CeedVectorDestroy(&mult_vec));
1008   CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine));
1009   CeedCall(CeedBasisDestroy(&basis_c_to_f));
1010   return CEED_ERROR_SUCCESS;
1011 }
1012 
1013 /**
1014   @brief Build 1D mass matrix and Laplacian with perturbation
1015 
1016   @param[in]  interp_1d   Interpolation matrix in one dimension
1017   @param[in]  grad_1d     Gradient matrix in one dimension
1018   @param[in]  q_weight_1d Quadrature weights in one dimension
1019   @param[in]  P_1d        Number of basis nodes in one dimension
1020   @param[in]  Q_1d        Number of quadrature points in one dimension
1021   @param[in]  dim         Dimension of basis
1022   @param[out] mass        Assembled mass matrix in one dimension
1023   @param[out] laplace     Assembled perturbed Laplacian in one dimension
1024 
1025   @return An error code: 0 - success, otherwise - failure
1026 
1027   @ref Developer
1028 **/
1029 CeedPragmaOptimizeOff
1030 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
1031                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
1032   for (CeedInt i = 0; i < P_1d; i++) {
1033     for (CeedInt j = 0; j < P_1d; j++) {
1034       CeedScalar sum = 0.0;
1035       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
1036       mass[i + j * P_1d] = sum;
1037     }
1038   }
1039   // -- Laplacian
1040   for (CeedInt i = 0; i < P_1d; i++) {
1041     for (CeedInt j = 0; j < P_1d; j++) {
1042       CeedScalar sum = 0.0;
1043 
1044       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
1045       laplace[i + j * P_1d] = sum;
1046     }
1047   }
1048   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
1049   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
1050   return CEED_ERROR_SUCCESS;
1051 }
1052 CeedPragmaOptimizeOn
1053 
1054 /// @}
1055 
1056 /// ----------------------------------------------------------------------------
1057 /// CeedOperator Backend API
1058 /// ----------------------------------------------------------------------------
1059 /// @addtogroup CeedOperatorBackend
1060 /// @{
1061 
1062 /**
1063   @brief Create point block restriction for active operator field
1064 
1065   @param[in]  rstr             Original CeedElemRestriction for active field
1066   @param[out] point_block_rstr Address of the variable where the newly created CeedElemRestriction will be stored
1067 
1068   @return An error code: 0 - success, otherwise - failure
1069 
1070   @ref Backend
1071 **/
1072 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) {
1073   Ceed           ceed;
1074   CeedInt        num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets;
1075   CeedSize       l_size;
1076   const CeedInt *offsets;
1077 
1078   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
1079   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
1080 
1081   // Expand offsets
1082   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
1083   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
1084   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
1085   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
1086   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
1087   shift = num_comp;
1088   if (comp_stride != 1) shift *= num_comp;
1089   CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets));
1090   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
1091     point_block_offsets[i] = offsets[i] * shift;
1092   }
1093 
1094   // Create new restriction
1095   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
1096                                      point_block_offsets, point_block_rstr));
1097 
1098   // Cleanup
1099   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
1100   return CEED_ERROR_SUCCESS;
1101 }
1102 
1103 /**
1104   @brief Create object holding CeedQFunction assembly data for CeedOperator
1105 
1106   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
1107   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
1108 
1109   @return An error code: 0 - success, otherwise - failure
1110 
1111   @ref Backend
1112 **/
1113 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
1114   CeedCall(CeedCalloc(1, data));
1115   (*data)->ref_count = 1;
1116   (*data)->ceed      = ceed;
1117   CeedCall(CeedReference(ceed));
1118   return CEED_ERROR_SUCCESS;
1119 }
1120 
1121 /**
1122   @brief Increment the reference counter for a CeedQFunctionAssemblyData
1123 
1124   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
1125 
1126   @return An error code: 0 - success, otherwise - failure
1127 
1128   @ref Backend
1129 **/
1130 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
1131   data->ref_count++;
1132   return CEED_ERROR_SUCCESS;
1133 }
1134 
1135 /**
1136   @brief Set re-use of CeedQFunctionAssemblyData
1137 
1138   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
1139   @param[in]     reuse_data Boolean flag indicating data re-use
1140 
1141   @return An error code: 0 - success, otherwise - failure
1142 
1143   @ref Backend
1144 **/
1145 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
1146   data->reuse_data        = reuse_data;
1147   data->needs_data_update = true;
1148   return CEED_ERROR_SUCCESS;
1149 }
1150 
1151 /**
1152   @brief Mark QFunctionAssemblyData as stale
1153 
1154   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
1155   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
1156 
1157   @return An error code: 0 - success, otherwise - failure
1158 
1159   @ref Backend
1160 **/
1161 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
1162   data->needs_data_update = needs_data_update;
1163   return CEED_ERROR_SUCCESS;
1164 }
1165 
1166 /**
1167   @brief Determine if QFunctionAssemblyData needs update
1168 
1169   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
1170   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1171 
1172   @return An error code: 0 - success, otherwise - failure
1173 
1174   @ref Backend
1175 **/
1176 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
1177   *is_update_needed = !data->reuse_data || data->needs_data_update;
1178   return CEED_ERROR_SUCCESS;
1179 }
1180 
1181 /**
1182   @brief Copy the pointer to a CeedQFunctionAssemblyData.
1183 
1184   Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
1185 
1186   Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
1187         CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
1188         CeedQFunctionAssemblyData.
1189 
1190   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
1191   @param[in,out] data_copy Variable to store copied reference
1192 
1193   @return An error code: 0 - success, otherwise - failure
1194 
1195   @ref Backend
1196 **/
1197 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1198   CeedCall(CeedQFunctionAssemblyDataReference(data));
1199   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1200   *data_copy = data;
1201   return CEED_ERROR_SUCCESS;
1202 }
1203 
1204 /**
1205   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1206 
1207   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1208   @param[out] is_setup Boolean flag for setup status
1209 
1210   @return An error code: 0 - success, otherwise - failure
1211 
1212   @ref Backend
1213 **/
1214 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1215   *is_setup = data->is_setup;
1216   return CEED_ERROR_SUCCESS;
1217 }
1218 
1219 /**
1220   @brief Set internal objects for CeedQFunctionAssemblyData
1221 
1222   @param[in,out] data CeedQFunctionAssemblyData to set objects
1223   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1224   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1225 
1226   @return An error code: 0 - success, otherwise - failure
1227 
1228   @ref Backend
1229 **/
1230 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1231   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1232   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1233 
1234   data->is_setup = true;
1235   return CEED_ERROR_SUCCESS;
1236 }
1237 
1238 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1239   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1240 
1241   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1242   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1243   return CEED_ERROR_SUCCESS;
1244 }
1245 
1246 /**
1247   @brief Destroy CeedQFunctionAssemblyData
1248 
1249   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1250 
1251   @return An error code: 0 - success, otherwise - failure
1252 
1253   @ref Backend
1254 **/
1255 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1256   if (!*data || --(*data)->ref_count > 0) {
1257     *data = NULL;
1258     return CEED_ERROR_SUCCESS;
1259   }
1260   CeedCall(CeedDestroy(&(*data)->ceed));
1261   CeedCall(CeedVectorDestroy(&(*data)->vec));
1262   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1263 
1264   CeedCall(CeedFree(data));
1265   return CEED_ERROR_SUCCESS;
1266 }
1267 
1268 /**
1269   @brief Get CeedOperatorAssemblyData
1270 
1271   @param[in]  op   CeedOperator to assemble
1272   @param[out] data CeedQFunctionAssemblyData
1273 
1274   @return An error code: 0 - success, otherwise - failure
1275 
1276   @ref Backend
1277 **/
1278 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1279   if (!op->op_assembled) {
1280     CeedOperatorAssemblyData data;
1281 
1282     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1283     op->op_assembled = data;
1284   }
1285   *data = op->op_assembled;
1286   return CEED_ERROR_SUCCESS;
1287 }
1288 
1289 /**
1290   @brief Create object holding CeedOperator assembly data.
1291 
1292   The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1293   An array with references to the corresponding active CeedElemRestrictions is also stored.
1294   For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1295   The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1296 CeedEvalMode.
1297   The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1298   Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1299 
1300   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1301   @param[in]  op   CeedOperator to be assembled
1302   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1303 
1304   @return An error code: 0 - success, otherwise - failure
1305 
1306   @ref Backend
1307 **/
1308 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1309   CeedInt             num_active_bases_in = 0, num_active_bases_out = 0, offset = 0;
1310   CeedInt             num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL;
1311   CeedSize          **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1312   CeedEvalMode      **eval_modes_in = NULL, **eval_modes_out = NULL;
1313   CeedQFunctionField *qf_fields;
1314   CeedQFunction       qf;
1315   CeedOperatorField  *op_fields;
1316   bool                is_composite;
1317 
1318   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1319   CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators.");
1320 
1321   // Allocate
1322   CeedCall(CeedCalloc(1, data));
1323   (*data)->ceed = ceed;
1324   CeedCall(CeedReference(ceed));
1325 
1326   // Build OperatorAssembly data
1327   CeedCall(CeedOperatorGetQFunction(op, &qf));
1328   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1329   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1330 
1331   // Determine active input basis
1332   for (CeedInt i = 0; i < num_input_fields; i++) {
1333     CeedVector vec;
1334 
1335     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1336     if (vec == CEED_VECTOR_ACTIVE) {
1337       CeedInt      index = -1, num_comp, q_comp;
1338       CeedEvalMode eval_mode;
1339       CeedBasis    basis_in = NULL;
1340 
1341       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1342       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1343       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1344       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1345       for (CeedInt i = 0; i < num_active_bases_in; i++) {
1346         if ((*data)->active_bases_in[i] == basis_in) index = i;
1347       }
1348       if (index == -1) {
1349         CeedElemRestriction elem_rstr_in;
1350 
1351         index = num_active_bases_in;
1352         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in));
1353         (*data)->active_bases_in[num_active_bases_in] = NULL;
1354         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in]));
1355         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in));
1356         (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL;
1357         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1358         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in]));
1359         CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in));
1360         num_eval_modes_in[index] = 0;
1361         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in));
1362         eval_modes_in[index] = NULL;
1363         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in));
1364         eval_mode_offsets_in[index] = NULL;
1365         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in));
1366         (*data)->assembled_bases_in[index] = NULL;
1367         num_active_bases_in++;
1368       }
1369       if (eval_mode != CEED_EVAL_WEIGHT) {
1370         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1371         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1372         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1373         for (CeedInt d = 0; d < q_comp; d++) {
1374           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1375           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1376           offset += num_comp;
1377         }
1378         num_eval_modes_in[index] += q_comp;
1379       }
1380     }
1381   }
1382 
1383   // Determine active output basis
1384   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1385   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1386   offset = 0;
1387   for (CeedInt i = 0; i < num_output_fields; i++) {
1388     CeedVector vec;
1389 
1390     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1391     if (vec == CEED_VECTOR_ACTIVE) {
1392       CeedInt      index = -1, num_comp, q_comp;
1393       CeedEvalMode eval_mode;
1394       CeedBasis    basis_out = NULL;
1395 
1396       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1397       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1398       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1399       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1400       for (CeedInt i = 0; i < num_active_bases_out; i++) {
1401         if ((*data)->active_bases_out[i] == basis_out) index = i;
1402       }
1403       if (index == -1) {
1404         CeedElemRestriction elem_rstr_out;
1405 
1406         index = num_active_bases_out;
1407         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out));
1408         (*data)->active_bases_out[num_active_bases_out] = NULL;
1409         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out]));
1410         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out));
1411         (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL;
1412         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1413         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out]));
1414         CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out));
1415         num_eval_modes_out[index] = 0;
1416         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out));
1417         eval_modes_out[index] = NULL;
1418         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out));
1419         eval_mode_offsets_out[index] = NULL;
1420         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out));
1421         (*data)->assembled_bases_out[index] = NULL;
1422         num_active_bases_out++;
1423       }
1424       if (eval_mode != CEED_EVAL_WEIGHT) {
1425         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1426         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1427         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1428         for (CeedInt d = 0; d < q_comp; d++) {
1429           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1430           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1431           offset += num_comp;
1432         }
1433         num_eval_modes_out[index] += q_comp;
1434       }
1435     }
1436   }
1437   (*data)->num_active_bases_in   = num_active_bases_in;
1438   (*data)->num_eval_modes_in     = num_eval_modes_in;
1439   (*data)->eval_modes_in         = eval_modes_in;
1440   (*data)->eval_mode_offsets_in  = eval_mode_offsets_in;
1441   (*data)->num_active_bases_out  = num_active_bases_out;
1442   (*data)->num_eval_modes_out    = num_eval_modes_out;
1443   (*data)->eval_modes_out        = eval_modes_out;
1444   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1445   (*data)->num_output_components = offset;
1446   return CEED_ERROR_SUCCESS;
1447 }
1448 
1449 /**
1450   @brief Get CeedOperator CeedEvalModes for assembly.
1451 
1452   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1453 
1454   @param[in]  data                  CeedOperatorAssemblyData
1455   @param[out] num_active_bases_in   Total number of active bases for input
1456   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1457                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1458   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1459   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1460   @param[out] num_active_bases_out  Total number of active bases for output
1461   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1462   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1463   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1464   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1465                                       including contributions of all active bases
1466 
1467   @return An error code: 0 - success, otherwise - failure
1468 
1469   @ref Backend
1470 **/
1471 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in,
1472                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out,
1473                                          CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out,
1474                                          CeedSize *num_output_components) {
1475   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1476   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1477   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1478   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1479   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1480   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1481   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1482   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1483   if (num_output_components) *num_output_components = data->num_output_components;
1484   return CEED_ERROR_SUCCESS;
1485 }
1486 
1487 /**
1488   @brief Get CeedOperator CeedBasis data for assembly.
1489 
1490   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1491 
1492   @param[in]  data                 CeedOperatorAssemblyData
1493   @param[out] num_active_bases_in  Number of active input bases, or NULL
1494   @param[out] active_bases_in      Pointer to hold active input CeedBasis, or NULL
1495   @param[out] assembled_bases_in   Pointer to hold assembled active input B, or NULL
1496   @param[out] num_active_bases_out Number of active output bases, or NULL
1497   @param[out] active_bases_out     Pointer to hold active output CeedBasis, or NULL
1498   @param[out] assembled_bases_out  Pointer to hold assembled active output B, or NULL
1499 
1500   @return An error code: 0 - success, otherwise - failure
1501 
1502   @ref Backend
1503 **/
1504 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in,
1505                                      const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out,
1506                                      const CeedScalar ***assembled_bases_out) {
1507   // Assemble B_in, B_out if needed
1508   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1509     CeedInt num_qpts;
1510 
1511     if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts));
1512     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts));
1513     for (CeedInt b = 0; b < data->num_active_bases_in; b++) {
1514       bool        has_eval_none = false;
1515       CeedInt     num_nodes;
1516       CeedScalar *B_in = NULL, *identity = NULL;
1517 
1518       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes));
1519       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1520 
1521       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1522         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1523       }
1524       if (has_eval_none) {
1525         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1526         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1527           identity[i * num_nodes + i] = 1.0;
1528         }
1529       }
1530 
1531       for (CeedInt q = 0; q < num_qpts; q++) {
1532         for (CeedInt n = 0; n < num_nodes; n++) {
1533           CeedInt      d_in              = 0, q_comp_in;
1534           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1535 
1536           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1537             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1538             const CeedScalar *B  = NULL;
1539 
1540             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B));
1541             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in));
1542             if (q_comp_in > 1) {
1543               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1544               else B = &B[(++d_in) * num_qpts * num_nodes];
1545             }
1546             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1547             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1548           }
1549         }
1550       }
1551       if (identity) CeedCall(CeedFree(&identity));
1552       data->assembled_bases_in[b] = B_in;
1553     }
1554   }
1555 
1556   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1557     CeedInt num_qpts;
1558 
1559     if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts));
1560     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts));
1561     for (CeedInt b = 0; b < data->num_active_bases_out; b++) {
1562       bool        has_eval_none = false;
1563       CeedInt     num_nodes;
1564       CeedScalar *B_out = NULL, *identity = NULL;
1565 
1566       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes));
1567       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1568 
1569       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1570         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1571       }
1572       if (has_eval_none) {
1573         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1574         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1575           identity[i * num_nodes + i] = 1.0;
1576         }
1577       }
1578 
1579       for (CeedInt q = 0; q < num_qpts; q++) {
1580         for (CeedInt n = 0; n < num_nodes; n++) {
1581           CeedInt      d_out              = 0, q_comp_out;
1582           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1583 
1584           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1585             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1586             const CeedScalar *B  = NULL;
1587 
1588             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B));
1589             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out));
1590             if (q_comp_out > 1) {
1591               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1592               else B = &B[(++d_out) * num_qpts * num_nodes];
1593             }
1594             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1595             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1596           }
1597         }
1598       }
1599       if (identity) CeedCall(CeedFree(&identity));
1600       data->assembled_bases_out[b] = B_out;
1601     }
1602   }
1603 
1604   // Pass out assembled data
1605   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1606   if (active_bases_in) *active_bases_in = data->active_bases_in;
1607   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1608   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1609   if (active_bases_out) *active_bases_out = data->active_bases_out;
1610   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1611   return CEED_ERROR_SUCCESS;
1612 }
1613 
1614 /**
1615   @brief Get CeedOperator CeedBasis data for assembly.
1616 
1617   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1618 
1619   @param[in]  data                      CeedOperatorAssemblyData
1620   @param[out] num_active_elem_rstrs_in  Number of active input element restrictions, or NULL
1621   @param[out] active_elem_rstrs_in      Pointer to hold active input CeedElemRestrictions, or NULL
1622   @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or NULL
1623   @param[out] active_elem_rstrs_out     Pointer to hold active output CeedElemRestrictions, or NULL
1624 
1625   @return An error code: 0 - success, otherwise - failure
1626 
1627   @ref Backend
1628 **/
1629 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in,
1630                                                 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out,
1631                                                 CeedElemRestriction **active_elem_rstrs_out) {
1632   if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in;
1633   if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in;
1634   if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out;
1635   if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out;
1636   return CEED_ERROR_SUCCESS;
1637 }
1638 
1639 /**
1640   @brief Destroy CeedOperatorAssemblyData
1641 
1642   @param[in,out] data CeedOperatorAssemblyData to destroy
1643 
1644   @return An error code: 0 - success, otherwise - failure
1645 
1646   @ref Backend
1647 **/
1648 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1649   if (!*data) {
1650     *data = NULL;
1651     return CEED_ERROR_SUCCESS;
1652   }
1653   CeedCall(CeedDestroy(&(*data)->ceed));
1654   for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) {
1655     CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b]));
1656     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b]));
1657     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1658     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1659     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1660   }
1661   for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) {
1662     CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b]));
1663     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b]));
1664     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1665     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1666     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1667   }
1668   CeedCall(CeedFree(&(*data)->active_bases_in));
1669   CeedCall(CeedFree(&(*data)->active_bases_out));
1670   CeedCall(CeedFree(&(*data)->active_elem_rstrs_in));
1671   CeedCall(CeedFree(&(*data)->active_elem_rstrs_out));
1672   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1673   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1674   CeedCall(CeedFree(&(*data)->eval_modes_in));
1675   CeedCall(CeedFree(&(*data)->eval_modes_out));
1676   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1677   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1678   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1679   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1680 
1681   CeedCall(CeedFree(data));
1682   return CEED_ERROR_SUCCESS;
1683 }
1684 
1685 /// @}
1686 
1687 /// ----------------------------------------------------------------------------
1688 /// CeedOperator Public API
1689 /// ----------------------------------------------------------------------------
1690 /// @addtogroup CeedOperatorUser
1691 /// @{
1692 
1693 /**
1694   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1695 
1696   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1697   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices
1698 representing the action of the CeedQFunction for a corresponding quadrature point on an element.
1699 
1700   Inputs and outputs are in the order provided by the user when adding CeedOperator fields.
1701   For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction
1702 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1703 
1704   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1705 
1706   @param[in]  op        CeedOperator to assemble CeedQFunction
1707   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1708   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1709   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1710 
1711   @return An error code: 0 - success, otherwise - failure
1712 
1713   @ref User
1714 **/
1715 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1716   CeedCall(CeedOperatorCheckReady(op));
1717 
1718   if (op->LinearAssembleQFunction) {
1719     // Backend version
1720     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1721   } else {
1722     // Operator fallback
1723     CeedOperator op_fallback;
1724 
1725     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1726     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1727     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1728   }
1729   return CEED_ERROR_SUCCESS;
1730 }
1731 
1732 /**
1733   @brief Assemble CeedQFunction and store result internally.
1734 
1735   Return copied references of stored data to the caller.
1736   Caller is responsible for ownership and destruction of the copied references.
1737   See also @ref CeedOperatorLinearAssembleQFunction
1738 
1739   Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers.
1740         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
1741 
1742   @param[in]  op        CeedOperator to assemble CeedQFunction
1743   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1744   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1745   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1746 
1747   @return An error code: 0 - success, otherwise - failure
1748 
1749   @ref User
1750 **/
1751 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1752   int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL;
1753   CeedOperator op_assemble                                                                           = NULL;
1754   CeedOperator op_fallback_parent                                                                    = NULL;
1755 
1756   CeedCall(CeedOperatorCheckReady(op));
1757 
1758   // Determine if fallback parent or operator has implementation
1759   CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent));
1760   if (op_fallback_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) {
1761     // -- Backend version for op fallback parent is faster, if it exists
1762     LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate;
1763     op_assemble                   = op_fallback_parent;
1764   } else if (op->LinearAssembleQFunctionUpdate) {
1765     // -- Backend version for op
1766     LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate;
1767     op_assemble                   = op;
1768   }
1769 
1770   // Assemble QFunction
1771   if (LinearAssembleQFunctionUpdate) {
1772     // Backend or fallback parent version
1773     bool                qf_assembled_is_setup;
1774     CeedVector          assembled_vec  = NULL;
1775     CeedElemRestriction assembled_rstr = NULL;
1776 
1777     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1778     if (qf_assembled_is_setup) {
1779       bool update_needed;
1780 
1781       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1782       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1783       if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request));
1784     } else {
1785       CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request));
1786       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1787     }
1788     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1789 
1790     // Copy reference from internally held copy
1791     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1792     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1793     CeedCall(CeedVectorDestroy(&assembled_vec));
1794     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1795   } else {
1796     // Operator fallback
1797     CeedOperator op_fallback;
1798 
1799     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1800     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1801     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1802   }
1803   return CEED_ERROR_SUCCESS;
1804 }
1805 
1806 /**
1807   @brief Assemble the diagonal of a square linear CeedOperator
1808 
1809   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1810 
1811   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1812 
1813   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1814 
1815   @param[in]  op        CeedOperator to assemble CeedQFunction
1816   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1817   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1818 
1819   @return An error code: 0 - success, otherwise - failure
1820 
1821   @ref User
1822 **/
1823 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1824   bool     is_composite;
1825   CeedSize input_size = 0, output_size = 0;
1826 
1827   CeedCall(CeedOperatorCheckReady(op));
1828   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1829 
1830   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1831   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1832 
1833   // Early exit for empty operator
1834   if (!is_composite) {
1835     CeedInt num_elem = 0;
1836 
1837     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1838     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1839   }
1840 
1841   if (op->LinearAssembleDiagonal) {
1842     // Backend version
1843     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1844     return CEED_ERROR_SUCCESS;
1845   } else if (op->LinearAssembleAddDiagonal) {
1846     // Backend version with zeroing first
1847     CeedCall(CeedVectorSetValue(assembled, 0.0));
1848     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1849     return CEED_ERROR_SUCCESS;
1850   } else {
1851     // Operator fallback
1852     CeedOperator op_fallback;
1853 
1854     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1855     if (op_fallback) {
1856       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1857       return CEED_ERROR_SUCCESS;
1858     }
1859   }
1860   // Default interface implementation
1861   CeedCall(CeedVectorSetValue(assembled, 0.0));
1862   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1863   return CEED_ERROR_SUCCESS;
1864 }
1865 
1866 /**
1867   @brief Assemble the diagonal of a square linear CeedOperator
1868 
1869   This sums into a CeedVector the diagonal of a linear CeedOperator.
1870 
1871   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1872 
1873   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1874 
1875   @param[in]  op        CeedOperator to assemble CeedQFunction
1876   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1877   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1878 
1879   @return An error code: 0 - success, otherwise - failure
1880 
1881   @ref User
1882 **/
1883 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1884   bool     is_composite;
1885   CeedSize input_size = 0, output_size = 0;
1886 
1887   CeedCall(CeedOperatorCheckReady(op));
1888   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1889 
1890   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1891   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1892 
1893   // Early exit for empty operator
1894   if (!is_composite) {
1895     CeedInt num_elem = 0;
1896 
1897     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1898     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1899   }
1900 
1901   if (op->LinearAssembleAddDiagonal) {
1902     // Backend version
1903     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1904     return CEED_ERROR_SUCCESS;
1905   } else {
1906     // Operator fallback
1907     CeedOperator op_fallback;
1908 
1909     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1910     if (op_fallback) {
1911       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1912       return CEED_ERROR_SUCCESS;
1913     }
1914   }
1915   // Default interface implementation
1916   if (is_composite) {
1917     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1918   } else {
1919     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1920   }
1921   return CEED_ERROR_SUCCESS;
1922 }
1923 
1924 /**
1925    @brief Fully assemble the point-block diagonal pattern of a linear operator.
1926 
1927    Expected to be used in conjunction with CeedOperatorLinearAssemblePointBlockDiagonal().
1928 
1929    The assembly routines use coordinate format, with `num_entries` tuples of the form (i, j, value) which indicate that value should be added to the
1930 matrix in entry (i, j).
1931   Note that the (i, j) pairs are unique.
1932   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in
1933 the same ordering.
1934 
1935    This will generally be slow unless your operator is low-order.
1936 
1937    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1938 
1939    @param[in]  op          CeedOperator to assemble
1940    @param[out] num_entries Number of entries in coordinate nonzero pattern
1941    @param[out] rows        Row number for each entry
1942    @param[out] cols        Column number for each entry
1943 
1944    @ref User
1945 **/
1946 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1947   Ceed          ceed;
1948   bool          is_composite;
1949   CeedInt       num_active_components, num_sub_operators;
1950   CeedOperator *sub_operators;
1951 
1952   CeedCall(CeedOperatorGetCeed(op, &ceed));
1953   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1954 
1955   CeedSize input_size = 0, output_size = 0;
1956   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1957   CeedCheck(input_size == output_size, ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1958 
1959   if (is_composite) {
1960     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_operators));
1961     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1962   } else {
1963     sub_operators     = &op;
1964     num_sub_operators = 1;
1965   }
1966 
1967   // Verify operator can be assembled correctly
1968   {
1969     CeedOperatorAssemblyData data;
1970     CeedInt                  num_active_elem_rstrs, comp_stride;
1971     CeedElemRestriction     *active_elem_rstrs;
1972 
1973     // Get initial values to check against
1974     CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data));
1975     CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
1976     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride));
1977     CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components));
1978 
1979     // Verify that all active element restrictions have same component stride and number of components
1980     for (CeedInt k = 0; k < num_sub_operators; k++) {
1981       CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data));
1982       CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
1983       for (CeedInt i = 0; i < num_active_elem_rstrs; i++) {
1984         CeedInt comp_stride_sub, num_active_components_sub;
1985 
1986         CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub));
1987         CeedCheck(comp_stride == comp_stride_sub, ceed, CEED_ERROR_DIMENSION,
1988                   "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub);
1989         CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub));
1990         CeedCheck(num_active_components == num_active_components_sub, ceed, CEED_ERROR_INCOMPATIBLE,
1991                   "All suboperators must have the same number of output components");
1992       }
1993     }
1994   }
1995   *num_entries = input_size * num_active_components;
1996   CeedCall(CeedCalloc(*num_entries, rows));
1997   CeedCall(CeedCalloc(*num_entries, cols));
1998 
1999   for (CeedInt o = 0; o < num_sub_operators; o++) {
2000     CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr;
2001     CeedInt             comp_stride, num_elem, elem_size;
2002     const CeedInt      *offsets, *point_block_offsets;
2003 
2004     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr));
2005     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride));
2006     CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem));
2007     CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size));
2008     CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets));
2009 
2010     CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr));
2011     CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets));
2012 
2013     for (CeedSize i = 0; i < num_elem * elem_size; i++) {
2014       for (CeedInt c_out = 0; c_out < num_active_components; c_out++) {
2015         for (CeedInt c_in = 0; c_in < num_active_components; c_in++) {
2016           (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride;
2017           (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride;
2018         }
2019       }
2020     }
2021 
2022     CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets));
2023     CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets));
2024     CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr));
2025   }
2026   return CEED_ERROR_SUCCESS;
2027 }
2028 
2029 /**
2030   @brief Assemble the point block diagonal of a square linear CeedOperator
2031 
2032   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
2033 
2034   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
2035 
2036   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2037 
2038   @param[in]  op        CeedOperator to assemble CeedQFunction
2039   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
2040 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
2041 component in].
2042   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2043 
2044   @return An error code: 0 - success, otherwise - failure
2045 
2046   @ref User
2047 **/
2048 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2049   bool     is_composite;
2050   CeedSize input_size = 0, output_size = 0;
2051 
2052   CeedCall(CeedOperatorCheckReady(op));
2053   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2054 
2055   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2056   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
2057 
2058   // Early exit for empty operator
2059   if (!is_composite) {
2060     CeedInt num_elem = 0;
2061 
2062     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2063     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2064   }
2065 
2066   if (op->LinearAssemblePointBlockDiagonal) {
2067     // Backend version
2068     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
2069     return CEED_ERROR_SUCCESS;
2070   } else if (op->LinearAssembleAddPointBlockDiagonal) {
2071     // Backend version with zeroing first
2072     CeedCall(CeedVectorSetValue(assembled, 0.0));
2073     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2074     return CEED_ERROR_SUCCESS;
2075   } else {
2076     // Operator fallback
2077     CeedOperator op_fallback;
2078 
2079     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2080     if (op_fallback) {
2081       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
2082       return CEED_ERROR_SUCCESS;
2083     }
2084   }
2085   // Default interface implementation
2086   CeedCall(CeedVectorSetValue(assembled, 0.0));
2087   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2088   return CEED_ERROR_SUCCESS;
2089 }
2090 
2091 /**
2092   @brief Assemble the point block diagonal of a square linear CeedOperator
2093 
2094   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
2095 
2096   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
2097 
2098   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2099 
2100   @param[in]  op        CeedOperator to assemble CeedQFunction
2101   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
2102 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
2103 component in].
2104   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2105 
2106   @return An error code: 0 - success, otherwise - failure
2107 
2108   @ref User
2109 **/
2110 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2111   bool     is_composite;
2112   CeedSize input_size = 0, output_size = 0;
2113 
2114   CeedCall(CeedOperatorCheckReady(op));
2115   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2116 
2117   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2118   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
2119 
2120   // Early exit for empty operator
2121   if (!is_composite) {
2122     CeedInt num_elem = 0;
2123 
2124     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2125     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2126   }
2127 
2128   if (op->LinearAssembleAddPointBlockDiagonal) {
2129     // Backend version
2130     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
2131     return CEED_ERROR_SUCCESS;
2132   } else {
2133     // Operator fallback
2134     CeedOperator op_fallback;
2135 
2136     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2137     if (op_fallback) {
2138       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
2139       return CEED_ERROR_SUCCESS;
2140     }
2141   }
2142   // Default interface implementation
2143   if (is_composite) {
2144     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
2145   } else {
2146     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
2147   }
2148   return CEED_ERROR_SUCCESS;
2149 }
2150 
2151 /**
2152    @brief Fully assemble the nonzero pattern of a linear operator.
2153 
2154    Expected to be used in conjunction with CeedOperatorLinearAssemble().
2155 
2156    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
2157 matrix in entry (i, j).
2158   Note that the (i, j) pairs are not unique and may repeat.
2159   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
2160 
2161    This will generally be slow unless your operator is low-order.
2162 
2163    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2164 
2165    @param[in]  op          CeedOperator to assemble
2166    @param[out] num_entries Number of entries in coordinate nonzero pattern
2167    @param[out] rows        Row number for each entry
2168    @param[out] cols        Column number for each entry
2169 
2170    @ref User
2171 **/
2172 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2173   bool          is_composite;
2174   CeedInt       num_suboperators, offset = 0;
2175   CeedSize      single_entries;
2176   CeedOperator *sub_operators;
2177 
2178   CeedCall(CeedOperatorCheckReady(op));
2179   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2180 
2181   if (op->LinearAssembleSymbolic) {
2182     // Backend version
2183     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
2184     return CEED_ERROR_SUCCESS;
2185   } else {
2186     // Operator fallback
2187     CeedOperator op_fallback;
2188 
2189     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2190     if (op_fallback) {
2191       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
2192       return CEED_ERROR_SUCCESS;
2193     }
2194   }
2195 
2196   // Default interface implementation
2197 
2198   // Count entries and allocate rows, cols arrays
2199   *num_entries = 0;
2200   if (is_composite) {
2201     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2202     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2203     for (CeedInt k = 0; k < num_suboperators; ++k) {
2204       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2205       *num_entries += single_entries;
2206     }
2207   } else {
2208     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
2209     *num_entries += single_entries;
2210   }
2211   CeedCall(CeedCalloc(*num_entries, rows));
2212   CeedCall(CeedCalloc(*num_entries, cols));
2213 
2214   // Assemble nonzero locations
2215   if (is_composite) {
2216     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2217     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2218     for (CeedInt k = 0; k < num_suboperators; ++k) {
2219       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
2220       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2221       offset += single_entries;
2222     }
2223   } else {
2224     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
2225   }
2226   return CEED_ERROR_SUCCESS;
2227 }
2228 
2229 /**
2230    @brief Fully assemble the nonzero entries of a linear operator.
2231 
2232    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
2233 
2234    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
2235 matrix in entry (i, j).
2236   Note that the (i, j) pairs are not unique and may repeat.
2237   This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
2238 
2239    This will generally be slow unless your operator is low-order.
2240 
2241    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2242 
2243    @param[in]  op     CeedOperator to assemble
2244    @param[out] values Values to assemble into matrix
2245 
2246    @ref User
2247 **/
2248 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
2249   bool          is_composite;
2250   CeedInt       num_suboperators, offset = 0;
2251   CeedSize      single_entries = 0;
2252   CeedOperator *sub_operators;
2253 
2254   CeedCall(CeedOperatorCheckReady(op));
2255   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2256 
2257   // Early exit for empty operator
2258   if (!is_composite) {
2259     CeedInt num_elem = 0;
2260 
2261     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2262     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2263   }
2264 
2265   if (op->LinearAssemble) {
2266     // Backend version
2267     CeedCall(op->LinearAssemble(op, values));
2268     return CEED_ERROR_SUCCESS;
2269   } else {
2270     // Operator fallback
2271     CeedOperator op_fallback;
2272 
2273     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2274     if (op_fallback) {
2275       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2276       return CEED_ERROR_SUCCESS;
2277     }
2278   }
2279 
2280   // Default interface implementation
2281   CeedCall(CeedVectorSetValue(values, 0.0));
2282   if (is_composite) {
2283     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2284     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2285     for (CeedInt k = 0; k < num_suboperators; k++) {
2286       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2287       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2288       offset += single_entries;
2289     }
2290   } else {
2291     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2292   }
2293   return CEED_ERROR_SUCCESS;
2294 }
2295 
2296 /**
2297   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
2298 
2299   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2300 
2301   @param[in]  op               Composite CeedOperator
2302   @param[in]  num_skip_indices Number of suboperators to skip
2303   @param[in]  skip_indices     Array of indices of suboperators to skip
2304   @param[out] mult             Vector to store multiplicity (of size l_size)
2305 
2306   @return An error code: 0 - success, otherwise - failure
2307 
2308   @ref User
2309 **/
2310 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2311   Ceed                ceed;
2312   CeedInt             num_suboperators;
2313   CeedSize            l_vec_len;
2314   CeedScalar         *mult_array;
2315   CeedVector          ones_l_vec;
2316   CeedElemRestriction elem_rstr, mult_elem_rstr;
2317   CeedOperator       *sub_operators;
2318 
2319   CeedCall(CeedOperatorCheckReady(op));
2320 
2321   CeedCall(CeedOperatorGetCeed(op, &ceed));
2322 
2323   // Zero mult vector
2324   CeedCall(CeedVectorSetValue(mult, 0.0));
2325 
2326   // Get suboperators
2327   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2328   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2329   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2330 
2331   // Work vector
2332   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2333   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2334   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2335   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2336 
2337   // Compute multiplicity across suboperators
2338   for (CeedInt i = 0; i < num_suboperators; i++) {
2339     const CeedScalar *sub_mult_array;
2340     CeedVector        sub_mult_l_vec, ones_e_vec;
2341 
2342     // -- Check for suboperator to skip
2343     for (CeedInt j = 0; j < num_skip_indices; j++) {
2344       if (skip_indices[j] == i) continue;
2345     }
2346 
2347     // -- Sub operator multiplicity
2348     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2349     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr));
2350     CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2351     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2352     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2353     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2354     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2355     // ---- Flag every node present in the current suboperator
2356     for (CeedInt j = 0; j < l_vec_len; j++) {
2357       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2358     }
2359     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2360     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2361     CeedCall(CeedVectorDestroy(&ones_e_vec));
2362     CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr));
2363   }
2364   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2365   CeedCall(CeedVectorDestroy(&ones_l_vec));
2366   return CEED_ERROR_SUCCESS;
2367 }
2368 
2369 /**
2370   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2371 grid interpolation
2372 
2373   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2374 
2375   @param[in]  op_fine      Fine grid operator
2376   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2377   @param[in]  rstr_coarse  Coarse grid restriction
2378   @param[in]  basis_coarse Coarse grid active vector basis
2379   @param[out] op_coarse    Coarse grid operator
2380   @param[out] op_prolong   Coarse to fine operator, or NULL
2381   @param[out] op_restrict  Fine to coarse operator, or NULL
2382 
2383   @return An error code: 0 - success, otherwise - failure
2384 
2385   @ref User
2386 **/
2387 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2388                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2389   CeedBasis basis_c_to_f = NULL;
2390 
2391   CeedCall(CeedOperatorCheckReady(op_fine));
2392 
2393   // Build prolongation matrix, if required
2394   if (op_prolong || op_restrict) {
2395     CeedBasis basis_fine;
2396 
2397     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2398     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2399   }
2400 
2401   // Core code
2402   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2403   return CEED_ERROR_SUCCESS;
2404 }
2405 
2406 /**
2407   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2408 
2409   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2410 
2411   @param[in]  op_fine       Fine grid operator
2412   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2413   @param[in]  rstr_coarse   Coarse grid restriction
2414   @param[in]  basis_coarse  Coarse grid active vector basis
2415   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2416   @param[out] op_coarse     Coarse grid operator
2417   @param[out] op_prolong    Coarse to fine operator, or NULL
2418   @param[out] op_restrict   Fine to coarse operator, or NULL
2419 
2420   @return An error code: 0 - success, otherwise - failure
2421 
2422   @ref User
2423 **/
2424 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2425                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2426                                              CeedOperator *op_restrict) {
2427   Ceed      ceed;
2428   CeedInt   Q_f, Q_c;
2429   CeedBasis basis_fine, basis_c_to_f = NULL;
2430 
2431   CeedCall(CeedOperatorCheckReady(op_fine));
2432   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2433 
2434   // Check for compatible quadrature spaces
2435   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2436   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2437   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2438   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2439 
2440   // Create coarse to fine basis, if required
2441   if (op_prolong || op_restrict) {
2442     CeedInt     dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2443     CeedScalar *q_ref, *q_weight, *grad;
2444 
2445     // Check if interpolation matrix is provided
2446     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2447               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2448     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2449     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2450     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2451     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2452     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2453     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2454     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2455     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2456     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2457     CeedCall(CeedFree(&q_ref));
2458     CeedCall(CeedFree(&q_weight));
2459     CeedCall(CeedFree(&grad));
2460   }
2461 
2462   // Core code
2463   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2464   return CEED_ERROR_SUCCESS;
2465 }
2466 
2467 /**
2468   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2469 
2470   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2471 
2472   @param[in]  op_fine       Fine grid operator
2473   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2474   @param[in]  rstr_coarse   Coarse grid restriction
2475   @param[in]  basis_coarse  Coarse grid active vector basis
2476   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2477   @param[out] op_coarse     Coarse grid operator
2478   @param[out] op_prolong    Coarse to fine operator, or NULL
2479   @param[out] op_restrict   Fine to coarse operator, or NULL
2480 
2481   @return An error code: 0 - success, otherwise - failure
2482 
2483   @ref User
2484 **/
2485 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2486                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2487                                        CeedOperator *op_restrict) {
2488   Ceed      ceed;
2489   CeedInt   Q_f, Q_c;
2490   CeedBasis basis_fine, basis_c_to_f = NULL;
2491 
2492   CeedCall(CeedOperatorCheckReady(op_fine));
2493   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2494 
2495   // Check for compatible quadrature spaces
2496   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2497   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2498   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2499   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2500 
2501   // Coarse to fine basis
2502   if (op_prolong || op_restrict) {
2503     CeedInt          dim, num_comp, num_nodes_c, num_nodes_f;
2504     CeedScalar      *q_ref, *q_weight, *grad;
2505     CeedElemTopology topo;
2506 
2507     // Check if interpolation matrix is provided
2508     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2509               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2510     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2511     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2512     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2513     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2514     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2515     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2516     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2517     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2518     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2519     CeedCall(CeedFree(&q_ref));
2520     CeedCall(CeedFree(&q_weight));
2521     CeedCall(CeedFree(&grad));
2522   }
2523 
2524   // Core code
2525   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2526   return CEED_ERROR_SUCCESS;
2527 }
2528 
2529 /**
2530   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2531 
2532   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2533   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2534   The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T
2535 \hat S V\f$.
2536   The CeedOperator must be linear and non-composite.
2537   The associated CeedQFunction must therefore also be linear.
2538 
2539   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2540 
2541   @param[in]  op      CeedOperator to create element inverses
2542   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2543   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2544 
2545   @return An error code: 0 - success, otherwise - failure
2546 
2547   @ref User
2548 **/
2549 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2550   Ceed                 ceed, ceed_parent;
2551   bool                 interp = false, grad = false, is_tensor_basis = true;
2552   CeedInt              num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2553   CeedSize             l_size = 1;
2554   CeedScalar          *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg;
2555   const CeedScalar    *interp_1d, *grad_1d, *q_weight_1d;
2556   CeedVector           q_data;
2557   CeedElemRestriction  rstr  = NULL, rstr_qd_i;
2558   CeedBasis            basis = NULL, fdm_basis;
2559   CeedQFunctionContext ctx_fdm;
2560   CeedQFunctionField  *qf_fields;
2561   CeedQFunction        qf, qf_fdm;
2562   CeedOperatorField   *op_fields;
2563 
2564   CeedCall(CeedOperatorCheckReady(op));
2565 
2566   if (op->CreateFDMElementInverse) {
2567     // Backend version
2568     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2569     return CEED_ERROR_SUCCESS;
2570   } else {
2571     // Operator fallback
2572     CeedOperator op_fallback;
2573 
2574     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2575     if (op_fallback) {
2576       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2577       return CEED_ERROR_SUCCESS;
2578     }
2579   }
2580 
2581   // Default interface implementation
2582   CeedCall(CeedOperatorGetCeed(op, &ceed));
2583   CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent));
2584   CeedCall(CeedOperatorGetQFunction(op, &qf));
2585 
2586   // Determine active input basis
2587   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2588   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2589   for (CeedInt i = 0; i < num_input_fields; i++) {
2590     CeedVector vec;
2591 
2592     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2593     if (vec == CEED_VECTOR_ACTIVE) {
2594       CeedEvalMode eval_mode;
2595 
2596       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2597       interp = interp || eval_mode == CEED_EVAL_INTERP;
2598       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2599       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2600       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2601     }
2602   }
2603   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2604   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2605   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2606   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2607   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2608   CeedCall(CeedBasisGetDimension(basis, &dim));
2609   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2610   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2611   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2612 
2613   // Build and diagonalize 1D Mass and Laplacian
2614   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2615   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2616   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2617   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2618   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2619   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2620   CeedCall(CeedCalloc(P_1d, &lambda));
2621   // -- Build matrices
2622   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2623   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2624   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2625   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2626 
2627   // -- Diagonalize
2628   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2629   CeedCall(CeedFree(&mass));
2630   CeedCall(CeedFree(&laplace));
2631   for (CeedInt i = 0; i < P_1d; i++) {
2632     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2633   }
2634   CeedCall(CeedFree(&x));
2635 
2636   {
2637     CeedInt             layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2638     CeedScalar          max_norm = 0;
2639     const CeedScalar   *assembled_array, *q_weight_array;
2640     CeedVector          assembled = NULL, q_weight;
2641     CeedElemRestriction rstr_qf   = NULL;
2642 
2643     // Assemble QFunction
2644     CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2645     CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2646     CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2647     CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2648 
2649     // Calculate element averages
2650     CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2651     CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2652     CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2653     CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2654     CeedCall(CeedCalloc(num_elem, &elem_avg));
2655     const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2656 
2657     for (CeedInt e = 0; e < num_elem; e++) {
2658       CeedInt count = 0;
2659 
2660       for (CeedInt q = 0; q < num_qpts; q++) {
2661         for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2662           if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2663             elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2664             count++;
2665           }
2666         }
2667       }
2668       if (count) {
2669         elem_avg[e] /= count;
2670       } else {
2671         elem_avg[e] = 1.0;
2672       }
2673     }
2674     CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2675     CeedCall(CeedVectorDestroy(&assembled));
2676     CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2677     CeedCall(CeedVectorDestroy(&q_weight));
2678   }
2679 
2680   // Build FDM diagonal
2681   {
2682     CeedScalar *q_data_array, *fdm_diagonal;
2683 
2684     CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2685     const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2686     for (CeedInt c = 0; c < num_comp; c++) {
2687       for (CeedInt n = 0; n < num_nodes; n++) {
2688         if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2689         if (grad) {
2690           for (CeedInt d = 0; d < dim; d++) {
2691             CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2692             fdm_diagonal[c * num_nodes + n] += lambda[i];
2693           }
2694         }
2695         if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2696       }
2697     }
2698     CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2699     CeedCall(CeedVectorSetValue(q_data, 0.0));
2700     CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2701     for (CeedInt e = 0; e < num_elem; e++) {
2702       for (CeedInt c = 0; c < num_comp; c++) {
2703         for (CeedInt n = 0; n < num_nodes; n++)
2704           q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2705       }
2706     }
2707     CeedCall(CeedFree(&elem_avg));
2708     CeedCall(CeedFree(&fdm_diagonal));
2709     CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2710   }
2711 
2712   // Setup FDM operator
2713   // -- Basis
2714   {
2715     CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2716 
2717     CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2718     CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2719     CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2720     CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2721     CeedCall(CeedFree(&fdm_interp));
2722     CeedCall(CeedFree(&grad_dummy));
2723     CeedCall(CeedFree(&q_ref_dummy));
2724     CeedCall(CeedFree(&q_weight_dummy));
2725     CeedCall(CeedFree(&lambda));
2726   }
2727 
2728   // -- Restriction
2729   {
2730     CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp};
2731     CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i));
2732   }
2733 
2734   // -- QFunction
2735   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2736   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2737   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2738   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2739   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2740 
2741   // -- QFunction context
2742   {
2743     CeedInt *num_comp_data;
2744 
2745     CeedCall(CeedCalloc(1, &num_comp_data));
2746     num_comp_data[0] = num_comp;
2747     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2748     CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2749   }
2750   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2751   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2752 
2753   // -- Operator
2754   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2755   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2756   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data));
2757   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2758 
2759   // Cleanup
2760   CeedCall(CeedVectorDestroy(&q_data));
2761   CeedCall(CeedBasisDestroy(&fdm_basis));
2762   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2763   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2764   return CEED_ERROR_SUCCESS;
2765 }
2766 
2767 /// @}
2768