xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision 506b1a0c535297a01950817d4491440b9ddb5287)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   char *source_path_with_name = NULL;
39 
40   // Check if NULL qf passed in
41   if (!qf) return CEED_ERROR_SUCCESS;
42 
43   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
44   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
45 
46   if (qf->source_path) {
47     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
48     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
49     memcpy(source_path_with_name, qf->source_path, path_len);
50     memcpy(&source_path_with_name[path_len], ":", 1);
51     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
52   } else {
53     CeedCall(CeedCalloc(1, &source_path_with_name));
54   }
55 
56   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
57   {
58     CeedQFunctionContext ctx;
59 
60     CeedCall(CeedQFunctionGetContext(qf, &ctx));
61     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
62   }
63   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
64     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
65   }
66   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
67     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
68   }
69   CeedCall(CeedFree(&source_path_with_name));
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   Ceed         ceed_fallback;
84   bool         is_composite;
85   CeedOperator op_fallback;
86 
87   // Check not already created
88   if (op->op_fallback) return CEED_ERROR_SUCCESS;
89 
90   // Fallback Ceed
91   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
92   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
93 
94   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
95   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
96 
97   // Clone Op
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114 
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
117     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
118     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
119     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
120       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
121                                     op->input_fields[i]->vec));
122     }
123     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
124       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
125                                     op->output_fields[i]->vec));
126     }
127     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
128     if (op_fallback->num_qpts == 0) CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     // Cleanup
130     CeedCall(CeedQFunctionDestroy(&qf_fallback));
131     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
133   }
134   CeedCall(CeedOperatorSetName(op_fallback, op->name));
135   CeedCall(CeedOperatorCheckReady(op_fallback));
136   // Note: No ref-counting here so we don't get caught in a reference loop.
137   //       The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback.
138   op->op_fallback                 = op_fallback;
139   op_fallback->op_fallback_parent = op;
140   return CEED_ERROR_SUCCESS;
141 }
142 
143 /**
144   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
145 
146   @param[in]  op          CeedOperator to retrieve fallback for
147   @param[out] op_fallback Fallback CeedOperator
148 
149   @return An error code: 0 - success, otherwise - failure
150 
151   @ref Developer
152 **/
153 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
154   // Create if needed
155   if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op));
156   if (op->op_fallback) {
157     bool is_debug;
158 
159     CeedCall(CeedIsDebug(op->ceed, &is_debug));
160     if (is_debug) {
161       Ceed        ceed, ceed_fallback;
162       const char *resource, *resource_fallback;
163 
164       CeedCall(CeedOperatorGetCeed(op, &ceed));
165       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
166       CeedCall(CeedGetResource(ceed, &resource));
167       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
168 
169       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n");
170       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
171                 op->op_fallback);
172     }
173   }
174   *op_fallback = op->op_fallback;
175   return CEED_ERROR_SUCCESS;
176 }
177 
178 /**
179   @brief Get the parent CeedOperator for a fallback CeedOperator
180 
181   @param[in]  op     CeedOperator context
182   @param[out] parent Variable to store parent CeedOperator context
183 
184   @return An error code: 0 - success, otherwise - failure
185 
186   @ref Developer
187 **/
188 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) {
189   *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL;
190   return CEED_ERROR_SUCCESS;
191 }
192 
193 /**
194   @brief Get the Ceed context of the parent CeedOperator for a fallback CeedOperator
195 
196   @param[in]  op     CeedOperator context
197   @param[out] parent Variable to store parent Ceed context
198 
199   @return An error code: 0 - success, otherwise - failure
200 
201   @ref Developer
202 **/
203 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) {
204   *parent = op->op_fallback_parent ? op->op_fallback_parent->ceed : op->ceed;
205   return CEED_ERROR_SUCCESS;
206 }
207 
208 /**
209   @brief Select correct basis matrix pointer based on CeedEvalMode
210 
211   @param[in]  basis     CeedBasis from which to get the basis matrix
212   @param[in]  eval_mode Current basis evaluation mode
213   @param[in]  identity  Pointer to identity matrix
214   @param[out] basis_ptr Basis pointer to set
215 
216   @ref Developer
217 **/
218 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
219   switch (eval_mode) {
220     case CEED_EVAL_NONE:
221       *basis_ptr = identity;
222       break;
223     case CEED_EVAL_INTERP:
224       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
225       break;
226     case CEED_EVAL_GRAD:
227       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
228       break;
229     case CEED_EVAL_DIV:
230       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
231       break;
232     case CEED_EVAL_CURL:
233       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
234       break;
235     case CEED_EVAL_WEIGHT:
236       break;  // Caught by QF Assembly
237   }
238   assert(*basis_ptr != NULL);
239   return CEED_ERROR_SUCCESS;
240 }
241 
242 /**
243   @brief Core logic for assembling operator diagonal or point block diagonal
244 
245   @param[in]  op             CeedOperator to assemble point block diagonal
246   @param[in]  request        Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
247   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
248   @param[out] assembled      CeedVector to store assembled diagonal
249 
250   @return An error code: 0 - success, otherwise - failure
251 
252   @ref Developer
253 **/
254 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_point_block, CeedVector assembled) {
255   Ceed ceed;
256   bool is_composite;
257 
258   CeedCall(CeedOperatorGetCeed(op, &ceed));
259   CeedCall(CeedOperatorIsComposite(op, &is_composite));
260   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
261 
262   // Assemble QFunction
263   CeedInt             layout_qf[3];
264   const CeedScalar   *assembled_qf_array;
265   CeedVector          assembled_qf        = NULL;
266   CeedElemRestriction assembled_elem_rstr = NULL;
267 
268   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
269   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf));
270   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
271   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
272 
273   // Get assembly data
274   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
275   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
276   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
277   CeedBasis               *active_bases_in, *active_bases_out;
278   CeedElemRestriction     *active_elem_rstrs_in, *active_elem_rstrs_out;
279   CeedOperatorAssemblyData data;
280 
281   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
282   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
283                                                 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out,
284                                                 &num_output_components));
285   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL));
286   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out));
287 
288   CeedCheck(num_active_bases_in == num_active_bases_out, ceed, CEED_ERROR_UNSUPPORTED,
289             "Cannot assemble operator diagonal with different numbers of input and output active bases");
290 
291   // Loop over all active bases
292   for (CeedInt b = 0; b < num_active_bases_in; b++) {
293     bool                has_eval_none = false;
294     CeedInt             num_elem, num_nodes, num_qpts, num_comp;
295     CeedScalar         *elem_diag_array, *identity = NULL;
296     CeedVector          elem_diag;
297     CeedElemRestriction diag_elem_rstr;
298 
299     CeedCheck(active_elem_rstrs_in[b] == active_elem_rstrs_out[b], ceed, CEED_ERROR_UNSUPPORTED,
300               "Cannot assemble operator diagonal with different input and output active element restrictions");
301 
302     // Assemble point block diagonal restriction, if needed
303     if (is_point_block) {
304       CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b], &diag_elem_rstr));
305     } else {
306       CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b], &diag_elem_rstr));
307     }
308 
309     // Create diagonal vector
310     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
311 
312     // Assemble element operator diagonals
313     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
314     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
315     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
316     CeedCall(CeedBasisGetNumNodes(active_bases_in[b], &num_nodes));
317     CeedCall(CeedBasisGetNumComponents(active_bases_in[b], &num_comp));
318     if (active_bases_in[b] == CEED_BASIS_NONE) num_qpts = num_nodes;
319     else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b], &num_qpts));
320 
321     if (active_bases_in[b] != active_bases_out[b]) {
322       CeedInt num_nodes_out, num_qpts_out, num_comp_out;
323 
324       CeedCall(CeedBasisGetNumNodes(active_bases_out[b], &num_nodes_out));
325       CeedCheck(num_nodes == num_nodes_out, ceed, CEED_ERROR_UNSUPPORTED, "Active input and output bases must have the same number of nodes");
326       CeedCall(CeedBasisGetNumComponents(active_bases_out[b], &num_comp_out));
327       CeedCheck(num_comp == num_comp_out, ceed, CEED_ERROR_UNSUPPORTED, "Active input and output bases must have the same number of components");
328       if (active_bases_out[b] == CEED_BASIS_NONE) num_qpts_out = num_nodes_out;
329       else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_out[b], &num_qpts_out));
330       CeedCheck(num_qpts == num_qpts_out, ceed, CEED_ERROR_UNSUPPORTED,
331                 "Active input and output bases must have the same number of quadrature points");
332     }
333 
334     // Construct identity matrix for basis if required
335     for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) {
336       has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE);
337     }
338     for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) {
339       has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE);
340     }
341     if (has_eval_none) {
342       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
343       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
344     }
345 
346     // Compute the diagonal of B^T D B
347     // Each element
348     for (CeedSize e = 0; e < num_elem; e++) {
349       // Each basis eval mode pair
350       CeedInt      d_out              = 0, q_comp_out;
351       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
352 
353       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) {
354         CeedInt           d_in              = 0, q_comp_in;
355         const CeedScalar *B_t               = NULL;
356         CeedEvalMode      eval_mode_in_prev = CEED_EVAL_NONE;
357 
358         CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b], eval_modes_out[b][e_out], identity, &B_t));
359         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b], eval_modes_out[b][e_out], &q_comp_out));
360         if (q_comp_out > 1) {
361           if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
362           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
363         }
364         eval_mode_out_prev = eval_modes_out[b][e_out];
365 
366         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) {
367           const CeedScalar *B = NULL;
368 
369           CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b], eval_modes_in[b][e_in], identity, &B));
370           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b], eval_modes_in[b][e_in], &q_comp_in));
371           if (q_comp_in > 1) {
372             if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
373             else B = &B[(++d_in) * num_qpts * num_nodes];
374           }
375           eval_mode_in_prev = eval_modes_in[b][e_in];
376 
377           // Each component
378           for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
379             // Each qpt/node pair
380             for (CeedInt q = 0; q < num_qpts; q++) {
381               if (is_point_block) {
382                 // Point Block Diagonal
383                 for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
384                   const CeedSize c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
385                   const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
386 
387                   for (CeedInt n = 0; n < num_nodes; n++) {
388                     elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
389                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
390                   }
391                 }
392               } else {
393                 // Diagonal Only
394                 const CeedInt    c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
395                 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
396 
397                 for (CeedInt n = 0; n < num_nodes; n++) {
398                   elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
399                 }
400               }
401             }
402           }
403         }
404       }
405     }
406     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
407 
408     // Assemble local operator diagonal
409     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
410 
411     // Cleanup
412     CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
413     CeedCall(CeedVectorDestroy(&elem_diag));
414     CeedCall(CeedFree(&identity));
415   }
416   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
417   CeedCall(CeedVectorDestroy(&assembled_qf));
418   return CEED_ERROR_SUCCESS;
419 }
420 
421 /**
422   @brief Core logic for assembling composite operator diagonal
423 
424   @param[in]  op             CeedOperator to assemble point block diagonal
425   @param[in]  request        Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
426   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
427   @param[out] assembled      CeedVector to store assembled diagonal
428 
429   @return An error code: 0 - success, otherwise - failure
430 
431   @ref Developer
432 **/
433 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block,
434                                                                  CeedVector assembled) {
435   CeedInt       num_sub;
436   CeedOperator *suboperators;
437 
438   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
439   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
440   for (CeedInt i = 0; i < num_sub; i++) {
441     if (is_point_block) {
442       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
443     } else {
444       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
445     }
446   }
447   return CEED_ERROR_SUCCESS;
448 }
449 
450 /**
451   @brief Build nonzero pattern for non-composite operator
452 
453   Users should generally use CeedOperatorLinearAssembleSymbolic()
454 
455   @param[in]  op     CeedOperator to assemble nonzero pattern
456   @param[in]  offset Offset for number of entries
457   @param[out] rows   Row number for each entry
458   @param[out] cols   Column number for each entry
459 
460   @return An error code: 0 - success, otherwise - failure
461 
462   @ref Developer
463 **/
464 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
465   Ceed                ceed;
466   bool                is_composite;
467   CeedSize            num_nodes_in, num_nodes_out, count = 0;
468   CeedInt             num_elem_in, elem_size_in, num_comp_in, layout_er_in[3];
469   CeedInt             num_elem_out, elem_size_out, num_comp_out, layout_er_out[3], local_num_entries;
470   CeedScalar         *array;
471   const CeedScalar   *elem_dof_a_in, *elem_dof_a_out;
472   CeedVector          index_vec_in, index_vec_out, elem_dof_in, elem_dof_out;
473   CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out;
474 
475   CeedCall(CeedOperatorGetCeed(op, &ceed));
476   CeedCall(CeedOperatorIsComposite(op, &is_composite));
477   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
478 
479   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out));
480   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
481   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
482   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
483   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
484   CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, &layout_er_in));
485 
486   // Determine elem_dof relation for input
487   CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in));
488   CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array));
489   for (CeedInt i = 0; i < num_nodes_in; i++) array[i] = i;
490   CeedCall(CeedVectorRestoreArray(index_vec_in, &array));
491   CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in));
492   CeedCall(CeedVectorSetValue(elem_dof_in, 0.0));
493   CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in));
494   CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE));
495   CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in));
496   CeedCall(CeedVectorDestroy(&index_vec_in));
497   CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in));
498 
499   if (elem_rstr_in != elem_rstr_out) {
500     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
501     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
502               "Active input and output operator restrictions must have the same number of elements");
503     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
504     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
505     CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, &layout_er_out));
506 
507     // Determine elem_dof relation for output
508     CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out));
509     CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array));
510     for (CeedInt i = 0; i < num_nodes_out; i++) array[i] = i;
511     CeedCall(CeedVectorRestoreArray(index_vec_out, &array));
512     CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out));
513     CeedCall(CeedVectorSetValue(elem_dof_out, 0.0));
514     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out));
515     CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE));
516     CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out));
517     CeedCall(CeedVectorDestroy(&index_vec_out));
518     CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out));
519   } else {
520     num_elem_out     = num_elem_in;
521     elem_size_out    = elem_size_in;
522     num_comp_out     = num_comp_in;
523     layout_er_out[0] = layout_er_in[0];
524     layout_er_out[1] = layout_er_in[1];
525     layout_er_out[2] = layout_er_in[2];
526     elem_dof_a_out   = elem_dof_a_in;
527   }
528   local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
529 
530   // Determine i, j locations for element matrices
531   for (CeedInt e = 0; e < num_elem_in; e++) {
532     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
533       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
534         for (CeedInt i = 0; i < elem_size_out; i++) {
535           for (CeedInt j = 0; j < elem_size_in; j++) {
536             const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2];
537             const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2];
538             const CeedInt row                = elem_dof_a_out[elem_dof_index_row];
539             const CeedInt col                = elem_dof_a_in[elem_dof_index_col];
540 
541             rows[offset + count] = row;
542             cols[offset + count] = col;
543             count++;
544           }
545         }
546       }
547     }
548   }
549   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
550   CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in));
551   CeedCall(CeedVectorDestroy(&elem_dof_in));
552   if (elem_rstr_in != elem_rstr_out) {
553     CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out));
554     CeedCall(CeedVectorDestroy(&elem_dof_out));
555   }
556   return CEED_ERROR_SUCCESS;
557 }
558 
559 /**
560   @brief Assemble nonzero entries for non-composite operator
561 
562   Users should generally use CeedOperatorLinearAssemble()
563 
564   @param[in]  op     CeedOperator to assemble
565   @param[in]  offset Offset for number of entries
566   @param[out] values Values to assemble into matrix
567 
568   @return An error code: 0 - success, otherwise - failure
569 
570   @ref Developer
571 **/
572 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
573   Ceed ceed;
574   bool is_composite;
575 
576   CeedCall(CeedOperatorGetCeed(op, &ceed));
577   CeedCall(CeedOperatorIsComposite(op, &is_composite));
578   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
579 
580   // Early exit for empty operator
581   {
582     CeedInt num_elem = 0;
583 
584     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
585     if (num_elem == 0) return CEED_ERROR_SUCCESS;
586   }
587 
588   if (op->LinearAssembleSingle) {
589     // Backend version
590     CeedCall(op->LinearAssembleSingle(op, offset, values));
591     return CEED_ERROR_SUCCESS;
592   } else {
593     // Operator fallback
594     CeedOperator op_fallback;
595 
596     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
597     if (op_fallback) {
598       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
599       return CEED_ERROR_SUCCESS;
600     }
601   }
602 
603   // Assemble QFunction
604   CeedInt             layout_qf[3];
605   const CeedScalar   *assembled_qf_array;
606   CeedVector          assembled_qf        = NULL;
607   CeedElemRestriction assembled_elem_rstr = NULL;
608 
609   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE));
610   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf));
611   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
612   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
613 
614   // Get assembly data
615   CeedInt                  num_elem_in, elem_size_in, num_comp_in, num_qpts_in;
616   CeedInt                  num_elem_out, elem_size_out, num_comp_out, num_qpts_out, local_num_entries;
617   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
618   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
619   CeedBasis               *active_bases_in, *active_bases_out, basis_in, basis_out;
620   const CeedScalar       **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out;
621   CeedElemRestriction      elem_rstr_in, elem_rstr_out;
622   CeedRestrictionType      elem_rstr_type_in, elem_rstr_type_out;
623   const bool              *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL;
624   const CeedInt8          *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL;
625   CeedOperatorAssemblyData data;
626 
627   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
628   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out,
629                                                 &num_eval_modes_out, &eval_modes_out, NULL, NULL));
630 
631   CeedCheck(num_active_bases_in == num_active_bases_out && num_active_bases_in == 1, ceed, CEED_ERROR_UNSUPPORTED,
632             "Cannot assemble operator with multiple active bases");
633   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator without inputs/outputs");
634 
635   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out));
636   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
637   basis_in  = active_bases_in[0];
638   basis_out = active_bases_out[0];
639   B_mat_in  = B_mats_in[0];
640   B_mat_out = B_mats_out[0];
641 
642   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
643   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
644   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
645   if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in;
646   else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in));
647 
648   CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in));
649   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
650     CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in));
651   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
652     CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in));
653   }
654 
655   if (elem_rstr_in != elem_rstr_out) {
656     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
657     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
658               "Active input and output operator restrictions must have the same number of elements");
659     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
660     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
661     if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out;
662     else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out));
663     CeedCheck(num_qpts_in == num_qpts_out, ceed, CEED_ERROR_UNSUPPORTED,
664               "Active input and output bases must have the same number of quadrature points");
665 
666     CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out));
667     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
668       CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out));
669     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
670       CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out));
671     }
672   } else {
673     num_elem_out  = num_elem_in;
674     elem_size_out = elem_size_in;
675     num_comp_out  = num_comp_in;
676     num_qpts_out  = num_qpts_in;
677 
678     elem_rstr_orients_out      = elem_rstr_orients_in;
679     elem_rstr_curl_orients_out = elem_rstr_curl_orients_in;
680   }
681   local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
682 
683   // Loop over elements and put in data structure
684   // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
685   CeedSize    count = 0;
686   CeedScalar *vals, BTD_mat[elem_size_out * num_qpts_in * num_eval_modes_in[0]], elem_mat[elem_size_out * elem_size_in], *elem_mat_b = NULL;
687 
688   if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b));
689 
690   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
691   for (CeedSize e = 0; e < num_elem_in; e++) {
692     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
693       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
694         // Compute B^T*D
695         for (CeedSize n = 0; n < elem_size_out; n++) {
696           for (CeedSize q = 0; q < num_qpts_in; q++) {
697             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
698               const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in;
699               CeedScalar     sum       = 0.0;
700 
701               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
702                 const CeedSize b_out_index     = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n;
703                 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out;
704                 const CeedSize qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
705 
706                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
707               }
708               BTD_mat[btd_index] = sum;
709             }
710           }
711         }
712 
713         // Form element matrix itself (for each block component)
714         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size_out, elem_size_in, num_qpts_in * num_eval_modes_in[0]));
715 
716         // Transform the element matrix if required
717         if (elem_rstr_orients_out) {
718           const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out];
719 
720           for (CeedInt i = 0; i < elem_size_out; i++) {
721             const double orient = elem_orients[i] ? -1.0 : 1.0;
722 
723             for (CeedInt j = 0; j < elem_size_in; j++) {
724               elem_mat[i * elem_size_in + j] *= orient;
725             }
726           }
727         } else if (elem_rstr_curl_orients_out) {
728           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out];
729 
730           // T^T*(B^T*D*B)
731           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
732           for (CeedInt i = 0; i < elem_size_out; i++) {
733             for (CeedInt j = 0; j < elem_size_in; j++) {
734               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] +
735                                                (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) +
736                                                (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0);
737             }
738           }
739         }
740         if (elem_rstr_orients_in) {
741           const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in];
742 
743           for (CeedInt i = 0; i < elem_size_out; i++) {
744             for (CeedInt j = 0; j < elem_size_in; j++) {
745               elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0;
746             }
747           }
748         } else if (elem_rstr_curl_orients_in) {
749           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in];
750 
751           // (B^T*D*B)*T
752           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
753           for (CeedInt i = 0; i < elem_size_out; i++) {
754             for (CeedInt j = 0; j < elem_size_in; j++) {
755               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] +
756                                                (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) +
757                                                (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0);
758             }
759           }
760         }
761 
762         // Put element matrix in coordinate data structure
763         for (CeedInt i = 0; i < elem_size_out; i++) {
764           for (CeedInt j = 0; j < elem_size_in; j++) {
765             vals[offset + count] = elem_mat[i * elem_size_in + j];
766             count++;
767           }
768         }
769       }
770     }
771   }
772   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries");
773   CeedCall(CeedVectorRestoreArray(values, &vals));
774 
775   // Cleanup
776   CeedCall(CeedFree(&elem_mat_b));
777   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
778     CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in));
779   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
780     CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in));
781   }
782   if (elem_rstr_in != elem_rstr_out) {
783     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
784       CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out));
785     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
786       CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out));
787     }
788   }
789   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
790   CeedCall(CeedVectorDestroy(&assembled_qf));
791   return CEED_ERROR_SUCCESS;
792 }
793 
794 /**
795   @brief Count number of entries for assembled CeedOperator
796 
797   @param[in]  op          CeedOperator to assemble
798   @param[out] num_entries Number of entries in assembled representation
799 
800   @return An error code: 0 - success, otherwise - failure
801 
802   @ref Utility
803 **/
804 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) {
805   bool                is_composite;
806   CeedInt             num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out;
807   CeedElemRestriction rstr_in, rstr_out;
808 
809   CeedCall(CeedOperatorIsComposite(op, &is_composite));
810   CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
811 
812   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out));
813   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in));
814   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in));
815   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in));
816   if (rstr_in != rstr_out) {
817     CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out));
818     CeedCheck(num_elem_in == num_elem_out, op->ceed, CEED_ERROR_UNSUPPORTED,
819               "Active input and output operator restrictions must have the same number of elements");
820     CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out));
821     CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out));
822   } else {
823     num_elem_out  = num_elem_in;
824     elem_size_out = elem_size_in;
825     num_comp_out  = num_comp_in;
826   }
827   *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in;
828   return CEED_ERROR_SUCCESS;
829 }
830 
831 /**
832   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
833 
834   @param[in]  op_fine      Fine grid operator
835   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
836   @param[in]  rstr_coarse  Coarse grid restriction
837   @param[in]  basis_coarse Coarse grid active vector basis
838   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
839   @param[out] op_coarse    Coarse grid operator
840   @param[out] op_prolong   Coarse to fine operator, or NULL
841   @param[out] op_restrict  Fine to coarse operator, or NULL
842 
843   @return An error code: 0 - success, otherwise - failure
844 
845   @ref Developer
846 **/
847 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
848                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
849   bool                is_composite;
850   Ceed                ceed;
851   CeedInt             num_comp;
852   CeedVector          mult_vec         = NULL;
853   CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL;
854 
855   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
856 
857   // Check for composite operator
858   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
859   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
860 
861   // Coarse Grid
862   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
863   // -- Clone input fields
864   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
865     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
866       rstr_fine = op_fine->input_fields[i]->elem_rstr;
867       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
868     } else {
869       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
870                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
871     }
872   }
873   // -- Clone output fields
874   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
875     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
876       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
877     } else {
878       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
879                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
880     }
881   }
882   // -- Clone QFunctionAssemblyData
883   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
884 
885   // Multiplicity vector
886   if (op_restrict || op_prolong) {
887     CeedVector          mult_e_vec;
888     CeedRestrictionType rstr_type;
889 
890     CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type));
891     CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED,
892               "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported");
893     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
894     CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine));
895     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
896     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
897     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
898     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
899     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
900     CeedCall(CeedVectorDestroy(&mult_e_vec));
901     CeedCall(CeedVectorReciprocal(mult_vec));
902   }
903 
904   // Clone name
905   bool   has_name = op_fine->name;
906   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
907   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
908 
909   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
910   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
911             "Prolongation or restriction operator creation requires coarse-to-fine basis");
912 
913   // Restriction/Prolongation Operators
914   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
915 
916   // Restriction
917   if (op_restrict) {
918     CeedInt             *num_comp_r_data;
919     CeedQFunctionContext ctx_r;
920     CeedQFunction        qf_restrict;
921 
922     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
923     CeedCall(CeedCalloc(1, &num_comp_r_data));
924     num_comp_r_data[0] = num_comp;
925     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
926     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
927     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
928     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
929     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
930     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
931     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
932     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
933 
934     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
935     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
936     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
937     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
938 
939     // Set name
940     char *restriction_name;
941 
942     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
943     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
944     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
945     CeedCall(CeedFree(&restriction_name));
946 
947     // Check
948     CeedCall(CeedOperatorCheckReady(*op_restrict));
949 
950     // Cleanup
951     CeedCall(CeedQFunctionDestroy(&qf_restrict));
952   }
953 
954   // Prolongation
955   if (op_prolong) {
956     CeedInt             *num_comp_p_data;
957     CeedQFunctionContext ctx_p;
958     CeedQFunction        qf_prolong;
959 
960     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
961     CeedCall(CeedCalloc(1, &num_comp_p_data));
962     num_comp_p_data[0] = num_comp;
963     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
964     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
965     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
966     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
967     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
968     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
969     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
970     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
971 
972     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
973     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
974     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
975     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
976 
977     // Set name
978     char *prolongation_name;
979 
980     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
981     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
982     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
983     CeedCall(CeedFree(&prolongation_name));
984 
985     // Check
986     CeedCall(CeedOperatorCheckReady(*op_prolong));
987 
988     // Cleanup
989     CeedCall(CeedQFunctionDestroy(&qf_prolong));
990   }
991 
992   // Check
993   CeedCall(CeedOperatorCheckReady(*op_coarse));
994 
995   // Cleanup
996   CeedCall(CeedVectorDestroy(&mult_vec));
997   CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine));
998   CeedCall(CeedBasisDestroy(&basis_c_to_f));
999   return CEED_ERROR_SUCCESS;
1000 }
1001 
1002 /**
1003   @brief Build 1D mass matrix and Laplacian with perturbation
1004 
1005   @param[in]  interp_1d   Interpolation matrix in one dimension
1006   @param[in]  grad_1d     Gradient matrix in one dimension
1007   @param[in]  q_weight_1d Quadrature weights in one dimension
1008   @param[in]  P_1d        Number of basis nodes in one dimension
1009   @param[in]  Q_1d        Number of quadrature points in one dimension
1010   @param[in]  dim         Dimension of basis
1011   @param[out] mass        Assembled mass matrix in one dimension
1012   @param[out] laplace     Assembled perturbed Laplacian in one dimension
1013 
1014   @return An error code: 0 - success, otherwise - failure
1015 
1016   @ref Developer
1017 **/
1018 CeedPragmaOptimizeOff
1019 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
1020                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
1021   for (CeedInt i = 0; i < P_1d; i++) {
1022     for (CeedInt j = 0; j < P_1d; j++) {
1023       CeedScalar sum = 0.0;
1024       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
1025       mass[i + j * P_1d] = sum;
1026     }
1027   }
1028   // -- Laplacian
1029   for (CeedInt i = 0; i < P_1d; i++) {
1030     for (CeedInt j = 0; j < P_1d; j++) {
1031       CeedScalar sum = 0.0;
1032 
1033       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
1034       laplace[i + j * P_1d] = sum;
1035     }
1036   }
1037   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
1038   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
1039   return CEED_ERROR_SUCCESS;
1040 }
1041 CeedPragmaOptimizeOn
1042 
1043 /// @}
1044 
1045 /// ----------------------------------------------------------------------------
1046 /// CeedOperator Backend API
1047 /// ----------------------------------------------------------------------------
1048 /// @addtogroup CeedOperatorBackend
1049 /// @{
1050 
1051 /**
1052   @brief Create point block restriction for active operator field
1053 
1054   @param[in]  rstr             Original CeedElemRestriction for active field
1055   @param[out] point_block_rstr Address of the variable where the newly created CeedElemRestriction will be stored
1056 
1057   @return An error code: 0 - success, otherwise - failure
1058 
1059   @ref Backend
1060 **/
1061 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) {
1062   Ceed           ceed;
1063   CeedInt        num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets;
1064   CeedSize       l_size;
1065   const CeedInt *offsets;
1066 
1067   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
1068   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
1069 
1070   // Expand offsets
1071   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
1072   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
1073   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
1074   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
1075   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
1076   shift = num_comp;
1077   if (comp_stride != 1) shift *= num_comp;
1078   CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets));
1079   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
1080     point_block_offsets[i] = offsets[i] * shift;
1081   }
1082 
1083   // Create new restriction
1084   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
1085                                      point_block_offsets, point_block_rstr));
1086 
1087   // Cleanup
1088   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
1089   return CEED_ERROR_SUCCESS;
1090 }
1091 
1092 /**
1093   @brief Create object holding CeedQFunction assembly data for CeedOperator
1094 
1095   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
1096   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
1097 
1098   @return An error code: 0 - success, otherwise - failure
1099 
1100   @ref Backend
1101 **/
1102 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
1103   CeedCall(CeedCalloc(1, data));
1104   (*data)->ref_count = 1;
1105   (*data)->ceed      = ceed;
1106   CeedCall(CeedReference(ceed));
1107   return CEED_ERROR_SUCCESS;
1108 }
1109 
1110 /**
1111   @brief Increment the reference counter for a CeedQFunctionAssemblyData
1112 
1113   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
1114 
1115   @return An error code: 0 - success, otherwise - failure
1116 
1117   @ref Backend
1118 **/
1119 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
1120   data->ref_count++;
1121   return CEED_ERROR_SUCCESS;
1122 }
1123 
1124 /**
1125   @brief Set re-use of CeedQFunctionAssemblyData
1126 
1127   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
1128   @param[in]     reuse_data Boolean flag indicating data re-use
1129 
1130   @return An error code: 0 - success, otherwise - failure
1131 
1132   @ref Backend
1133 **/
1134 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
1135   data->reuse_data        = reuse_data;
1136   data->needs_data_update = true;
1137   return CEED_ERROR_SUCCESS;
1138 }
1139 
1140 /**
1141   @brief Mark QFunctionAssemblyData as stale
1142 
1143   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
1144   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
1145 
1146   @return An error code: 0 - success, otherwise - failure
1147 
1148   @ref Backend
1149 **/
1150 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
1151   data->needs_data_update = needs_data_update;
1152   return CEED_ERROR_SUCCESS;
1153 }
1154 
1155 /**
1156   @brief Determine if QFunctionAssemblyData needs update
1157 
1158   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
1159   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1160 
1161   @return An error code: 0 - success, otherwise - failure
1162 
1163   @ref Backend
1164 **/
1165 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
1166   *is_update_needed = !data->reuse_data || data->needs_data_update;
1167   return CEED_ERROR_SUCCESS;
1168 }
1169 
1170 /**
1171   @brief Copy the pointer to a CeedQFunctionAssemblyData.
1172 
1173   Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
1174 
1175   Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
1176         CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
1177         CeedQFunctionAssemblyData.
1178 
1179   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
1180   @param[in,out] data_copy Variable to store copied reference
1181 
1182   @return An error code: 0 - success, otherwise - failure
1183 
1184   @ref Backend
1185 **/
1186 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1187   CeedCall(CeedQFunctionAssemblyDataReference(data));
1188   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1189   *data_copy = data;
1190   return CEED_ERROR_SUCCESS;
1191 }
1192 
1193 /**
1194   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1195 
1196   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1197   @param[out] is_setup Boolean flag for setup status
1198 
1199   @return An error code: 0 - success, otherwise - failure
1200 
1201   @ref Backend
1202 **/
1203 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1204   *is_setup = data->is_setup;
1205   return CEED_ERROR_SUCCESS;
1206 }
1207 
1208 /**
1209   @brief Set internal objects for CeedQFunctionAssemblyData
1210 
1211   @param[in,out] data CeedQFunctionAssemblyData to set objects
1212   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1213   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1214 
1215   @return An error code: 0 - success, otherwise - failure
1216 
1217   @ref Backend
1218 **/
1219 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1220   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1221   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1222 
1223   data->is_setup = true;
1224   return CEED_ERROR_SUCCESS;
1225 }
1226 
1227 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1228   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1229 
1230   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1231   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1232   return CEED_ERROR_SUCCESS;
1233 }
1234 
1235 /**
1236   @brief Destroy CeedQFunctionAssemblyData
1237 
1238   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1239 
1240   @return An error code: 0 - success, otherwise - failure
1241 
1242   @ref Backend
1243 **/
1244 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1245   if (!*data || --(*data)->ref_count > 0) {
1246     *data = NULL;
1247     return CEED_ERROR_SUCCESS;
1248   }
1249   CeedCall(CeedDestroy(&(*data)->ceed));
1250   CeedCall(CeedVectorDestroy(&(*data)->vec));
1251   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1252 
1253   CeedCall(CeedFree(data));
1254   return CEED_ERROR_SUCCESS;
1255 }
1256 
1257 /**
1258   @brief Get CeedOperatorAssemblyData
1259 
1260   @param[in]  op   CeedOperator to assemble
1261   @param[out] data CeedQFunctionAssemblyData
1262 
1263   @return An error code: 0 - success, otherwise - failure
1264 
1265   @ref Backend
1266 **/
1267 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1268   if (!op->op_assembled) {
1269     CeedOperatorAssemblyData data;
1270 
1271     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1272     op->op_assembled = data;
1273   }
1274   *data = op->op_assembled;
1275   return CEED_ERROR_SUCCESS;
1276 }
1277 
1278 /**
1279   @brief Create object holding CeedOperator assembly data.
1280 
1281   The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1282   An array with references to the corresponding active CeedElemRestrictions is also stored.
1283   For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1284   The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1285 CeedEvalMode.
1286   The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1287   Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1288 
1289   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1290   @param[in]  op   CeedOperator to be assembled
1291   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1292 
1293   @return An error code: 0 - success, otherwise - failure
1294 
1295   @ref Backend
1296 **/
1297 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1298   CeedInt             num_active_bases_in = 0, num_active_bases_out = 0, offset = 0;
1299   CeedInt             num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL;
1300   CeedSize          **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1301   CeedEvalMode      **eval_modes_in = NULL, **eval_modes_out = NULL;
1302   CeedQFunctionField *qf_fields;
1303   CeedQFunction       qf;
1304   CeedOperatorField  *op_fields;
1305   bool                is_composite;
1306 
1307   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1308   CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators.");
1309 
1310   // Allocate
1311   CeedCall(CeedCalloc(1, data));
1312   (*data)->ceed = ceed;
1313   CeedCall(CeedReference(ceed));
1314 
1315   // Build OperatorAssembly data
1316   CeedCall(CeedOperatorGetQFunction(op, &qf));
1317   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1318   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1319 
1320   // Determine active input basis
1321   for (CeedInt i = 0; i < num_input_fields; i++) {
1322     CeedVector vec;
1323 
1324     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1325     if (vec == CEED_VECTOR_ACTIVE) {
1326       CeedInt      index = -1, num_comp, q_comp;
1327       CeedEvalMode eval_mode;
1328       CeedBasis    basis_in = NULL;
1329 
1330       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1331       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1332       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1333       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1334       for (CeedInt i = 0; i < num_active_bases_in; i++) {
1335         if ((*data)->active_bases_in[i] == basis_in) index = i;
1336       }
1337       if (index == -1) {
1338         CeedElemRestriction elem_rstr_in;
1339 
1340         index = num_active_bases_in;
1341         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in));
1342         (*data)->active_bases_in[num_active_bases_in] = NULL;
1343         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in]));
1344         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in));
1345         (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL;
1346         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1347         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in]));
1348         CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in));
1349         num_eval_modes_in[index] = 0;
1350         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in));
1351         eval_modes_in[index] = NULL;
1352         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in));
1353         eval_mode_offsets_in[index] = NULL;
1354         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in));
1355         (*data)->assembled_bases_in[index] = NULL;
1356         num_active_bases_in++;
1357       }
1358       if (eval_mode != CEED_EVAL_WEIGHT) {
1359         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1360         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1361         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1362         for (CeedInt d = 0; d < q_comp; d++) {
1363           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1364           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1365           offset += num_comp;
1366         }
1367         num_eval_modes_in[index] += q_comp;
1368       }
1369     }
1370   }
1371 
1372   // Determine active output basis
1373   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1374   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1375   offset = 0;
1376   for (CeedInt i = 0; i < num_output_fields; i++) {
1377     CeedVector vec;
1378 
1379     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1380     if (vec == CEED_VECTOR_ACTIVE) {
1381       CeedInt      index = -1, num_comp, q_comp;
1382       CeedEvalMode eval_mode;
1383       CeedBasis    basis_out = NULL;
1384 
1385       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1386       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1387       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1388       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1389       for (CeedInt i = 0; i < num_active_bases_out; i++) {
1390         if ((*data)->active_bases_out[i] == basis_out) index = i;
1391       }
1392       if (index == -1) {
1393         CeedElemRestriction elem_rstr_out;
1394 
1395         index = num_active_bases_out;
1396         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out));
1397         (*data)->active_bases_out[num_active_bases_out] = NULL;
1398         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out]));
1399         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out));
1400         (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL;
1401         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1402         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out]));
1403         CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out));
1404         num_eval_modes_out[index] = 0;
1405         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out));
1406         eval_modes_out[index] = NULL;
1407         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out));
1408         eval_mode_offsets_out[index] = NULL;
1409         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out));
1410         (*data)->assembled_bases_out[index] = NULL;
1411         num_active_bases_out++;
1412       }
1413       if (eval_mode != CEED_EVAL_WEIGHT) {
1414         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1415         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1416         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1417         for (CeedInt d = 0; d < q_comp; d++) {
1418           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1419           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1420           offset += num_comp;
1421         }
1422         num_eval_modes_out[index] += q_comp;
1423       }
1424     }
1425   }
1426   (*data)->num_active_bases_in   = num_active_bases_in;
1427   (*data)->num_eval_modes_in     = num_eval_modes_in;
1428   (*data)->eval_modes_in         = eval_modes_in;
1429   (*data)->eval_mode_offsets_in  = eval_mode_offsets_in;
1430   (*data)->num_active_bases_out  = num_active_bases_out;
1431   (*data)->num_eval_modes_out    = num_eval_modes_out;
1432   (*data)->eval_modes_out        = eval_modes_out;
1433   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1434   (*data)->num_output_components = offset;
1435   return CEED_ERROR_SUCCESS;
1436 }
1437 
1438 /**
1439   @brief Get CeedOperator CeedEvalModes for assembly.
1440 
1441   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1442 
1443   @param[in]  data                  CeedOperatorAssemblyData
1444   @param[out] num_active_bases_in   Total number of active bases for input
1445   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1446                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1447   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1448   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1449   @param[out] num_active_bases_out  Total number of active bases for output
1450   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1451   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1452   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1453   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1454                                       including contributions of all active bases
1455 
1456   @return An error code: 0 - success, otherwise - failure
1457 
1458   @ref Backend
1459 **/
1460 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in,
1461                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out,
1462                                          CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out,
1463                                          CeedSize *num_output_components) {
1464   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1465   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1466   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1467   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1468   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1469   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1470   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1471   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1472   if (num_output_components) *num_output_components = data->num_output_components;
1473   return CEED_ERROR_SUCCESS;
1474 }
1475 
1476 /**
1477   @brief Get CeedOperator CeedBasis data for assembly.
1478 
1479   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1480 
1481   @param[in]  data                 CeedOperatorAssemblyData
1482   @param[out] num_active_bases_in  Number of active input bases, or NULL
1483   @param[out] active_bases_in      Pointer to hold active input CeedBasis, or NULL
1484   @param[out] assembled_bases_in   Pointer to hold assembled active input B, or NULL
1485   @param[out] num_active_bases_out Number of active output bases, or NULL
1486   @param[out] active_bases_out     Pointer to hold active output CeedBasis, or NULL
1487   @param[out] assembled_bases_out  Pointer to hold assembled active output B, or NULL
1488 
1489   @return An error code: 0 - success, otherwise - failure
1490 
1491   @ref Backend
1492 **/
1493 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in,
1494                                      const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out,
1495                                      const CeedScalar ***assembled_bases_out) {
1496   // Assemble B_in, B_out if needed
1497   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1498     CeedInt num_qpts;
1499 
1500     if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts));
1501     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts));
1502     for (CeedInt b = 0; b < data->num_active_bases_in; b++) {
1503       bool        has_eval_none = false;
1504       CeedInt     num_nodes;
1505       CeedScalar *B_in = NULL, *identity = NULL;
1506 
1507       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes));
1508       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1509 
1510       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1511         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1512       }
1513       if (has_eval_none) {
1514         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1515         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1516           identity[i * num_nodes + i] = 1.0;
1517         }
1518       }
1519 
1520       for (CeedInt q = 0; q < num_qpts; q++) {
1521         for (CeedInt n = 0; n < num_nodes; n++) {
1522           CeedInt      d_in              = 0, q_comp_in;
1523           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1524 
1525           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1526             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1527             const CeedScalar *B  = NULL;
1528 
1529             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B));
1530             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in));
1531             if (q_comp_in > 1) {
1532               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1533               else B = &B[(++d_in) * num_qpts * num_nodes];
1534             }
1535             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1536             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1537           }
1538         }
1539       }
1540       if (identity) CeedCall(CeedFree(&identity));
1541       data->assembled_bases_in[b] = B_in;
1542     }
1543   }
1544 
1545   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1546     CeedInt num_qpts;
1547 
1548     if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts));
1549     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts));
1550     for (CeedInt b = 0; b < data->num_active_bases_out; b++) {
1551       bool        has_eval_none = false;
1552       CeedInt     num_nodes;
1553       CeedScalar *B_out = NULL, *identity = NULL;
1554 
1555       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes));
1556       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1557 
1558       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1559         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1560       }
1561       if (has_eval_none) {
1562         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1563         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1564           identity[i * num_nodes + i] = 1.0;
1565         }
1566       }
1567 
1568       for (CeedInt q = 0; q < num_qpts; q++) {
1569         for (CeedInt n = 0; n < num_nodes; n++) {
1570           CeedInt      d_out              = 0, q_comp_out;
1571           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1572 
1573           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1574             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1575             const CeedScalar *B  = NULL;
1576 
1577             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B));
1578             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out));
1579             if (q_comp_out > 1) {
1580               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1581               else B = &B[(++d_out) * num_qpts * num_nodes];
1582             }
1583             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1584             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1585           }
1586         }
1587       }
1588       if (identity) CeedCall(CeedFree(&identity));
1589       data->assembled_bases_out[b] = B_out;
1590     }
1591   }
1592 
1593   // Pass out assembled data
1594   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1595   if (active_bases_in) *active_bases_in = data->active_bases_in;
1596   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1597   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1598   if (active_bases_out) *active_bases_out = data->active_bases_out;
1599   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1600   return CEED_ERROR_SUCCESS;
1601 }
1602 
1603 /**
1604   @brief Get CeedOperator CeedBasis data for assembly.
1605 
1606   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1607 
1608   @param[in]  data                      CeedOperatorAssemblyData
1609   @param[out] num_active_elem_rstrs_in  Number of active input element restrictions, or NULL
1610   @param[out] active_elem_rstrs_in      Pointer to hold active input CeedElemRestrictions, or NULL
1611   @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or NULL
1612   @param[out] active_elem_rstrs_out     Pointer to hold active output CeedElemRestrictions, or NULL
1613 
1614   @return An error code: 0 - success, otherwise - failure
1615 
1616   @ref Backend
1617 **/
1618 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in,
1619                                                 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out,
1620                                                 CeedElemRestriction **active_elem_rstrs_out) {
1621   if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in;
1622   if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in;
1623   if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out;
1624   if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out;
1625   return CEED_ERROR_SUCCESS;
1626 }
1627 
1628 /**
1629   @brief Destroy CeedOperatorAssemblyData
1630 
1631   @param[in,out] data CeedOperatorAssemblyData to destroy
1632 
1633   @return An error code: 0 - success, otherwise - failure
1634 
1635   @ref Backend
1636 **/
1637 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1638   if (!*data) {
1639     *data = NULL;
1640     return CEED_ERROR_SUCCESS;
1641   }
1642   CeedCall(CeedDestroy(&(*data)->ceed));
1643   for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) {
1644     CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b]));
1645     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b]));
1646     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1647     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1648     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1649   }
1650   for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) {
1651     CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b]));
1652     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b]));
1653     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1654     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1655     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1656   }
1657   CeedCall(CeedFree(&(*data)->active_bases_in));
1658   CeedCall(CeedFree(&(*data)->active_bases_out));
1659   CeedCall(CeedFree(&(*data)->active_elem_rstrs_in));
1660   CeedCall(CeedFree(&(*data)->active_elem_rstrs_out));
1661   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1662   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1663   CeedCall(CeedFree(&(*data)->eval_modes_in));
1664   CeedCall(CeedFree(&(*data)->eval_modes_out));
1665   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1666   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1667   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1668   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1669 
1670   CeedCall(CeedFree(data));
1671   return CEED_ERROR_SUCCESS;
1672 }
1673 
1674 /// @}
1675 
1676 /// ----------------------------------------------------------------------------
1677 /// CeedOperator Public API
1678 /// ----------------------------------------------------------------------------
1679 /// @addtogroup CeedOperatorUser
1680 /// @{
1681 
1682 /**
1683   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1684 
1685   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1686   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices
1687 representing the action of the CeedQFunction for a corresponding quadrature point on an element.
1688 
1689   Inputs and outputs are in the order provided by the user when adding CeedOperator fields.
1690   For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction
1691 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1692 
1693   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1694 
1695   @param[in]  op        CeedOperator to assemble CeedQFunction
1696   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1697   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1698   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1699 
1700   @return An error code: 0 - success, otherwise - failure
1701 
1702   @ref User
1703 **/
1704 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1705   CeedCall(CeedOperatorCheckReady(op));
1706 
1707   if (op->LinearAssembleQFunction) {
1708     // Backend version
1709     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1710   } else {
1711     // Operator fallback
1712     CeedOperator op_fallback;
1713 
1714     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1715     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1716     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1717   }
1718   return CEED_ERROR_SUCCESS;
1719 }
1720 
1721 /**
1722   @brief Assemble CeedQFunction and store result internally.
1723 
1724   Return copied references of stored data to the caller.
1725   Caller is responsible for ownership and destruction of the copied references.
1726   See also @ref CeedOperatorLinearAssembleQFunction
1727 
1728   Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers.
1729         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
1730 
1731   @param[in]  op        CeedOperator to assemble CeedQFunction
1732   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1733   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1734   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1735 
1736   @return An error code: 0 - success, otherwise - failure
1737 
1738   @ref User
1739 **/
1740 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1741   int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL;
1742   CeedOperator op_assemble                                                                           = NULL;
1743   CeedOperator op_fallback_parent                                                                    = NULL;
1744 
1745   CeedCall(CeedOperatorCheckReady(op));
1746 
1747   // Determine if fallback parent or operator has implementation
1748   CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent));
1749   if (op_fallback_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) {
1750     // -- Backend version for op fallback parent is faster, if it exists
1751     LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate;
1752     op_assemble                   = op_fallback_parent;
1753   } else if (op->LinearAssembleQFunctionUpdate) {
1754     // -- Backend version for op
1755     LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate;
1756     op_assemble                   = op;
1757   }
1758 
1759   // Assemble QFunction
1760   if (LinearAssembleQFunctionUpdate) {
1761     // Backend or fallback parent version
1762     bool                qf_assembled_is_setup;
1763     CeedVector          assembled_vec  = NULL;
1764     CeedElemRestriction assembled_rstr = NULL;
1765 
1766     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1767     if (qf_assembled_is_setup) {
1768       bool update_needed;
1769 
1770       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1771       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1772       if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request));
1773     } else {
1774       CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request));
1775       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1776     }
1777     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1778 
1779     // Copy reference from internally held copy
1780     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1781     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1782     CeedCall(CeedVectorDestroy(&assembled_vec));
1783     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1784   } else {
1785     // Operator fallback
1786     CeedOperator op_fallback;
1787 
1788     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1789     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1790     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1791   }
1792   return CEED_ERROR_SUCCESS;
1793 }
1794 
1795 /**
1796   @brief Assemble the diagonal of a square linear CeedOperator
1797 
1798   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1799 
1800   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1801 
1802   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1803 
1804   @param[in]  op        CeedOperator to assemble CeedQFunction
1805   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1806   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1807 
1808   @return An error code: 0 - success, otherwise - failure
1809 
1810   @ref User
1811 **/
1812 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1813   bool     is_composite;
1814   CeedSize input_size = 0, output_size = 0;
1815 
1816   CeedCall(CeedOperatorCheckReady(op));
1817   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1818 
1819   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1820   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1821 
1822   // Early exit for empty operator
1823   if (!is_composite) {
1824     CeedInt num_elem = 0;
1825 
1826     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1827     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1828   }
1829 
1830   if (op->LinearAssembleDiagonal) {
1831     // Backend version
1832     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1833     return CEED_ERROR_SUCCESS;
1834   } else if (op->LinearAssembleAddDiagonal) {
1835     // Backend version with zeroing first
1836     CeedCall(CeedVectorSetValue(assembled, 0.0));
1837     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1838     return CEED_ERROR_SUCCESS;
1839   } else {
1840     // Operator fallback
1841     CeedOperator op_fallback;
1842 
1843     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1844     if (op_fallback) {
1845       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1846       return CEED_ERROR_SUCCESS;
1847     }
1848   }
1849   // Default interface implementation
1850   CeedCall(CeedVectorSetValue(assembled, 0.0));
1851   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1852   return CEED_ERROR_SUCCESS;
1853 }
1854 
1855 /**
1856   @brief Assemble the diagonal of a square linear CeedOperator
1857 
1858   This sums into a CeedVector the diagonal of a linear CeedOperator.
1859 
1860   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1861 
1862   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1863 
1864   @param[in]  op        CeedOperator to assemble CeedQFunction
1865   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1866   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1867 
1868   @return An error code: 0 - success, otherwise - failure
1869 
1870   @ref User
1871 **/
1872 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1873   bool     is_composite;
1874   CeedSize input_size = 0, output_size = 0;
1875 
1876   CeedCall(CeedOperatorCheckReady(op));
1877   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1878 
1879   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1880   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1881 
1882   // Early exit for empty operator
1883   if (!is_composite) {
1884     CeedInt num_elem = 0;
1885 
1886     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1887     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1888   }
1889 
1890   if (op->LinearAssembleAddDiagonal) {
1891     // Backend version
1892     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1893     return CEED_ERROR_SUCCESS;
1894   } else {
1895     // Operator fallback
1896     CeedOperator op_fallback;
1897 
1898     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1899     if (op_fallback) {
1900       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1901       return CEED_ERROR_SUCCESS;
1902     }
1903   }
1904   // Default interface implementation
1905   if (is_composite) {
1906     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1907   } else {
1908     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1909   }
1910   return CEED_ERROR_SUCCESS;
1911 }
1912 
1913 /**
1914    @brief Fully assemble the point-block diagonal pattern of a linear operator.
1915 
1916    Expected to be used in conjunction with CeedOperatorLinearAssemblePointBlockDiagonal().
1917 
1918    The assembly routines use coordinate format, with `num_entries` tuples of the form (i, j, value) which indicate that value should be added to the
1919 matrix in entry (i, j).
1920   Note that the (i, j) pairs are unique.
1921   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in
1922 the same ordering.
1923 
1924    This will generally be slow unless your operator is low-order.
1925 
1926    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1927 
1928    @param[in]  op          CeedOperator to assemble
1929    @param[out] num_entries Number of entries in coordinate nonzero pattern
1930    @param[out] rows        Row number for each entry
1931    @param[out] cols        Column number for each entry
1932 
1933    @ref User
1934 **/
1935 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1936   Ceed          ceed;
1937   bool          is_composite;
1938   CeedInt       num_active_components, num_sub_operators;
1939   CeedOperator *sub_operators;
1940 
1941   CeedCall(CeedOperatorGetCeed(op, &ceed));
1942   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1943 
1944   CeedSize input_size = 0, output_size = 0;
1945   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1946   CeedCheck(input_size == output_size, ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1947 
1948   if (is_composite) {
1949     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_operators));
1950     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1951   } else {
1952     sub_operators     = &op;
1953     num_sub_operators = 1;
1954   }
1955 
1956   // Verify operator can be assembled correctly
1957   {
1958     CeedOperatorAssemblyData data;
1959     CeedInt                  num_active_elem_rstrs, comp_stride;
1960     CeedElemRestriction     *active_elem_rstrs;
1961 
1962     // Get initial values to check against
1963     CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data));
1964     CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
1965     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride));
1966     CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components));
1967 
1968     // Verify that all active element restrictions have same component stride and number of components
1969     for (CeedInt k = 0; k < num_sub_operators; k++) {
1970       CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data));
1971       CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
1972       for (CeedInt i = 0; i < num_active_elem_rstrs; i++) {
1973         CeedInt comp_stride_sub, num_active_components_sub;
1974 
1975         CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub));
1976         CeedCheck(comp_stride == comp_stride_sub, ceed, CEED_ERROR_DIMENSION,
1977                   "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub);
1978         CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub));
1979         CeedCheck(num_active_components == num_active_components_sub, ceed, CEED_ERROR_INCOMPATIBLE,
1980                   "All suboperators must have the same number of output components");
1981       }
1982     }
1983   }
1984   *num_entries = input_size * num_active_components;
1985   CeedCall(CeedCalloc(*num_entries, rows));
1986   CeedCall(CeedCalloc(*num_entries, cols));
1987 
1988   for (CeedInt o = 0; o < num_sub_operators; o++) {
1989     CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr;
1990     CeedInt             comp_stride, num_elem, elem_size;
1991     const CeedInt      *offsets, *point_block_offsets;
1992 
1993     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr));
1994     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride));
1995     CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem));
1996     CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size));
1997     CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets));
1998 
1999     CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr));
2000     CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets));
2001 
2002     for (CeedSize i = 0; i < num_elem * elem_size; i++) {
2003       for (CeedInt c_out = 0; c_out < num_active_components; c_out++) {
2004         for (CeedInt c_in = 0; c_in < num_active_components; c_in++) {
2005           (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride;
2006           (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride;
2007         }
2008       }
2009     }
2010 
2011     CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets));
2012     CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets));
2013     CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr));
2014   }
2015   return CEED_ERROR_SUCCESS;
2016 }
2017 
2018 /**
2019   @brief Assemble the point block diagonal of a square linear CeedOperator
2020 
2021   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
2022 
2023   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
2024 
2025   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2026 
2027   @param[in]  op        CeedOperator to assemble CeedQFunction
2028   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
2029 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
2030 component in].
2031   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2032 
2033   @return An error code: 0 - success, otherwise - failure
2034 
2035   @ref User
2036 **/
2037 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2038   bool     is_composite;
2039   CeedSize input_size = 0, output_size = 0;
2040 
2041   CeedCall(CeedOperatorCheckReady(op));
2042   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2043 
2044   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2045   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
2046 
2047   // Early exit for empty operator
2048   if (!is_composite) {
2049     CeedInt num_elem = 0;
2050 
2051     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2052     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2053   }
2054 
2055   if (op->LinearAssemblePointBlockDiagonal) {
2056     // Backend version
2057     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
2058     return CEED_ERROR_SUCCESS;
2059   } else if (op->LinearAssembleAddPointBlockDiagonal) {
2060     // Backend version with zeroing first
2061     CeedCall(CeedVectorSetValue(assembled, 0.0));
2062     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2063     return CEED_ERROR_SUCCESS;
2064   } else {
2065     // Operator fallback
2066     CeedOperator op_fallback;
2067 
2068     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2069     if (op_fallback) {
2070       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
2071       return CEED_ERROR_SUCCESS;
2072     }
2073   }
2074   // Default interface implementation
2075   CeedCall(CeedVectorSetValue(assembled, 0.0));
2076   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2077   return CEED_ERROR_SUCCESS;
2078 }
2079 
2080 /**
2081   @brief Assemble the point block diagonal of a square linear CeedOperator
2082 
2083   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
2084 
2085   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
2086 
2087   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2088 
2089   @param[in]  op        CeedOperator to assemble CeedQFunction
2090   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
2091 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
2092 component in].
2093   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2094 
2095   @return An error code: 0 - success, otherwise - failure
2096 
2097   @ref User
2098 **/
2099 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2100   bool     is_composite;
2101   CeedSize input_size = 0, output_size = 0;
2102 
2103   CeedCall(CeedOperatorCheckReady(op));
2104   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2105 
2106   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2107   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
2108 
2109   // Early exit for empty operator
2110   if (!is_composite) {
2111     CeedInt num_elem = 0;
2112 
2113     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2114     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2115   }
2116 
2117   if (op->LinearAssembleAddPointBlockDiagonal) {
2118     // Backend version
2119     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
2120     return CEED_ERROR_SUCCESS;
2121   } else {
2122     // Operator fallback
2123     CeedOperator op_fallback;
2124 
2125     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2126     if (op_fallback) {
2127       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
2128       return CEED_ERROR_SUCCESS;
2129     }
2130   }
2131   // Default interface implementation
2132   if (is_composite) {
2133     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
2134   } else {
2135     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
2136   }
2137   return CEED_ERROR_SUCCESS;
2138 }
2139 
2140 /**
2141    @brief Fully assemble the nonzero pattern of a linear operator.
2142 
2143    Expected to be used in conjunction with CeedOperatorLinearAssemble().
2144 
2145    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
2146 matrix in entry (i, j).
2147   Note that the (i, j) pairs are not unique and may repeat.
2148   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
2149 
2150    This will generally be slow unless your operator is low-order.
2151 
2152    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2153 
2154    @param[in]  op          CeedOperator to assemble
2155    @param[out] num_entries Number of entries in coordinate nonzero pattern
2156    @param[out] rows        Row number for each entry
2157    @param[out] cols        Column number for each entry
2158 
2159    @ref User
2160 **/
2161 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2162   bool          is_composite;
2163   CeedInt       num_suboperators, offset = 0;
2164   CeedSize      single_entries;
2165   CeedOperator *sub_operators;
2166 
2167   CeedCall(CeedOperatorCheckReady(op));
2168   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2169 
2170   if (op->LinearAssembleSymbolic) {
2171     // Backend version
2172     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
2173     return CEED_ERROR_SUCCESS;
2174   } else {
2175     // Operator fallback
2176     CeedOperator op_fallback;
2177 
2178     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2179     if (op_fallback) {
2180       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
2181       return CEED_ERROR_SUCCESS;
2182     }
2183   }
2184 
2185   // Default interface implementation
2186 
2187   // Count entries and allocate rows, cols arrays
2188   *num_entries = 0;
2189   if (is_composite) {
2190     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2191     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2192     for (CeedInt k = 0; k < num_suboperators; ++k) {
2193       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2194       *num_entries += single_entries;
2195     }
2196   } else {
2197     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
2198     *num_entries += single_entries;
2199   }
2200   CeedCall(CeedCalloc(*num_entries, rows));
2201   CeedCall(CeedCalloc(*num_entries, cols));
2202 
2203   // Assemble nonzero locations
2204   if (is_composite) {
2205     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2206     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2207     for (CeedInt k = 0; k < num_suboperators; ++k) {
2208       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
2209       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2210       offset += single_entries;
2211     }
2212   } else {
2213     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
2214   }
2215   return CEED_ERROR_SUCCESS;
2216 }
2217 
2218 /**
2219    @brief Fully assemble the nonzero entries of a linear operator.
2220 
2221    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
2222 
2223    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
2224 matrix in entry (i, j).
2225   Note that the (i, j) pairs are not unique and may repeat.
2226   This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
2227 
2228    This will generally be slow unless your operator is low-order.
2229 
2230    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2231 
2232    @param[in]  op     CeedOperator to assemble
2233    @param[out] values Values to assemble into matrix
2234 
2235    @ref User
2236 **/
2237 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
2238   bool          is_composite;
2239   CeedInt       num_suboperators, offset = 0;
2240   CeedSize      single_entries = 0;
2241   CeedOperator *sub_operators;
2242 
2243   CeedCall(CeedOperatorCheckReady(op));
2244   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2245 
2246   // Early exit for empty operator
2247   if (!is_composite) {
2248     CeedInt num_elem = 0;
2249 
2250     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2251     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2252   }
2253 
2254   if (op->LinearAssemble) {
2255     // Backend version
2256     CeedCall(op->LinearAssemble(op, values));
2257     return CEED_ERROR_SUCCESS;
2258   } else {
2259     // Operator fallback
2260     CeedOperator op_fallback;
2261 
2262     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2263     if (op_fallback) {
2264       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2265       return CEED_ERROR_SUCCESS;
2266     }
2267   }
2268 
2269   // Default interface implementation
2270   CeedCall(CeedVectorSetValue(values, 0.0));
2271   if (is_composite) {
2272     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2273     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2274     for (CeedInt k = 0; k < num_suboperators; k++) {
2275       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2276       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2277       offset += single_entries;
2278     }
2279   } else {
2280     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2281   }
2282   return CEED_ERROR_SUCCESS;
2283 }
2284 
2285 /**
2286   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
2287 
2288   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2289 
2290   @param[in]  op               Composite CeedOperator
2291   @param[in]  num_skip_indices Number of suboperators to skip
2292   @param[in]  skip_indices     Array of indices of suboperators to skip
2293   @param[out] mult             Vector to store multiplicity (of size l_size)
2294 
2295   @return An error code: 0 - success, otherwise - failure
2296 
2297   @ref User
2298 **/
2299 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2300   Ceed                ceed;
2301   CeedInt             num_suboperators;
2302   CeedSize            l_vec_len;
2303   CeedScalar         *mult_array;
2304   CeedVector          ones_l_vec;
2305   CeedElemRestriction elem_rstr, mult_elem_rstr;
2306   CeedOperator       *sub_operators;
2307 
2308   CeedCall(CeedOperatorCheckReady(op));
2309 
2310   CeedCall(CeedOperatorGetCeed(op, &ceed));
2311 
2312   // Zero mult vector
2313   CeedCall(CeedVectorSetValue(mult, 0.0));
2314 
2315   // Get suboperators
2316   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2317   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2318   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2319 
2320   // Work vector
2321   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2322   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2323   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2324   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2325 
2326   // Compute multiplicity across suboperators
2327   for (CeedInt i = 0; i < num_suboperators; i++) {
2328     const CeedScalar *sub_mult_array;
2329     CeedVector        sub_mult_l_vec, ones_e_vec;
2330 
2331     // -- Check for suboperator to skip
2332     for (CeedInt j = 0; j < num_skip_indices; j++) {
2333       if (skip_indices[j] == i) continue;
2334     }
2335 
2336     // -- Sub operator multiplicity
2337     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2338     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr));
2339     CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2340     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2341     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2342     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2343     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2344     // ---- Flag every node present in the current suboperator
2345     for (CeedInt j = 0; j < l_vec_len; j++) {
2346       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2347     }
2348     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2349     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2350     CeedCall(CeedVectorDestroy(&ones_e_vec));
2351     CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr));
2352   }
2353   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2354   CeedCall(CeedVectorDestroy(&ones_l_vec));
2355   return CEED_ERROR_SUCCESS;
2356 }
2357 
2358 /**
2359   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2360 grid interpolation
2361 
2362   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2363 
2364   @param[in]  op_fine      Fine grid operator
2365   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2366   @param[in]  rstr_coarse  Coarse grid restriction
2367   @param[in]  basis_coarse Coarse grid active vector basis
2368   @param[out] op_coarse    Coarse grid operator
2369   @param[out] op_prolong   Coarse to fine operator, or NULL
2370   @param[out] op_restrict  Fine to coarse operator, or NULL
2371 
2372   @return An error code: 0 - success, otherwise - failure
2373 
2374   @ref User
2375 **/
2376 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2377                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2378   CeedBasis basis_c_to_f = NULL;
2379 
2380   CeedCall(CeedOperatorCheckReady(op_fine));
2381 
2382   // Build prolongation matrix, if required
2383   if (op_prolong || op_restrict) {
2384     CeedBasis basis_fine;
2385 
2386     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2387     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2388   }
2389 
2390   // Core code
2391   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2392   return CEED_ERROR_SUCCESS;
2393 }
2394 
2395 /**
2396   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2397 
2398   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2399 
2400   @param[in]  op_fine       Fine grid operator
2401   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2402   @param[in]  rstr_coarse   Coarse grid restriction
2403   @param[in]  basis_coarse  Coarse grid active vector basis
2404   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2405   @param[out] op_coarse     Coarse grid operator
2406   @param[out] op_prolong    Coarse to fine operator, or NULL
2407   @param[out] op_restrict   Fine to coarse operator, or NULL
2408 
2409   @return An error code: 0 - success, otherwise - failure
2410 
2411   @ref User
2412 **/
2413 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2414                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2415                                              CeedOperator *op_restrict) {
2416   Ceed      ceed;
2417   CeedInt   Q_f, Q_c;
2418   CeedBasis basis_fine, basis_c_to_f = NULL;
2419 
2420   CeedCall(CeedOperatorCheckReady(op_fine));
2421   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2422 
2423   // Check for compatible quadrature spaces
2424   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2425   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2426   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2427   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2428 
2429   // Create coarse to fine basis, if required
2430   if (op_prolong || op_restrict) {
2431     CeedInt     dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2432     CeedScalar *q_ref, *q_weight, *grad;
2433 
2434     // Check if interpolation matrix is provided
2435     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2436               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2437     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2438     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2439     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2440     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2441     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2442     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2443     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2444     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2445     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2446     CeedCall(CeedFree(&q_ref));
2447     CeedCall(CeedFree(&q_weight));
2448     CeedCall(CeedFree(&grad));
2449   }
2450 
2451   // Core code
2452   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2453   return CEED_ERROR_SUCCESS;
2454 }
2455 
2456 /**
2457   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2458 
2459   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2460 
2461   @param[in]  op_fine       Fine grid operator
2462   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2463   @param[in]  rstr_coarse   Coarse grid restriction
2464   @param[in]  basis_coarse  Coarse grid active vector basis
2465   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2466   @param[out] op_coarse     Coarse grid operator
2467   @param[out] op_prolong    Coarse to fine operator, or NULL
2468   @param[out] op_restrict   Fine to coarse operator, or NULL
2469 
2470   @return An error code: 0 - success, otherwise - failure
2471 
2472   @ref User
2473 **/
2474 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2475                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2476                                        CeedOperator *op_restrict) {
2477   Ceed      ceed;
2478   CeedInt   Q_f, Q_c;
2479   CeedBasis basis_fine, basis_c_to_f = NULL;
2480 
2481   CeedCall(CeedOperatorCheckReady(op_fine));
2482   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2483 
2484   // Check for compatible quadrature spaces
2485   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2486   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2487   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2488   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2489 
2490   // Coarse to fine basis
2491   if (op_prolong || op_restrict) {
2492     CeedInt          dim, num_comp, num_nodes_c, num_nodes_f;
2493     CeedScalar      *q_ref, *q_weight, *grad;
2494     CeedElemTopology topo;
2495 
2496     // Check if interpolation matrix is provided
2497     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2498               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2499     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2500     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2501     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2502     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2503     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2504     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2505     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2506     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2507     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2508     CeedCall(CeedFree(&q_ref));
2509     CeedCall(CeedFree(&q_weight));
2510     CeedCall(CeedFree(&grad));
2511   }
2512 
2513   // Core code
2514   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2515   return CEED_ERROR_SUCCESS;
2516 }
2517 
2518 /**
2519   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2520 
2521   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2522   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2523   The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T
2524 \hat S V\f$.
2525   The CeedOperator must be linear and non-composite.
2526   The associated CeedQFunction must therefore also be linear.
2527 
2528   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2529 
2530   @param[in]  op      CeedOperator to create element inverses
2531   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2532   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2533 
2534   @return An error code: 0 - success, otherwise - failure
2535 
2536   @ref User
2537 **/
2538 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2539   Ceed                 ceed, ceed_parent;
2540   bool                 interp = false, grad = false, is_tensor_basis = true;
2541   CeedInt              num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2542   CeedSize             l_size = 1;
2543   CeedScalar          *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg;
2544   const CeedScalar    *interp_1d, *grad_1d, *q_weight_1d;
2545   CeedVector           q_data;
2546   CeedElemRestriction  rstr  = NULL, rstr_qd_i;
2547   CeedBasis            basis = NULL, fdm_basis;
2548   CeedQFunctionContext ctx_fdm;
2549   CeedQFunctionField  *qf_fields;
2550   CeedQFunction        qf, qf_fdm;
2551   CeedOperatorField   *op_fields;
2552 
2553   CeedCall(CeedOperatorCheckReady(op));
2554 
2555   if (op->CreateFDMElementInverse) {
2556     // Backend version
2557     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2558     return CEED_ERROR_SUCCESS;
2559   } else {
2560     // Operator fallback
2561     CeedOperator op_fallback;
2562 
2563     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2564     if (op_fallback) {
2565       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2566       return CEED_ERROR_SUCCESS;
2567     }
2568   }
2569 
2570   // Default interface implementation
2571   CeedCall(CeedOperatorGetCeed(op, &ceed));
2572   CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent));
2573   CeedCall(CeedOperatorGetQFunction(op, &qf));
2574 
2575   // Determine active input basis
2576   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2577   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2578   for (CeedInt i = 0; i < num_input_fields; i++) {
2579     CeedVector vec;
2580 
2581     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2582     if (vec == CEED_VECTOR_ACTIVE) {
2583       CeedEvalMode eval_mode;
2584 
2585       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2586       interp = interp || eval_mode == CEED_EVAL_INTERP;
2587       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2588       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2589       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2590     }
2591   }
2592   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2593   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2594   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2595   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2596   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2597   CeedCall(CeedBasisGetDimension(basis, &dim));
2598   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2599   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2600   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2601 
2602   // Build and diagonalize 1D Mass and Laplacian
2603   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2604   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2605   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2606   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2607   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2608   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2609   CeedCall(CeedCalloc(P_1d, &lambda));
2610   // -- Build matrices
2611   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2612   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2613   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2614   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2615 
2616   // -- Diagonalize
2617   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2618   CeedCall(CeedFree(&mass));
2619   CeedCall(CeedFree(&laplace));
2620   for (CeedInt i = 0; i < P_1d; i++) {
2621     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2622   }
2623   CeedCall(CeedFree(&x));
2624 
2625   {
2626     CeedInt             layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2627     CeedScalar          max_norm = 0;
2628     const CeedScalar   *assembled_array, *q_weight_array;
2629     CeedVector          assembled = NULL, q_weight;
2630     CeedElemRestriction rstr_qf   = NULL;
2631 
2632     // Assemble QFunction
2633     CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2634     CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2635     CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2636     CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2637 
2638     // Calculate element averages
2639     CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2640     CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2641     CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2642     CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2643     CeedCall(CeedCalloc(num_elem, &elem_avg));
2644     const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2645 
2646     for (CeedInt e = 0; e < num_elem; e++) {
2647       CeedInt count = 0;
2648 
2649       for (CeedInt q = 0; q < num_qpts; q++) {
2650         for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2651           if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2652             elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2653             count++;
2654           }
2655         }
2656       }
2657       if (count) {
2658         elem_avg[e] /= count;
2659       } else {
2660         elem_avg[e] = 1.0;
2661       }
2662     }
2663     CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2664     CeedCall(CeedVectorDestroy(&assembled));
2665     CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2666     CeedCall(CeedVectorDestroy(&q_weight));
2667   }
2668 
2669   // Build FDM diagonal
2670   {
2671     CeedScalar *q_data_array, *fdm_diagonal;
2672 
2673     CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2674     const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2675     for (CeedInt c = 0; c < num_comp; c++) {
2676       for (CeedInt n = 0; n < num_nodes; n++) {
2677         if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2678         if (grad) {
2679           for (CeedInt d = 0; d < dim; d++) {
2680             CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2681             fdm_diagonal[c * num_nodes + n] += lambda[i];
2682           }
2683         }
2684         if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2685       }
2686     }
2687     CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2688     CeedCall(CeedVectorSetValue(q_data, 0.0));
2689     CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2690     for (CeedInt e = 0; e < num_elem; e++) {
2691       for (CeedInt c = 0; c < num_comp; c++) {
2692         for (CeedInt n = 0; n < num_nodes; n++)
2693           q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2694       }
2695     }
2696     CeedCall(CeedFree(&elem_avg));
2697     CeedCall(CeedFree(&fdm_diagonal));
2698     CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2699   }
2700 
2701   // Setup FDM operator
2702   // -- Basis
2703   {
2704     CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2705 
2706     CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2707     CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2708     CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2709     CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2710     CeedCall(CeedFree(&fdm_interp));
2711     CeedCall(CeedFree(&grad_dummy));
2712     CeedCall(CeedFree(&q_ref_dummy));
2713     CeedCall(CeedFree(&q_weight_dummy));
2714     CeedCall(CeedFree(&lambda));
2715   }
2716 
2717   // -- Restriction
2718   {
2719     CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp};
2720     CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i));
2721   }
2722 
2723   // -- QFunction
2724   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2725   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2726   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2727   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2728   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2729 
2730   // -- QFunction context
2731   {
2732     CeedInt *num_comp_data;
2733 
2734     CeedCall(CeedCalloc(1, &num_comp_data));
2735     num_comp_data[0] = num_comp;
2736     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2737     CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2738   }
2739   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2740   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2741 
2742   // -- Operator
2743   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2744   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2745   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data));
2746   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2747 
2748   // Cleanup
2749   CeedCall(CeedVectorDestroy(&q_data));
2750   CeedCall(CeedBasisDestroy(&fdm_basis));
2751   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2752   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2753   return CEED_ERROR_SUCCESS;
2754 }
2755 
2756 /// @}
2757