xref: /libCEED/interface/ceed-preconditioning.c (revision 123d890ddf70fcda52aa403ec2d78adc317a115c)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   char *source_path_with_name = NULL;
39 
40   // Check if NULL qf passed in
41   if (!qf) return CEED_ERROR_SUCCESS;
42 
43   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
44   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
45 
46   if (qf->source_path) {
47     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
48     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
49     memcpy(source_path_with_name, qf->source_path, path_len);
50     memcpy(&source_path_with_name[path_len], ":", 1);
51     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
52   } else {
53     CeedCall(CeedCalloc(1, &source_path_with_name));
54   }
55 
56   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
57   {
58     CeedQFunctionContext ctx;
59 
60     CeedCall(CeedQFunctionGetContext(qf, &ctx));
61     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
62   }
63   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
64     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
65   }
66   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
67     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
68   }
69   CeedCall(CeedFree(&source_path_with_name));
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   Ceed         ceed_fallback;
84   bool         is_composite;
85   CeedOperator op_fallback;
86 
87   // Check not already created
88   if (op->op_fallback) return CEED_ERROR_SUCCESS;
89 
90   // Fallback Ceed
91   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
92   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
93 
94   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
95   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
96 
97   // Clone Op
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114 
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
117     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
118     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
119     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
120       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
121                                     op->input_fields[i]->vec));
122     }
123     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
124       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
125                                     op->output_fields[i]->vec));
126     }
127     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
128     if (op_fallback->num_qpts == 0) CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     // Cleanup
130     CeedCall(CeedQFunctionDestroy(&qf_fallback));
131     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
133   }
134   CeedCall(CeedOperatorSetName(op_fallback, op->name));
135   CeedCall(CeedOperatorCheckReady(op_fallback));
136   // Note: No ref-counting here so we don't get caught in a reference loop.
137   //       The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback.
138   op->op_fallback                 = op_fallback;
139   op_fallback->op_fallback_parent = op;
140   return CEED_ERROR_SUCCESS;
141 }
142 
143 /**
144   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
145 
146   @param[in]  op          CeedOperator to retrieve fallback for
147   @param[out] op_fallback Fallback CeedOperator
148 
149   @return An error code: 0 - success, otherwise - failure
150 
151   @ref Developer
152 **/
153 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
154   // Create if needed
155   if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op));
156   if (op->op_fallback) {
157     bool is_debug;
158 
159     CeedCall(CeedIsDebug(op->ceed, &is_debug));
160     if (is_debug) {
161       Ceed        ceed, ceed_fallback;
162       const char *resource, *resource_fallback;
163 
164       CeedCall(CeedOperatorGetCeed(op, &ceed));
165       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
166       CeedCall(CeedGetResource(ceed, &resource));
167       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
168 
169       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n");
170       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
171                 op->op_fallback);
172     }
173   }
174   *op_fallback = op->op_fallback;
175   return CEED_ERROR_SUCCESS;
176 }
177 
178 /**
179   @brief Get the parent CeedOperator for a fallback CeedOperator
180 
181   @param[in]  op     CeedOperator context
182   @param[out] parent Variable to store parent CeedOperator context
183 
184   @return An error code: 0 - success, otherwise - failure
185 
186   @ref Developer
187 **/
188 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) {
189   *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL;
190   return CEED_ERROR_SUCCESS;
191 }
192 
193 /**
194   @brief Get the Ceed context of the parent CeedOperator for a fallback CeedOperator
195 
196   @param[in]  op     CeedOperator context
197   @param[out] parent Variable to store parent Ceed context
198 
199   @return An error code: 0 - success, otherwise - failure
200 
201   @ref Developer
202 **/
203 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) {
204   *parent = op->op_fallback_parent ? op->op_fallback_parent->ceed : op->ceed;
205   return CEED_ERROR_SUCCESS;
206 }
207 
208 /**
209   @brief Select correct basis matrix pointer based on CeedEvalMode
210 
211   @param[in]  basis     CeedBasis from which to get the basis matrix
212   @param[in]  eval_mode Current basis evaluation mode
213   @param[in]  identity  Pointer to identity matrix
214   @param[out] basis_ptr Basis pointer to set
215 
216   @ref Developer
217 **/
218 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
219   switch (eval_mode) {
220     case CEED_EVAL_NONE:
221       *basis_ptr = identity;
222       break;
223     case CEED_EVAL_INTERP:
224       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
225       break;
226     case CEED_EVAL_GRAD:
227       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
228       break;
229     case CEED_EVAL_DIV:
230       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
231       break;
232     case CEED_EVAL_CURL:
233       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
234       break;
235     case CEED_EVAL_WEIGHT:
236       break;  // Caught by QF Assembly
237   }
238   assert(*basis_ptr != NULL);
239   return CEED_ERROR_SUCCESS;
240 }
241 
242 /**
243   @brief Core logic for assembling operator diagonal or point block diagonal
244 
245   @param[in]  op             CeedOperator to assemble point block diagonal
246   @param[in]  request        Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
247   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
248   @param[out] assembled      CeedVector to store assembled diagonal
249 
250   @return An error code: 0 - success, otherwise - failure
251 
252   @ref Developer
253 **/
254 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_point_block, CeedVector assembled) {
255   Ceed ceed;
256   bool is_composite;
257 
258   CeedCall(CeedOperatorGetCeed(op, &ceed));
259   CeedCall(CeedOperatorIsComposite(op, &is_composite));
260   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
261 
262   // Assemble QFunction
263   CeedInt             layout_qf[3];
264   const CeedScalar   *assembled_qf_array;
265   CeedVector          assembled_qf        = NULL;
266   CeedElemRestriction assembled_elem_rstr = NULL;
267 
268   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
269   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf));
270   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
271   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
272 
273   // Get assembly data
274   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
275   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
276   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
277   CeedBasis               *active_bases_in, *active_bases_out;
278   CeedElemRestriction     *active_elem_rstrs_in, *active_elem_rstrs_out;
279   CeedOperatorAssemblyData data;
280 
281   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
282   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
283                                                 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out,
284                                                 &num_output_components));
285   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL));
286   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out));
287 
288   CeedCheck(num_active_bases_in == num_active_bases_out, ceed, CEED_ERROR_UNSUPPORTED,
289             "Cannot assemble operator diagonal with different numbers of input and output active bases");
290 
291   // Loop over all active bases
292   for (CeedInt b = 0; b < num_active_bases_in; b++) {
293     bool                has_eval_none = false;
294     CeedInt             num_elem, num_nodes, num_qpts, num_comp;
295     CeedScalar         *elem_diag_array, *identity = NULL;
296     CeedVector          elem_diag;
297     CeedElemRestriction diag_elem_rstr;
298 
299     CeedCheck(active_elem_rstrs_in[b] == active_elem_rstrs_out[b], ceed, CEED_ERROR_UNSUPPORTED,
300               "Cannot assemble operator diagonal with different input and output active element restrictions");
301 
302     // Assemble point block diagonal restriction, if needed
303     if (is_point_block) {
304       CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b], &diag_elem_rstr));
305     } else {
306       CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b], &diag_elem_rstr));
307     }
308 
309     // Create diagonal vector
310     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
311 
312     // Assemble element operator diagonals
313     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
314     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
315     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
316     CeedCall(CeedBasisGetNumNodes(active_bases_in[b], &num_nodes));
317     CeedCall(CeedBasisGetNumComponents(active_bases_in[b], &num_comp));
318     if (active_bases_in[b] == CEED_BASIS_NONE) num_qpts = num_nodes;
319     else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b], &num_qpts));
320 
321     if (active_bases_in[b] != active_bases_out[b]) {
322       CeedInt num_nodes_out, num_qpts_out, num_comp_out;
323 
324       CeedCall(CeedBasisGetNumNodes(active_bases_out[b], &num_nodes_out));
325       CeedCheck(num_nodes == num_nodes_out, ceed, CEED_ERROR_UNSUPPORTED, "Active input and output bases must have the same number of nodes");
326       CeedCall(CeedBasisGetNumComponents(active_bases_out[b], &num_comp_out));
327       CeedCheck(num_comp == num_comp_out, ceed, CEED_ERROR_UNSUPPORTED, "Active input and output bases must have the same number of components");
328       if (active_bases_out[b] == CEED_BASIS_NONE) num_qpts_out = num_nodes_out;
329       else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_out[b], &num_qpts_out));
330       CeedCheck(num_qpts == num_qpts_out, ceed, CEED_ERROR_UNSUPPORTED,
331                 "Active input and output bases must have the same number of quadrature points");
332     }
333 
334     // Construct identity matrix for basis if required
335     for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) {
336       has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE);
337     }
338     for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) {
339       has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE);
340     }
341     if (has_eval_none) {
342       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
343       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
344     }
345 
346     // Compute the diagonal of B^T D B
347     // Each element
348     for (CeedSize e = 0; e < num_elem; e++) {
349       // Each basis eval mode pair
350       CeedInt      d_out              = 0, q_comp_out;
351       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
352 
353       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) {
354         CeedInt           d_in              = 0, q_comp_in;
355         const CeedScalar *B_t               = NULL;
356         CeedEvalMode      eval_mode_in_prev = CEED_EVAL_NONE;
357 
358         CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b], eval_modes_out[b][e_out], identity, &B_t));
359         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b], eval_modes_out[b][e_out], &q_comp_out));
360         if (q_comp_out > 1) {
361           if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
362           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
363         }
364         eval_mode_out_prev = eval_modes_out[b][e_out];
365 
366         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) {
367           const CeedScalar *B = NULL;
368 
369           CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b], eval_modes_in[b][e_in], identity, &B));
370           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b], eval_modes_in[b][e_in], &q_comp_in));
371           if (q_comp_in > 1) {
372             if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
373             else B = &B[(++d_in) * num_qpts * num_nodes];
374           }
375           eval_mode_in_prev = eval_modes_in[b][e_in];
376 
377           // Each component
378           for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
379             // Each qpt/node pair
380             for (CeedInt q = 0; q < num_qpts; q++) {
381               if (is_point_block) {
382                 // Point Block Diagonal
383                 for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
384                   const CeedSize c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
385                   const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
386 
387                   for (CeedInt n = 0; n < num_nodes; n++) {
388                     elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
389                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
390                   }
391                 }
392               } else {
393                 // Diagonal Only
394                 const CeedInt    c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
395                 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
396 
397                 for (CeedInt n = 0; n < num_nodes; n++) {
398                   elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
399                 }
400               }
401             }
402           }
403         }
404       }
405     }
406     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
407 
408     // Assemble local operator diagonal
409     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
410 
411     // Cleanup
412     CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
413     CeedCall(CeedVectorDestroy(&elem_diag));
414     CeedCall(CeedFree(&identity));
415   }
416   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
417   CeedCall(CeedVectorDestroy(&assembled_qf));
418   return CEED_ERROR_SUCCESS;
419 }
420 
421 /**
422   @brief Core logic for assembling composite operator diagonal
423 
424   @param[in]  op             CeedOperator to assemble point block diagonal
425   @param[in]  request        Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
426   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
427   @param[out] assembled      CeedVector to store assembled diagonal
428 
429   @return An error code: 0 - success, otherwise - failure
430 
431   @ref Developer
432 **/
433 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block,
434                                                                  CeedVector assembled) {
435   CeedInt       num_sub;
436   CeedOperator *suboperators;
437 
438   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
439   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
440   for (CeedInt i = 0; i < num_sub; i++) {
441     if (is_point_block) {
442       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
443     } else {
444       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
445     }
446   }
447   return CEED_ERROR_SUCCESS;
448 }
449 
450 /**
451   @brief Build nonzero pattern for non-composite operator
452 
453   Users should generally use CeedOperatorLinearAssembleSymbolic()
454 
455   @param[in]  op     CeedOperator to assemble nonzero pattern
456   @param[in]  offset Offset for number of entries
457   @param[out] rows   Row number for each entry
458   @param[out] cols   Column number for each entry
459 
460   @return An error code: 0 - success, otherwise - failure
461 
462   @ref Developer
463 **/
464 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
465   Ceed                ceed;
466   bool                is_composite;
467   CeedSize            num_nodes_in, num_nodes_out, count = 0;
468   CeedInt             num_elem_in, elem_size_in, num_comp_in, layout_er_in[3];
469   CeedInt             num_elem_out, elem_size_out, num_comp_out, layout_er_out[3], local_num_entries;
470   CeedScalar         *array;
471   const CeedScalar   *elem_dof_a_in, *elem_dof_a_out;
472   CeedVector          index_vec_in, index_vec_out, elem_dof_in, elem_dof_out;
473   CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out;
474 
475   CeedCall(CeedOperatorGetCeed(op, &ceed));
476   CeedCall(CeedOperatorIsComposite(op, &is_composite));
477   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
478 
479   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out));
480   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
481   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
482   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
483   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
484   CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, &layout_er_in));
485 
486   // Determine elem_dof relation for input
487   CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in));
488   CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array));
489   for (CeedInt i = 0; i < num_nodes_in; i++) array[i] = i;
490   CeedCall(CeedVectorRestoreArray(index_vec_in, &array));
491   CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in));
492   CeedCall(CeedVectorSetValue(elem_dof_in, 0.0));
493   CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in));
494   CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE));
495   CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in));
496   CeedCall(CeedVectorDestroy(&index_vec_in));
497   CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in));
498 
499   if (elem_rstr_in != elem_rstr_out) {
500     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
501     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
502               "Active input and output operator restrictions must have the same number of elements");
503     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
504     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
505     CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, &layout_er_out));
506 
507     // Determine elem_dof relation for output
508     CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out));
509     CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array));
510     for (CeedInt i = 0; i < num_nodes_out; i++) array[i] = i;
511     CeedCall(CeedVectorRestoreArray(index_vec_out, &array));
512     CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out));
513     CeedCall(CeedVectorSetValue(elem_dof_out, 0.0));
514     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out));
515     CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE));
516     CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out));
517     CeedCall(CeedVectorDestroy(&index_vec_out));
518     CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out));
519   } else {
520     num_elem_out     = num_elem_in;
521     elem_size_out    = elem_size_in;
522     num_comp_out     = num_comp_in;
523     layout_er_out[0] = layout_er_in[0];
524     layout_er_out[1] = layout_er_in[1];
525     layout_er_out[2] = layout_er_in[2];
526     elem_dof_a_out   = elem_dof_a_in;
527   }
528   local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
529 
530   // Determine i, j locations for element matrices
531   for (CeedInt e = 0; e < num_elem_in; e++) {
532     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
533       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
534         for (CeedInt i = 0; i < elem_size_out; i++) {
535           for (CeedInt j = 0; j < elem_size_in; j++) {
536             const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2];
537             const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2];
538             const CeedInt row                = elem_dof_a_out[elem_dof_index_row];
539             const CeedInt col                = elem_dof_a_in[elem_dof_index_col];
540 
541             rows[offset + count] = row;
542             cols[offset + count] = col;
543             count++;
544           }
545         }
546       }
547     }
548   }
549   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
550   CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in));
551   CeedCall(CeedVectorDestroy(&elem_dof_in));
552   if (elem_rstr_in != elem_rstr_out) {
553     CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out));
554     CeedCall(CeedVectorDestroy(&elem_dof_out));
555   }
556   return CEED_ERROR_SUCCESS;
557 }
558 
559 /**
560   @brief Assemble nonzero entries for non-composite operator
561 
562   Users should generally use CeedOperatorLinearAssemble()
563 
564   @param[in]  op     CeedOperator to assemble
565   @param[in]  offset Offset for number of entries
566   @param[out] values Values to assemble into matrix
567 
568   @return An error code: 0 - success, otherwise - failure
569 
570   @ref Developer
571 **/
572 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
573   Ceed ceed;
574   bool is_composite;
575 
576   CeedCall(CeedOperatorGetCeed(op, &ceed));
577   CeedCall(CeedOperatorIsComposite(op, &is_composite));
578   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
579 
580   // Early exit for empty operator
581   {
582     CeedInt num_elem = 0;
583 
584     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
585     if (num_elem == 0) return CEED_ERROR_SUCCESS;
586   }
587 
588   if (op->LinearAssembleSingle) {
589     // Backend version
590     CeedCall(op->LinearAssembleSingle(op, offset, values));
591     return CEED_ERROR_SUCCESS;
592   } else {
593     // Operator fallback
594     CeedOperator op_fallback;
595 
596     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
597     if (op_fallback) {
598       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
599       return CEED_ERROR_SUCCESS;
600     }
601   }
602 
603   // Assemble QFunction
604   CeedInt             layout_qf[3];
605   const CeedScalar   *assembled_qf_array;
606   CeedVector          assembled_qf        = NULL;
607   CeedElemRestriction assembled_elem_rstr = NULL;
608 
609   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE));
610   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf));
611   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
612   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
613 
614   // Get assembly data
615   CeedInt                  num_elem_in, elem_size_in, num_comp_in, num_qpts_in;
616   CeedInt                  num_elem_out, elem_size_out, num_comp_out, num_qpts_out, local_num_entries;
617   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
618   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
619   CeedBasis               *active_bases_in, *active_bases_out, basis_in, basis_out;
620   const CeedScalar       **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out;
621   CeedElemRestriction      elem_rstr_in, elem_rstr_out;
622   CeedRestrictionType      elem_rstr_type_in, elem_rstr_type_out;
623   const bool              *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL;
624   const CeedInt8          *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL;
625   CeedOperatorAssemblyData data;
626 
627   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
628   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out,
629                                                 &num_eval_modes_out, &eval_modes_out, NULL, NULL));
630 
631   CeedCheck(num_active_bases_in == num_active_bases_out && num_active_bases_in == 1, ceed, CEED_ERROR_UNSUPPORTED,
632             "Cannot assemble operator with multiple active bases");
633   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator without inputs/outputs");
634 
635   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out));
636   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
637   basis_in  = active_bases_in[0];
638   basis_out = active_bases_out[0];
639   B_mat_in  = B_mats_in[0];
640   B_mat_out = B_mats_out[0];
641 
642   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
643   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
644   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
645   if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in;
646   else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in));
647 
648   CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in));
649   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
650     CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in));
651   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
652     CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in));
653   }
654 
655   if (elem_rstr_in != elem_rstr_out) {
656     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
657     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
658               "Active input and output operator restrictions must have the same number of elements");
659     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
660     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
661     if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out;
662     else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out));
663     CeedCheck(num_qpts_in == num_qpts_out, ceed, CEED_ERROR_UNSUPPORTED,
664               "Active input and output bases must have the same number of quadrature points");
665 
666     CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out));
667     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
668       CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out));
669     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
670       CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out));
671     }
672   } else {
673     num_elem_out  = num_elem_in;
674     elem_size_out = elem_size_in;
675     num_comp_out  = num_comp_in;
676     num_qpts_out  = num_qpts_in;
677 
678     elem_rstr_orients_out      = elem_rstr_orients_in;
679     elem_rstr_curl_orients_out = elem_rstr_curl_orients_in;
680   }
681   local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
682 
683   // Loop over elements and put in data structure
684   // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
685   CeedSize    count = 0;
686   CeedScalar *vals, *BTD_mat = NULL, *elem_mat = NULL, *elem_mat_b = NULL;
687 
688   CeedCall(CeedCalloc(elem_size_out * num_qpts_in * num_eval_modes_in[0], &BTD_mat));
689   CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat));
690   if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b));
691 
692   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
693   for (CeedSize e = 0; e < num_elem_in; e++) {
694     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
695       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
696         // Compute B^T*D
697         for (CeedSize n = 0; n < elem_size_out; n++) {
698           for (CeedSize q = 0; q < num_qpts_in; q++) {
699             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
700               const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in;
701               CeedScalar     sum       = 0.0;
702 
703               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
704                 const CeedSize b_out_index     = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n;
705                 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out;
706                 const CeedSize qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
707 
708                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
709               }
710               BTD_mat[btd_index] = sum;
711             }
712           }
713         }
714 
715         // Form element matrix itself (for each block component)
716         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size_out, elem_size_in, num_qpts_in * num_eval_modes_in[0]));
717 
718         // Transform the element matrix if required
719         if (elem_rstr_orients_out) {
720           const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out];
721 
722           for (CeedInt i = 0; i < elem_size_out; i++) {
723             const double orient = elem_orients[i] ? -1.0 : 1.0;
724 
725             for (CeedInt j = 0; j < elem_size_in; j++) {
726               elem_mat[i * elem_size_in + j] *= orient;
727             }
728           }
729         } else if (elem_rstr_curl_orients_out) {
730           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out];
731 
732           // T^T*(B^T*D*B)
733           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
734           for (CeedInt i = 0; i < elem_size_out; i++) {
735             for (CeedInt j = 0; j < elem_size_in; j++) {
736               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] +
737                                                (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) +
738                                                (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0);
739             }
740           }
741         }
742         if (elem_rstr_orients_in) {
743           const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in];
744 
745           for (CeedInt i = 0; i < elem_size_out; i++) {
746             for (CeedInt j = 0; j < elem_size_in; j++) {
747               elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0;
748             }
749           }
750         } else if (elem_rstr_curl_orients_in) {
751           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in];
752 
753           // (B^T*D*B)*T
754           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
755           for (CeedInt i = 0; i < elem_size_out; i++) {
756             for (CeedInt j = 0; j < elem_size_in; j++) {
757               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] +
758                                                (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) +
759                                                (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0);
760             }
761           }
762         }
763 
764         // Put element matrix in coordinate data structure
765         for (CeedInt i = 0; i < elem_size_out; i++) {
766           for (CeedInt j = 0; j < elem_size_in; j++) {
767             vals[offset + count] = elem_mat[i * elem_size_in + j];
768             count++;
769           }
770         }
771       }
772     }
773   }
774   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries");
775   CeedCall(CeedVectorRestoreArray(values, &vals));
776 
777   // Cleanup
778   CeedCall(CeedFree(&BTD_mat));
779   CeedCall(CeedFree(&elem_mat));
780   CeedCall(CeedFree(&elem_mat_b));
781   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
782     CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in));
783   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
784     CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in));
785   }
786   if (elem_rstr_in != elem_rstr_out) {
787     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
788       CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out));
789     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
790       CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out));
791     }
792   }
793   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
794   CeedCall(CeedVectorDestroy(&assembled_qf));
795   return CEED_ERROR_SUCCESS;
796 }
797 
798 /**
799   @brief Count number of entries for assembled CeedOperator
800 
801   @param[in]  op          CeedOperator to assemble
802   @param[out] num_entries Number of entries in assembled representation
803 
804   @return An error code: 0 - success, otherwise - failure
805 
806   @ref Utility
807 **/
808 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) {
809   bool                is_composite;
810   CeedInt             num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out;
811   CeedElemRestriction rstr_in, rstr_out;
812 
813   CeedCall(CeedOperatorIsComposite(op, &is_composite));
814   CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
815 
816   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out));
817   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in));
818   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in));
819   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in));
820   if (rstr_in != rstr_out) {
821     CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out));
822     CeedCheck(num_elem_in == num_elem_out, op->ceed, CEED_ERROR_UNSUPPORTED,
823               "Active input and output operator restrictions must have the same number of elements");
824     CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out));
825     CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out));
826   } else {
827     num_elem_out  = num_elem_in;
828     elem_size_out = elem_size_in;
829     num_comp_out  = num_comp_in;
830   }
831   *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in;
832   return CEED_ERROR_SUCCESS;
833 }
834 
835 /**
836   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
837 
838   @param[in]  op_fine      Fine grid operator
839   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
840   @param[in]  rstr_coarse  Coarse grid restriction
841   @param[in]  basis_coarse Coarse grid active vector basis
842   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
843   @param[out] op_coarse    Coarse grid operator
844   @param[out] op_prolong   Coarse to fine operator, or NULL
845   @param[out] op_restrict  Fine to coarse operator, or NULL
846 
847   @return An error code: 0 - success, otherwise - failure
848 
849   @ref Developer
850 **/
851 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
852                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
853   bool                is_composite;
854   Ceed                ceed;
855   CeedInt             num_comp;
856   CeedVector          mult_vec         = NULL;
857   CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL;
858 
859   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
860 
861   // Check for composite operator
862   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
863   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
864 
865   // Coarse Grid
866   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
867   // -- Clone input fields
868   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
869     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
870       rstr_fine = op_fine->input_fields[i]->elem_rstr;
871       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
872     } else {
873       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
874                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
875     }
876   }
877   // -- Clone output fields
878   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
879     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
880       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
881     } else {
882       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
883                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
884     }
885   }
886   // -- Clone QFunctionAssemblyData
887   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
888 
889   // Multiplicity vector
890   if (op_restrict || op_prolong) {
891     CeedVector          mult_e_vec;
892     CeedRestrictionType rstr_type;
893 
894     CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type));
895     CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED,
896               "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported");
897     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
898     CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine));
899     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
900     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
901     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
902     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
903     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
904     CeedCall(CeedVectorDestroy(&mult_e_vec));
905     CeedCall(CeedVectorReciprocal(mult_vec));
906   }
907 
908   // Clone name
909   bool   has_name = op_fine->name;
910   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
911   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
912 
913   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
914   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
915             "Prolongation or restriction operator creation requires coarse-to-fine basis");
916 
917   // Restriction/Prolongation Operators
918   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
919 
920   // Restriction
921   if (op_restrict) {
922     CeedInt             *num_comp_r_data;
923     CeedQFunctionContext ctx_r;
924     CeedQFunction        qf_restrict;
925 
926     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
927     CeedCall(CeedCalloc(1, &num_comp_r_data));
928     num_comp_r_data[0] = num_comp;
929     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
930     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
931     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
932     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
933     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
934     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
935     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
936     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
937 
938     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
939     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
940     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
941     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
942 
943     // Set name
944     char *restriction_name;
945 
946     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
947     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
948     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
949     CeedCall(CeedFree(&restriction_name));
950 
951     // Check
952     CeedCall(CeedOperatorCheckReady(*op_restrict));
953 
954     // Cleanup
955     CeedCall(CeedQFunctionDestroy(&qf_restrict));
956   }
957 
958   // Prolongation
959   if (op_prolong) {
960     CeedInt             *num_comp_p_data;
961     CeedQFunctionContext ctx_p;
962     CeedQFunction        qf_prolong;
963 
964     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
965     CeedCall(CeedCalloc(1, &num_comp_p_data));
966     num_comp_p_data[0] = num_comp;
967     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
968     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
969     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
970     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
971     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
972     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
973     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
974     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
975 
976     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
977     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
978     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
979     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
980 
981     // Set name
982     char *prolongation_name;
983 
984     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
985     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
986     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
987     CeedCall(CeedFree(&prolongation_name));
988 
989     // Check
990     CeedCall(CeedOperatorCheckReady(*op_prolong));
991 
992     // Cleanup
993     CeedCall(CeedQFunctionDestroy(&qf_prolong));
994   }
995 
996   // Check
997   CeedCall(CeedOperatorCheckReady(*op_coarse));
998 
999   // Cleanup
1000   CeedCall(CeedVectorDestroy(&mult_vec));
1001   CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine));
1002   CeedCall(CeedBasisDestroy(&basis_c_to_f));
1003   return CEED_ERROR_SUCCESS;
1004 }
1005 
1006 /**
1007   @brief Build 1D mass matrix and Laplacian with perturbation
1008 
1009   @param[in]  interp_1d   Interpolation matrix in one dimension
1010   @param[in]  grad_1d     Gradient matrix in one dimension
1011   @param[in]  q_weight_1d Quadrature weights in one dimension
1012   @param[in]  P_1d        Number of basis nodes in one dimension
1013   @param[in]  Q_1d        Number of quadrature points in one dimension
1014   @param[in]  dim         Dimension of basis
1015   @param[out] mass        Assembled mass matrix in one dimension
1016   @param[out] laplace     Assembled perturbed Laplacian in one dimension
1017 
1018   @return An error code: 0 - success, otherwise - failure
1019 
1020   @ref Developer
1021 **/
1022 CeedPragmaOptimizeOff
1023 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
1024                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
1025   for (CeedInt i = 0; i < P_1d; i++) {
1026     for (CeedInt j = 0; j < P_1d; j++) {
1027       CeedScalar sum = 0.0;
1028       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
1029       mass[i + j * P_1d] = sum;
1030     }
1031   }
1032   // -- Laplacian
1033   for (CeedInt i = 0; i < P_1d; i++) {
1034     for (CeedInt j = 0; j < P_1d; j++) {
1035       CeedScalar sum = 0.0;
1036 
1037       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
1038       laplace[i + j * P_1d] = sum;
1039     }
1040   }
1041   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
1042   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
1043   return CEED_ERROR_SUCCESS;
1044 }
1045 CeedPragmaOptimizeOn
1046 
1047 /// @}
1048 
1049 /// ----------------------------------------------------------------------------
1050 /// CeedOperator Backend API
1051 /// ----------------------------------------------------------------------------
1052 /// @addtogroup CeedOperatorBackend
1053 /// @{
1054 
1055 /**
1056   @brief Create point block restriction for active operator field
1057 
1058   @param[in]  rstr             Original CeedElemRestriction for active field
1059   @param[out] point_block_rstr Address of the variable where the newly created CeedElemRestriction will be stored
1060 
1061   @return An error code: 0 - success, otherwise - failure
1062 
1063   @ref Backend
1064 **/
1065 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) {
1066   Ceed           ceed;
1067   CeedInt        num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets;
1068   CeedSize       l_size;
1069   const CeedInt *offsets;
1070 
1071   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
1072   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
1073 
1074   // Expand offsets
1075   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
1076   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
1077   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
1078   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
1079   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
1080   shift = num_comp;
1081   if (comp_stride != 1) shift *= num_comp;
1082   CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets));
1083   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
1084     point_block_offsets[i] = offsets[i] * shift;
1085   }
1086 
1087   // Create new restriction
1088   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
1089                                      point_block_offsets, point_block_rstr));
1090 
1091   // Cleanup
1092   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
1093   return CEED_ERROR_SUCCESS;
1094 }
1095 
1096 /**
1097   @brief Create object holding CeedQFunction assembly data for CeedOperator
1098 
1099   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
1100   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
1101 
1102   @return An error code: 0 - success, otherwise - failure
1103 
1104   @ref Backend
1105 **/
1106 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
1107   CeedCall(CeedCalloc(1, data));
1108   (*data)->ref_count = 1;
1109   (*data)->ceed      = ceed;
1110   CeedCall(CeedReference(ceed));
1111   return CEED_ERROR_SUCCESS;
1112 }
1113 
1114 /**
1115   @brief Increment the reference counter for a CeedQFunctionAssemblyData
1116 
1117   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
1118 
1119   @return An error code: 0 - success, otherwise - failure
1120 
1121   @ref Backend
1122 **/
1123 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
1124   data->ref_count++;
1125   return CEED_ERROR_SUCCESS;
1126 }
1127 
1128 /**
1129   @brief Set re-use of CeedQFunctionAssemblyData
1130 
1131   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
1132   @param[in]     reuse_data Boolean flag indicating data re-use
1133 
1134   @return An error code: 0 - success, otherwise - failure
1135 
1136   @ref Backend
1137 **/
1138 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
1139   data->reuse_data        = reuse_data;
1140   data->needs_data_update = true;
1141   return CEED_ERROR_SUCCESS;
1142 }
1143 
1144 /**
1145   @brief Mark QFunctionAssemblyData as stale
1146 
1147   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
1148   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
1149 
1150   @return An error code: 0 - success, otherwise - failure
1151 
1152   @ref Backend
1153 **/
1154 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
1155   data->needs_data_update = needs_data_update;
1156   return CEED_ERROR_SUCCESS;
1157 }
1158 
1159 /**
1160   @brief Determine if QFunctionAssemblyData needs update
1161 
1162   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
1163   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1164 
1165   @return An error code: 0 - success, otherwise - failure
1166 
1167   @ref Backend
1168 **/
1169 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
1170   *is_update_needed = !data->reuse_data || data->needs_data_update;
1171   return CEED_ERROR_SUCCESS;
1172 }
1173 
1174 /**
1175   @brief Copy the pointer to a CeedQFunctionAssemblyData.
1176 
1177   Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
1178 
1179   Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
1180         CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
1181         CeedQFunctionAssemblyData.
1182 
1183   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
1184   @param[in,out] data_copy Variable to store copied reference
1185 
1186   @return An error code: 0 - success, otherwise - failure
1187 
1188   @ref Backend
1189 **/
1190 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1191   CeedCall(CeedQFunctionAssemblyDataReference(data));
1192   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1193   *data_copy = data;
1194   return CEED_ERROR_SUCCESS;
1195 }
1196 
1197 /**
1198   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1199 
1200   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1201   @param[out] is_setup Boolean flag for setup status
1202 
1203   @return An error code: 0 - success, otherwise - failure
1204 
1205   @ref Backend
1206 **/
1207 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1208   *is_setup = data->is_setup;
1209   return CEED_ERROR_SUCCESS;
1210 }
1211 
1212 /**
1213   @brief Set internal objects for CeedQFunctionAssemblyData
1214 
1215   @param[in,out] data CeedQFunctionAssemblyData to set objects
1216   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1217   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1218 
1219   @return An error code: 0 - success, otherwise - failure
1220 
1221   @ref Backend
1222 **/
1223 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1224   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1225   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1226 
1227   data->is_setup = true;
1228   return CEED_ERROR_SUCCESS;
1229 }
1230 
1231 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1232   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1233 
1234   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1235   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1236   return CEED_ERROR_SUCCESS;
1237 }
1238 
1239 /**
1240   @brief Destroy CeedQFunctionAssemblyData
1241 
1242   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1243 
1244   @return An error code: 0 - success, otherwise - failure
1245 
1246   @ref Backend
1247 **/
1248 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1249   if (!*data || --(*data)->ref_count > 0) {
1250     *data = NULL;
1251     return CEED_ERROR_SUCCESS;
1252   }
1253   CeedCall(CeedDestroy(&(*data)->ceed));
1254   CeedCall(CeedVectorDestroy(&(*data)->vec));
1255   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1256 
1257   CeedCall(CeedFree(data));
1258   return CEED_ERROR_SUCCESS;
1259 }
1260 
1261 /**
1262   @brief Get CeedOperatorAssemblyData
1263 
1264   @param[in]  op   CeedOperator to assemble
1265   @param[out] data CeedQFunctionAssemblyData
1266 
1267   @return An error code: 0 - success, otherwise - failure
1268 
1269   @ref Backend
1270 **/
1271 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1272   if (!op->op_assembled) {
1273     CeedOperatorAssemblyData data;
1274 
1275     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1276     op->op_assembled = data;
1277   }
1278   *data = op->op_assembled;
1279   return CEED_ERROR_SUCCESS;
1280 }
1281 
1282 /**
1283   @brief Create object holding CeedOperator assembly data.
1284 
1285   The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1286   An array with references to the corresponding active CeedElemRestrictions is also stored.
1287   For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1288   The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1289 CeedEvalMode.
1290   The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1291   Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1292 
1293   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1294   @param[in]  op   CeedOperator to be assembled
1295   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1296 
1297   @return An error code: 0 - success, otherwise - failure
1298 
1299   @ref Backend
1300 **/
1301 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1302   CeedInt             num_active_bases_in = 0, num_active_bases_out = 0, offset = 0;
1303   CeedInt             num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL;
1304   CeedSize          **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1305   CeedEvalMode      **eval_modes_in = NULL, **eval_modes_out = NULL;
1306   CeedQFunctionField *qf_fields;
1307   CeedQFunction       qf;
1308   CeedOperatorField  *op_fields;
1309   bool                is_composite;
1310 
1311   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1312   CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators.");
1313 
1314   // Allocate
1315   CeedCall(CeedCalloc(1, data));
1316   (*data)->ceed = ceed;
1317   CeedCall(CeedReference(ceed));
1318 
1319   // Build OperatorAssembly data
1320   CeedCall(CeedOperatorGetQFunction(op, &qf));
1321   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1322   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1323 
1324   // Determine active input basis
1325   for (CeedInt i = 0; i < num_input_fields; i++) {
1326     CeedVector vec;
1327 
1328     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1329     if (vec == CEED_VECTOR_ACTIVE) {
1330       CeedInt      index = -1, num_comp, q_comp;
1331       CeedEvalMode eval_mode;
1332       CeedBasis    basis_in = NULL;
1333 
1334       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1335       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1336       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1337       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1338       for (CeedInt i = 0; i < num_active_bases_in; i++) {
1339         if ((*data)->active_bases_in[i] == basis_in) index = i;
1340       }
1341       if (index == -1) {
1342         CeedElemRestriction elem_rstr_in;
1343 
1344         index = num_active_bases_in;
1345         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in));
1346         (*data)->active_bases_in[num_active_bases_in] = NULL;
1347         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in]));
1348         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in));
1349         (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL;
1350         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1351         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in]));
1352         CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in));
1353         num_eval_modes_in[index] = 0;
1354         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in));
1355         eval_modes_in[index] = NULL;
1356         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in));
1357         eval_mode_offsets_in[index] = NULL;
1358         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in));
1359         (*data)->assembled_bases_in[index] = NULL;
1360         num_active_bases_in++;
1361       }
1362       if (eval_mode != CEED_EVAL_WEIGHT) {
1363         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1364         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1365         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1366         for (CeedInt d = 0; d < q_comp; d++) {
1367           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1368           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1369           offset += num_comp;
1370         }
1371         num_eval_modes_in[index] += q_comp;
1372       }
1373     }
1374   }
1375 
1376   // Determine active output basis
1377   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1378   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1379   offset = 0;
1380   for (CeedInt i = 0; i < num_output_fields; i++) {
1381     CeedVector vec;
1382 
1383     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1384     if (vec == CEED_VECTOR_ACTIVE) {
1385       CeedInt      index = -1, num_comp, q_comp;
1386       CeedEvalMode eval_mode;
1387       CeedBasis    basis_out = NULL;
1388 
1389       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1390       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1391       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1392       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1393       for (CeedInt i = 0; i < num_active_bases_out; i++) {
1394         if ((*data)->active_bases_out[i] == basis_out) index = i;
1395       }
1396       if (index == -1) {
1397         CeedElemRestriction elem_rstr_out;
1398 
1399         index = num_active_bases_out;
1400         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out));
1401         (*data)->active_bases_out[num_active_bases_out] = NULL;
1402         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out]));
1403         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out));
1404         (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL;
1405         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1406         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out]));
1407         CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out));
1408         num_eval_modes_out[index] = 0;
1409         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out));
1410         eval_modes_out[index] = NULL;
1411         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out));
1412         eval_mode_offsets_out[index] = NULL;
1413         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out));
1414         (*data)->assembled_bases_out[index] = NULL;
1415         num_active_bases_out++;
1416       }
1417       if (eval_mode != CEED_EVAL_WEIGHT) {
1418         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1419         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1420         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1421         for (CeedInt d = 0; d < q_comp; d++) {
1422           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1423           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1424           offset += num_comp;
1425         }
1426         num_eval_modes_out[index] += q_comp;
1427       }
1428     }
1429   }
1430   (*data)->num_active_bases_in   = num_active_bases_in;
1431   (*data)->num_eval_modes_in     = num_eval_modes_in;
1432   (*data)->eval_modes_in         = eval_modes_in;
1433   (*data)->eval_mode_offsets_in  = eval_mode_offsets_in;
1434   (*data)->num_active_bases_out  = num_active_bases_out;
1435   (*data)->num_eval_modes_out    = num_eval_modes_out;
1436   (*data)->eval_modes_out        = eval_modes_out;
1437   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1438   (*data)->num_output_components = offset;
1439   return CEED_ERROR_SUCCESS;
1440 }
1441 
1442 /**
1443   @brief Get CeedOperator CeedEvalModes for assembly.
1444 
1445   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1446 
1447   @param[in]  data                  CeedOperatorAssemblyData
1448   @param[out] num_active_bases_in   Total number of active bases for input
1449   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1450                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1451   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1452   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1453   @param[out] num_active_bases_out  Total number of active bases for output
1454   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1455   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1456   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1457   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1458                                       including contributions of all active bases
1459 
1460   @return An error code: 0 - success, otherwise - failure
1461 
1462   @ref Backend
1463 **/
1464 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in,
1465                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out,
1466                                          CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out,
1467                                          CeedSize *num_output_components) {
1468   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1469   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1470   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1471   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1472   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1473   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1474   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1475   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1476   if (num_output_components) *num_output_components = data->num_output_components;
1477   return CEED_ERROR_SUCCESS;
1478 }
1479 
1480 /**
1481   @brief Get CeedOperator CeedBasis data for assembly.
1482 
1483   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1484 
1485   @param[in]  data                 CeedOperatorAssemblyData
1486   @param[out] num_active_bases_in  Number of active input bases, or NULL
1487   @param[out] active_bases_in      Pointer to hold active input CeedBasis, or NULL
1488   @param[out] assembled_bases_in   Pointer to hold assembled active input B, or NULL
1489   @param[out] num_active_bases_out Number of active output bases, or NULL
1490   @param[out] active_bases_out     Pointer to hold active output CeedBasis, or NULL
1491   @param[out] assembled_bases_out  Pointer to hold assembled active output B, or NULL
1492 
1493   @return An error code: 0 - success, otherwise - failure
1494 
1495   @ref Backend
1496 **/
1497 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in,
1498                                      const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out,
1499                                      const CeedScalar ***assembled_bases_out) {
1500   // Assemble B_in, B_out if needed
1501   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1502     CeedInt num_qpts;
1503 
1504     if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts));
1505     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts));
1506     for (CeedInt b = 0; b < data->num_active_bases_in; b++) {
1507       bool        has_eval_none = false;
1508       CeedInt     num_nodes;
1509       CeedScalar *B_in = NULL, *identity = NULL;
1510 
1511       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes));
1512       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1513 
1514       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1515         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1516       }
1517       if (has_eval_none) {
1518         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1519         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1520           identity[i * num_nodes + i] = 1.0;
1521         }
1522       }
1523 
1524       for (CeedInt q = 0; q < num_qpts; q++) {
1525         for (CeedInt n = 0; n < num_nodes; n++) {
1526           CeedInt      d_in              = 0, q_comp_in;
1527           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1528 
1529           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1530             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1531             const CeedScalar *B  = NULL;
1532 
1533             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B));
1534             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in));
1535             if (q_comp_in > 1) {
1536               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1537               else B = &B[(++d_in) * num_qpts * num_nodes];
1538             }
1539             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1540             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1541           }
1542         }
1543       }
1544       if (identity) CeedCall(CeedFree(&identity));
1545       data->assembled_bases_in[b] = B_in;
1546     }
1547   }
1548 
1549   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1550     CeedInt num_qpts;
1551 
1552     if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts));
1553     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts));
1554     for (CeedInt b = 0; b < data->num_active_bases_out; b++) {
1555       bool        has_eval_none = false;
1556       CeedInt     num_nodes;
1557       CeedScalar *B_out = NULL, *identity = NULL;
1558 
1559       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes));
1560       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1561 
1562       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1563         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1564       }
1565       if (has_eval_none) {
1566         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1567         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1568           identity[i * num_nodes + i] = 1.0;
1569         }
1570       }
1571 
1572       for (CeedInt q = 0; q < num_qpts; q++) {
1573         for (CeedInt n = 0; n < num_nodes; n++) {
1574           CeedInt      d_out              = 0, q_comp_out;
1575           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1576 
1577           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1578             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1579             const CeedScalar *B  = NULL;
1580 
1581             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B));
1582             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out));
1583             if (q_comp_out > 1) {
1584               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1585               else B = &B[(++d_out) * num_qpts * num_nodes];
1586             }
1587             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1588             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1589           }
1590         }
1591       }
1592       if (identity) CeedCall(CeedFree(&identity));
1593       data->assembled_bases_out[b] = B_out;
1594     }
1595   }
1596 
1597   // Pass out assembled data
1598   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1599   if (active_bases_in) *active_bases_in = data->active_bases_in;
1600   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1601   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1602   if (active_bases_out) *active_bases_out = data->active_bases_out;
1603   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1604   return CEED_ERROR_SUCCESS;
1605 }
1606 
1607 /**
1608   @brief Get CeedOperator CeedBasis data for assembly.
1609 
1610   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1611 
1612   @param[in]  data                      CeedOperatorAssemblyData
1613   @param[out] num_active_elem_rstrs_in  Number of active input element restrictions, or NULL
1614   @param[out] active_elem_rstrs_in      Pointer to hold active input CeedElemRestrictions, or NULL
1615   @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or NULL
1616   @param[out] active_elem_rstrs_out     Pointer to hold active output CeedElemRestrictions, or NULL
1617 
1618   @return An error code: 0 - success, otherwise - failure
1619 
1620   @ref Backend
1621 **/
1622 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in,
1623                                                 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out,
1624                                                 CeedElemRestriction **active_elem_rstrs_out) {
1625   if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in;
1626   if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in;
1627   if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out;
1628   if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out;
1629   return CEED_ERROR_SUCCESS;
1630 }
1631 
1632 /**
1633   @brief Destroy CeedOperatorAssemblyData
1634 
1635   @param[in,out] data CeedOperatorAssemblyData to destroy
1636 
1637   @return An error code: 0 - success, otherwise - failure
1638 
1639   @ref Backend
1640 **/
1641 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1642   if (!*data) {
1643     *data = NULL;
1644     return CEED_ERROR_SUCCESS;
1645   }
1646   CeedCall(CeedDestroy(&(*data)->ceed));
1647   for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) {
1648     CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b]));
1649     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b]));
1650     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1651     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1652     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1653   }
1654   for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) {
1655     CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b]));
1656     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b]));
1657     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1658     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1659     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1660   }
1661   CeedCall(CeedFree(&(*data)->active_bases_in));
1662   CeedCall(CeedFree(&(*data)->active_bases_out));
1663   CeedCall(CeedFree(&(*data)->active_elem_rstrs_in));
1664   CeedCall(CeedFree(&(*data)->active_elem_rstrs_out));
1665   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1666   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1667   CeedCall(CeedFree(&(*data)->eval_modes_in));
1668   CeedCall(CeedFree(&(*data)->eval_modes_out));
1669   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1670   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1671   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1672   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1673 
1674   CeedCall(CeedFree(data));
1675   return CEED_ERROR_SUCCESS;
1676 }
1677 
1678 /// @}
1679 
1680 /// ----------------------------------------------------------------------------
1681 /// CeedOperator Public API
1682 /// ----------------------------------------------------------------------------
1683 /// @addtogroup CeedOperatorUser
1684 /// @{
1685 
1686 /**
1687   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1688 
1689   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1690   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices
1691 representing the action of the CeedQFunction for a corresponding quadrature point on an element.
1692 
1693   Inputs and outputs are in the order provided by the user when adding CeedOperator fields.
1694   For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction
1695 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1696 
1697   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1698 
1699   @param[in]  op        CeedOperator to assemble CeedQFunction
1700   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1701   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1702   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1703 
1704   @return An error code: 0 - success, otherwise - failure
1705 
1706   @ref User
1707 **/
1708 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1709   CeedCall(CeedOperatorCheckReady(op));
1710 
1711   if (op->LinearAssembleQFunction) {
1712     // Backend version
1713     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1714   } else {
1715     // Operator fallback
1716     CeedOperator op_fallback;
1717 
1718     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1719     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1720     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1721   }
1722   return CEED_ERROR_SUCCESS;
1723 }
1724 
1725 /**
1726   @brief Assemble CeedQFunction and store result internally.
1727 
1728   Return copied references of stored data to the caller.
1729   Caller is responsible for ownership and destruction of the copied references.
1730   See also @ref CeedOperatorLinearAssembleQFunction
1731 
1732   Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers.
1733         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
1734 
1735   @param[in]  op        CeedOperator to assemble CeedQFunction
1736   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1737   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1738   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1739 
1740   @return An error code: 0 - success, otherwise - failure
1741 
1742   @ref User
1743 **/
1744 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1745   int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL;
1746   CeedOperator op_assemble                                                                           = NULL;
1747   CeedOperator op_fallback_parent                                                                    = NULL;
1748 
1749   CeedCall(CeedOperatorCheckReady(op));
1750 
1751   // Determine if fallback parent or operator has implementation
1752   CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent));
1753   if (op_fallback_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) {
1754     // -- Backend version for op fallback parent is faster, if it exists
1755     LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate;
1756     op_assemble                   = op_fallback_parent;
1757   } else if (op->LinearAssembleQFunctionUpdate) {
1758     // -- Backend version for op
1759     LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate;
1760     op_assemble                   = op;
1761   }
1762 
1763   // Assemble QFunction
1764   if (LinearAssembleQFunctionUpdate) {
1765     // Backend or fallback parent version
1766     bool                qf_assembled_is_setup;
1767     CeedVector          assembled_vec  = NULL;
1768     CeedElemRestriction assembled_rstr = NULL;
1769 
1770     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1771     if (qf_assembled_is_setup) {
1772       bool update_needed;
1773 
1774       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1775       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1776       if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request));
1777     } else {
1778       CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request));
1779       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1780     }
1781     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1782 
1783     // Copy reference from internally held copy
1784     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1785     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1786     CeedCall(CeedVectorDestroy(&assembled_vec));
1787     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1788   } else {
1789     // Operator fallback
1790     CeedOperator op_fallback;
1791 
1792     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1793     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1794     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1795   }
1796   return CEED_ERROR_SUCCESS;
1797 }
1798 
1799 /**
1800   @brief Assemble the diagonal of a square linear CeedOperator
1801 
1802   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1803 
1804   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1805 
1806   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1807 
1808   @param[in]  op        CeedOperator to assemble CeedQFunction
1809   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1810   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1811 
1812   @return An error code: 0 - success, otherwise - failure
1813 
1814   @ref User
1815 **/
1816 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1817   bool     is_composite;
1818   CeedSize input_size = 0, output_size = 0;
1819 
1820   CeedCall(CeedOperatorCheckReady(op));
1821   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1822 
1823   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1824   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1825 
1826   // Early exit for empty operator
1827   if (!is_composite) {
1828     CeedInt num_elem = 0;
1829 
1830     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1831     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1832   }
1833 
1834   if (op->LinearAssembleDiagonal) {
1835     // Backend version
1836     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1837     return CEED_ERROR_SUCCESS;
1838   } else if (op->LinearAssembleAddDiagonal) {
1839     // Backend version with zeroing first
1840     CeedCall(CeedVectorSetValue(assembled, 0.0));
1841     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1842     return CEED_ERROR_SUCCESS;
1843   } else {
1844     // Operator fallback
1845     CeedOperator op_fallback;
1846 
1847     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1848     if (op_fallback) {
1849       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1850       return CEED_ERROR_SUCCESS;
1851     }
1852   }
1853   // Default interface implementation
1854   CeedCall(CeedVectorSetValue(assembled, 0.0));
1855   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1856   return CEED_ERROR_SUCCESS;
1857 }
1858 
1859 /**
1860   @brief Assemble the diagonal of a square linear CeedOperator
1861 
1862   This sums into a CeedVector the diagonal of a linear CeedOperator.
1863 
1864   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1865 
1866   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1867 
1868   @param[in]  op        CeedOperator to assemble CeedQFunction
1869   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1870   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1871 
1872   @return An error code: 0 - success, otherwise - failure
1873 
1874   @ref User
1875 **/
1876 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1877   bool     is_composite;
1878   CeedSize input_size = 0, output_size = 0;
1879 
1880   CeedCall(CeedOperatorCheckReady(op));
1881   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1882 
1883   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1884   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1885 
1886   // Early exit for empty operator
1887   if (!is_composite) {
1888     CeedInt num_elem = 0;
1889 
1890     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1891     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1892   }
1893 
1894   if (op->LinearAssembleAddDiagonal) {
1895     // Backend version
1896     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1897     return CEED_ERROR_SUCCESS;
1898   } else {
1899     // Operator fallback
1900     CeedOperator op_fallback;
1901 
1902     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1903     if (op_fallback) {
1904       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1905       return CEED_ERROR_SUCCESS;
1906     }
1907   }
1908   // Default interface implementation
1909   if (is_composite) {
1910     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1911   } else {
1912     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1913   }
1914   return CEED_ERROR_SUCCESS;
1915 }
1916 
1917 /**
1918    @brief Fully assemble the point-block diagonal pattern of a linear operator.
1919 
1920    Expected to be used in conjunction with CeedOperatorLinearAssemblePointBlockDiagonal().
1921 
1922    The assembly routines use coordinate format, with `num_entries` tuples of the form (i, j, value) which indicate that value should be added to the
1923 matrix in entry (i, j).
1924   Note that the (i, j) pairs are unique.
1925   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in
1926 the same ordering.
1927 
1928    This will generally be slow unless your operator is low-order.
1929 
1930    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1931 
1932    @param[in]  op          CeedOperator to assemble
1933    @param[out] num_entries Number of entries in coordinate nonzero pattern
1934    @param[out] rows        Row number for each entry
1935    @param[out] cols        Column number for each entry
1936 
1937    @ref User
1938 **/
1939 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1940   Ceed          ceed;
1941   bool          is_composite;
1942   CeedInt       num_active_components, num_sub_operators;
1943   CeedOperator *sub_operators;
1944 
1945   CeedCall(CeedOperatorGetCeed(op, &ceed));
1946   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1947 
1948   CeedSize input_size = 0, output_size = 0;
1949   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1950   CeedCheck(input_size == output_size, ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1951 
1952   if (is_composite) {
1953     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_operators));
1954     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1955   } else {
1956     sub_operators     = &op;
1957     num_sub_operators = 1;
1958   }
1959 
1960   // Verify operator can be assembled correctly
1961   {
1962     CeedOperatorAssemblyData data;
1963     CeedInt                  num_active_elem_rstrs, comp_stride;
1964     CeedElemRestriction     *active_elem_rstrs;
1965 
1966     // Get initial values to check against
1967     CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data));
1968     CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
1969     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride));
1970     CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components));
1971 
1972     // Verify that all active element restrictions have same component stride and number of components
1973     for (CeedInt k = 0; k < num_sub_operators; k++) {
1974       CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data));
1975       CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
1976       for (CeedInt i = 0; i < num_active_elem_rstrs; i++) {
1977         CeedInt comp_stride_sub, num_active_components_sub;
1978 
1979         CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub));
1980         CeedCheck(comp_stride == comp_stride_sub, ceed, CEED_ERROR_DIMENSION,
1981                   "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub);
1982         CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub));
1983         CeedCheck(num_active_components == num_active_components_sub, ceed, CEED_ERROR_INCOMPATIBLE,
1984                   "All suboperators must have the same number of output components");
1985       }
1986     }
1987   }
1988   *num_entries = input_size * num_active_components;
1989   CeedCall(CeedCalloc(*num_entries, rows));
1990   CeedCall(CeedCalloc(*num_entries, cols));
1991 
1992   for (CeedInt o = 0; o < num_sub_operators; o++) {
1993     CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr;
1994     CeedInt             comp_stride, num_elem, elem_size;
1995     const CeedInt      *offsets, *point_block_offsets;
1996 
1997     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr));
1998     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride));
1999     CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem));
2000     CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size));
2001     CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets));
2002 
2003     CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr));
2004     CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets));
2005 
2006     for (CeedSize i = 0; i < num_elem * elem_size; i++) {
2007       for (CeedInt c_out = 0; c_out < num_active_components; c_out++) {
2008         for (CeedInt c_in = 0; c_in < num_active_components; c_in++) {
2009           (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride;
2010           (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride;
2011         }
2012       }
2013     }
2014 
2015     CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets));
2016     CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets));
2017     CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr));
2018   }
2019   return CEED_ERROR_SUCCESS;
2020 }
2021 
2022 /**
2023   @brief Assemble the point block diagonal of a square linear CeedOperator
2024 
2025   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
2026 
2027   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
2028 
2029   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2030 
2031   @param[in]  op        CeedOperator to assemble CeedQFunction
2032   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
2033 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
2034 component in].
2035   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2036 
2037   @return An error code: 0 - success, otherwise - failure
2038 
2039   @ref User
2040 **/
2041 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2042   bool     is_composite;
2043   CeedSize input_size = 0, output_size = 0;
2044 
2045   CeedCall(CeedOperatorCheckReady(op));
2046   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2047 
2048   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2049   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
2050 
2051   // Early exit for empty operator
2052   if (!is_composite) {
2053     CeedInt num_elem = 0;
2054 
2055     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2056     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2057   }
2058 
2059   if (op->LinearAssemblePointBlockDiagonal) {
2060     // Backend version
2061     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
2062     return CEED_ERROR_SUCCESS;
2063   } else if (op->LinearAssembleAddPointBlockDiagonal) {
2064     // Backend version with zeroing first
2065     CeedCall(CeedVectorSetValue(assembled, 0.0));
2066     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2067     return CEED_ERROR_SUCCESS;
2068   } else {
2069     // Operator fallback
2070     CeedOperator op_fallback;
2071 
2072     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2073     if (op_fallback) {
2074       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
2075       return CEED_ERROR_SUCCESS;
2076     }
2077   }
2078   // Default interface implementation
2079   CeedCall(CeedVectorSetValue(assembled, 0.0));
2080   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2081   return CEED_ERROR_SUCCESS;
2082 }
2083 
2084 /**
2085   @brief Assemble the point block diagonal of a square linear CeedOperator
2086 
2087   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
2088 
2089   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
2090 
2091   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2092 
2093   @param[in]  op        CeedOperator to assemble CeedQFunction
2094   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
2095 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
2096 component in].
2097   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2098 
2099   @return An error code: 0 - success, otherwise - failure
2100 
2101   @ref User
2102 **/
2103 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2104   bool     is_composite;
2105   CeedSize input_size = 0, output_size = 0;
2106 
2107   CeedCall(CeedOperatorCheckReady(op));
2108   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2109 
2110   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2111   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
2112 
2113   // Early exit for empty operator
2114   if (!is_composite) {
2115     CeedInt num_elem = 0;
2116 
2117     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2118     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2119   }
2120 
2121   if (op->LinearAssembleAddPointBlockDiagonal) {
2122     // Backend version
2123     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
2124     return CEED_ERROR_SUCCESS;
2125   } else {
2126     // Operator fallback
2127     CeedOperator op_fallback;
2128 
2129     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2130     if (op_fallback) {
2131       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
2132       return CEED_ERROR_SUCCESS;
2133     }
2134   }
2135   // Default interface implementation
2136   if (is_composite) {
2137     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
2138   } else {
2139     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
2140   }
2141   return CEED_ERROR_SUCCESS;
2142 }
2143 
2144 /**
2145    @brief Fully assemble the nonzero pattern of a linear operator.
2146 
2147    Expected to be used in conjunction with CeedOperatorLinearAssemble().
2148 
2149    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
2150 matrix in entry (i, j).
2151   Note that the (i, j) pairs are not unique and may repeat.
2152   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
2153 
2154    This will generally be slow unless your operator is low-order.
2155 
2156    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2157 
2158    @param[in]  op          CeedOperator to assemble
2159    @param[out] num_entries Number of entries in coordinate nonzero pattern
2160    @param[out] rows        Row number for each entry
2161    @param[out] cols        Column number for each entry
2162 
2163    @ref User
2164 **/
2165 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2166   bool          is_composite;
2167   CeedInt       num_suboperators, offset = 0;
2168   CeedSize      single_entries;
2169   CeedOperator *sub_operators;
2170 
2171   CeedCall(CeedOperatorCheckReady(op));
2172   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2173 
2174   if (op->LinearAssembleSymbolic) {
2175     // Backend version
2176     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
2177     return CEED_ERROR_SUCCESS;
2178   } else {
2179     // Operator fallback
2180     CeedOperator op_fallback;
2181 
2182     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2183     if (op_fallback) {
2184       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
2185       return CEED_ERROR_SUCCESS;
2186     }
2187   }
2188 
2189   // Default interface implementation
2190 
2191   // Count entries and allocate rows, cols arrays
2192   *num_entries = 0;
2193   if (is_composite) {
2194     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2195     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2196     for (CeedInt k = 0; k < num_suboperators; ++k) {
2197       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2198       *num_entries += single_entries;
2199     }
2200   } else {
2201     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
2202     *num_entries += single_entries;
2203   }
2204   CeedCall(CeedCalloc(*num_entries, rows));
2205   CeedCall(CeedCalloc(*num_entries, cols));
2206 
2207   // Assemble nonzero locations
2208   if (is_composite) {
2209     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2210     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2211     for (CeedInt k = 0; k < num_suboperators; ++k) {
2212       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
2213       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2214       offset += single_entries;
2215     }
2216   } else {
2217     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
2218   }
2219   return CEED_ERROR_SUCCESS;
2220 }
2221 
2222 /**
2223    @brief Fully assemble the nonzero entries of a linear operator.
2224 
2225    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
2226 
2227    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
2228 matrix in entry (i, j).
2229   Note that the (i, j) pairs are not unique and may repeat.
2230   This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
2231 
2232    This will generally be slow unless your operator is low-order.
2233 
2234    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2235 
2236    @param[in]  op     CeedOperator to assemble
2237    @param[out] values Values to assemble into matrix
2238 
2239    @ref User
2240 **/
2241 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
2242   bool          is_composite;
2243   CeedInt       num_suboperators, offset = 0;
2244   CeedSize      single_entries = 0;
2245   CeedOperator *sub_operators;
2246 
2247   CeedCall(CeedOperatorCheckReady(op));
2248   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2249 
2250   // Early exit for empty operator
2251   if (!is_composite) {
2252     CeedInt num_elem = 0;
2253 
2254     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2255     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2256   }
2257 
2258   if (op->LinearAssemble) {
2259     // Backend version
2260     CeedCall(op->LinearAssemble(op, values));
2261     return CEED_ERROR_SUCCESS;
2262   } else {
2263     // Operator fallback
2264     CeedOperator op_fallback;
2265 
2266     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2267     if (op_fallback) {
2268       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2269       return CEED_ERROR_SUCCESS;
2270     }
2271   }
2272 
2273   // Default interface implementation
2274   CeedCall(CeedVectorSetValue(values, 0.0));
2275   if (is_composite) {
2276     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2277     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2278     for (CeedInt k = 0; k < num_suboperators; k++) {
2279       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2280       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2281       offset += single_entries;
2282     }
2283   } else {
2284     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2285   }
2286   return CEED_ERROR_SUCCESS;
2287 }
2288 
2289 /**
2290   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
2291 
2292   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2293 
2294   @param[in]  op               Composite CeedOperator
2295   @param[in]  num_skip_indices Number of suboperators to skip
2296   @param[in]  skip_indices     Array of indices of suboperators to skip
2297   @param[out] mult             Vector to store multiplicity (of size l_size)
2298 
2299   @return An error code: 0 - success, otherwise - failure
2300 
2301   @ref User
2302 **/
2303 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2304   Ceed                ceed;
2305   CeedInt             num_suboperators;
2306   CeedSize            l_vec_len;
2307   CeedScalar         *mult_array;
2308   CeedVector          ones_l_vec;
2309   CeedElemRestriction elem_rstr, mult_elem_rstr;
2310   CeedOperator       *sub_operators;
2311 
2312   CeedCall(CeedOperatorCheckReady(op));
2313 
2314   CeedCall(CeedOperatorGetCeed(op, &ceed));
2315 
2316   // Zero mult vector
2317   CeedCall(CeedVectorSetValue(mult, 0.0));
2318 
2319   // Get suboperators
2320   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2321   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2322   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2323 
2324   // Work vector
2325   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2326   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2327   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2328   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2329 
2330   // Compute multiplicity across suboperators
2331   for (CeedInt i = 0; i < num_suboperators; i++) {
2332     const CeedScalar *sub_mult_array;
2333     CeedVector        sub_mult_l_vec, ones_e_vec;
2334 
2335     // -- Check for suboperator to skip
2336     for (CeedInt j = 0; j < num_skip_indices; j++) {
2337       if (skip_indices[j] == i) continue;
2338     }
2339 
2340     // -- Sub operator multiplicity
2341     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2342     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr));
2343     CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2344     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2345     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2346     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2347     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2348     // ---- Flag every node present in the current suboperator
2349     for (CeedInt j = 0; j < l_vec_len; j++) {
2350       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2351     }
2352     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2353     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2354     CeedCall(CeedVectorDestroy(&ones_e_vec));
2355     CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr));
2356   }
2357   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2358   CeedCall(CeedVectorDestroy(&ones_l_vec));
2359   return CEED_ERROR_SUCCESS;
2360 }
2361 
2362 /**
2363   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2364 grid interpolation
2365 
2366   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2367 
2368   @param[in]  op_fine      Fine grid operator
2369   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2370   @param[in]  rstr_coarse  Coarse grid restriction
2371   @param[in]  basis_coarse Coarse grid active vector basis
2372   @param[out] op_coarse    Coarse grid operator
2373   @param[out] op_prolong   Coarse to fine operator, or NULL
2374   @param[out] op_restrict  Fine to coarse operator, or NULL
2375 
2376   @return An error code: 0 - success, otherwise - failure
2377 
2378   @ref User
2379 **/
2380 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2381                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2382   CeedBasis basis_c_to_f = NULL;
2383 
2384   CeedCall(CeedOperatorCheckReady(op_fine));
2385 
2386   // Build prolongation matrix, if required
2387   if (op_prolong || op_restrict) {
2388     CeedBasis basis_fine;
2389 
2390     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2391     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2392   }
2393 
2394   // Core code
2395   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2396   return CEED_ERROR_SUCCESS;
2397 }
2398 
2399 /**
2400   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2401 
2402   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2403 
2404   @param[in]  op_fine       Fine grid operator
2405   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2406   @param[in]  rstr_coarse   Coarse grid restriction
2407   @param[in]  basis_coarse  Coarse grid active vector basis
2408   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2409   @param[out] op_coarse     Coarse grid operator
2410   @param[out] op_prolong    Coarse to fine operator, or NULL
2411   @param[out] op_restrict   Fine to coarse operator, or NULL
2412 
2413   @return An error code: 0 - success, otherwise - failure
2414 
2415   @ref User
2416 **/
2417 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2418                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2419                                              CeedOperator *op_restrict) {
2420   Ceed      ceed;
2421   CeedInt   Q_f, Q_c;
2422   CeedBasis basis_fine, basis_c_to_f = NULL;
2423 
2424   CeedCall(CeedOperatorCheckReady(op_fine));
2425   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2426 
2427   // Check for compatible quadrature spaces
2428   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2429   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2430   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2431   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2432 
2433   // Create coarse to fine basis, if required
2434   if (op_prolong || op_restrict) {
2435     CeedInt     dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2436     CeedScalar *q_ref, *q_weight, *grad;
2437 
2438     // Check if interpolation matrix is provided
2439     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2440               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2441     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2442     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2443     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2444     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2445     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2446     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2447     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2448     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2449     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2450     CeedCall(CeedFree(&q_ref));
2451     CeedCall(CeedFree(&q_weight));
2452     CeedCall(CeedFree(&grad));
2453   }
2454 
2455   // Core code
2456   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2457   return CEED_ERROR_SUCCESS;
2458 }
2459 
2460 /**
2461   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2462 
2463   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2464 
2465   @param[in]  op_fine       Fine grid operator
2466   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2467   @param[in]  rstr_coarse   Coarse grid restriction
2468   @param[in]  basis_coarse  Coarse grid active vector basis
2469   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2470   @param[out] op_coarse     Coarse grid operator
2471   @param[out] op_prolong    Coarse to fine operator, or NULL
2472   @param[out] op_restrict   Fine to coarse operator, or NULL
2473 
2474   @return An error code: 0 - success, otherwise - failure
2475 
2476   @ref User
2477 **/
2478 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2479                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2480                                        CeedOperator *op_restrict) {
2481   Ceed      ceed;
2482   CeedInt   Q_f, Q_c;
2483   CeedBasis basis_fine, basis_c_to_f = NULL;
2484 
2485   CeedCall(CeedOperatorCheckReady(op_fine));
2486   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2487 
2488   // Check for compatible quadrature spaces
2489   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2490   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2491   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2492   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2493 
2494   // Coarse to fine basis
2495   if (op_prolong || op_restrict) {
2496     CeedInt          dim, num_comp, num_nodes_c, num_nodes_f;
2497     CeedScalar      *q_ref, *q_weight, *grad;
2498     CeedElemTopology topo;
2499 
2500     // Check if interpolation matrix is provided
2501     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2502               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2503     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2504     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2505     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2506     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2507     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2508     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2509     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2510     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2511     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2512     CeedCall(CeedFree(&q_ref));
2513     CeedCall(CeedFree(&q_weight));
2514     CeedCall(CeedFree(&grad));
2515   }
2516 
2517   // Core code
2518   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2519   return CEED_ERROR_SUCCESS;
2520 }
2521 
2522 /**
2523   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2524 
2525   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2526   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2527   The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T
2528 \hat S V\f$.
2529   The CeedOperator must be linear and non-composite.
2530   The associated CeedQFunction must therefore also be linear.
2531 
2532   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2533 
2534   @param[in]  op      CeedOperator to create element inverses
2535   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2536   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2537 
2538   @return An error code: 0 - success, otherwise - failure
2539 
2540   @ref User
2541 **/
2542 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2543   Ceed                 ceed, ceed_parent;
2544   bool                 interp = false, grad = false, is_tensor_basis = true;
2545   CeedInt              num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2546   CeedSize             l_size = 1;
2547   CeedScalar          *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg;
2548   const CeedScalar    *interp_1d, *grad_1d, *q_weight_1d;
2549   CeedVector           q_data;
2550   CeedElemRestriction  rstr  = NULL, rstr_qd_i;
2551   CeedBasis            basis = NULL, fdm_basis;
2552   CeedQFunctionContext ctx_fdm;
2553   CeedQFunctionField  *qf_fields;
2554   CeedQFunction        qf, qf_fdm;
2555   CeedOperatorField   *op_fields;
2556 
2557   CeedCall(CeedOperatorCheckReady(op));
2558 
2559   if (op->CreateFDMElementInverse) {
2560     // Backend version
2561     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2562     return CEED_ERROR_SUCCESS;
2563   } else {
2564     // Operator fallback
2565     CeedOperator op_fallback;
2566 
2567     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2568     if (op_fallback) {
2569       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2570       return CEED_ERROR_SUCCESS;
2571     }
2572   }
2573 
2574   // Default interface implementation
2575   CeedCall(CeedOperatorGetCeed(op, &ceed));
2576   CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent));
2577   CeedCall(CeedOperatorGetQFunction(op, &qf));
2578 
2579   // Determine active input basis
2580   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2581   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2582   for (CeedInt i = 0; i < num_input_fields; i++) {
2583     CeedVector vec;
2584 
2585     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2586     if (vec == CEED_VECTOR_ACTIVE) {
2587       CeedEvalMode eval_mode;
2588 
2589       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2590       interp = interp || eval_mode == CEED_EVAL_INTERP;
2591       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2592       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2593       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2594     }
2595   }
2596   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2597   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2598   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2599   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2600   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2601   CeedCall(CeedBasisGetDimension(basis, &dim));
2602   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2603   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2604   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2605 
2606   // Build and diagonalize 1D Mass and Laplacian
2607   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2608   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2609   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2610   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2611   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2612   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2613   CeedCall(CeedCalloc(P_1d, &lambda));
2614   // -- Build matrices
2615   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2616   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2617   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2618   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2619 
2620   // -- Diagonalize
2621   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2622   CeedCall(CeedFree(&mass));
2623   CeedCall(CeedFree(&laplace));
2624   for (CeedInt i = 0; i < P_1d; i++) {
2625     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2626   }
2627   CeedCall(CeedFree(&x));
2628 
2629   {
2630     CeedInt             layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2631     CeedScalar          max_norm = 0;
2632     const CeedScalar   *assembled_array, *q_weight_array;
2633     CeedVector          assembled = NULL, q_weight;
2634     CeedElemRestriction rstr_qf   = NULL;
2635 
2636     // Assemble QFunction
2637     CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2638     CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2639     CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2640     CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2641 
2642     // Calculate element averages
2643     CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2644     CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2645     CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2646     CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2647     CeedCall(CeedCalloc(num_elem, &elem_avg));
2648     const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2649 
2650     for (CeedInt e = 0; e < num_elem; e++) {
2651       CeedInt count = 0;
2652 
2653       for (CeedInt q = 0; q < num_qpts; q++) {
2654         for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2655           if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2656             elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2657             count++;
2658           }
2659         }
2660       }
2661       if (count) {
2662         elem_avg[e] /= count;
2663       } else {
2664         elem_avg[e] = 1.0;
2665       }
2666     }
2667     CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2668     CeedCall(CeedVectorDestroy(&assembled));
2669     CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2670     CeedCall(CeedVectorDestroy(&q_weight));
2671   }
2672 
2673   // Build FDM diagonal
2674   {
2675     CeedScalar *q_data_array, *fdm_diagonal;
2676 
2677     CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2678     const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2679     for (CeedInt c = 0; c < num_comp; c++) {
2680       for (CeedInt n = 0; n < num_nodes; n++) {
2681         if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2682         if (grad) {
2683           for (CeedInt d = 0; d < dim; d++) {
2684             CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2685             fdm_diagonal[c * num_nodes + n] += lambda[i];
2686           }
2687         }
2688         if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2689       }
2690     }
2691     CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2692     CeedCall(CeedVectorSetValue(q_data, 0.0));
2693     CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2694     for (CeedInt e = 0; e < num_elem; e++) {
2695       for (CeedInt c = 0; c < num_comp; c++) {
2696         for (CeedInt n = 0; n < num_nodes; n++)
2697           q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2698       }
2699     }
2700     CeedCall(CeedFree(&elem_avg));
2701     CeedCall(CeedFree(&fdm_diagonal));
2702     CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2703   }
2704 
2705   // Setup FDM operator
2706   // -- Basis
2707   {
2708     CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2709 
2710     CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2711     CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2712     CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2713     CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2714     CeedCall(CeedFree(&fdm_interp));
2715     CeedCall(CeedFree(&grad_dummy));
2716     CeedCall(CeedFree(&q_ref_dummy));
2717     CeedCall(CeedFree(&q_weight_dummy));
2718     CeedCall(CeedFree(&lambda));
2719   }
2720 
2721   // -- Restriction
2722   {
2723     CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp};
2724     CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i));
2725   }
2726 
2727   // -- QFunction
2728   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2729   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2730   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2731   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2732   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2733 
2734   // -- QFunction context
2735   {
2736     CeedInt *num_comp_data;
2737 
2738     CeedCall(CeedCalloc(1, &num_comp_data));
2739     num_comp_data[0] = num_comp;
2740     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2741     CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2742   }
2743   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2744   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2745 
2746   // -- Operator
2747   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2748   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2749   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data));
2750   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2751 
2752   // Cleanup
2753   CeedCall(CeedVectorDestroy(&q_data));
2754   CeedCall(CeedBasisDestroy(&fdm_basis));
2755   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2756   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2757   return CEED_ERROR_SUCCESS;
2758 }
2759 
2760 /// @}
2761