xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision ca38d01d95e1d63e8425c25ab66131b07bd24aa2)
1 // Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a `CeedQFunction` with a reference `Ceed` to fallback for advanced `CeedOperator` functionality
28 
29   @param[in]  fallback_ceed `Ceed` on which to create fallback `CeedQFunction`
30   @param[in]  qf            `CeedQFunction` to create fallback for
31   @param[out] qf_fallback   Fallback `CeedQFunction`
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   char               *source_path_with_name = NULL;
39   CeedInt             num_input_fields, num_output_fields;
40   CeedQFunctionField *input_fields, *output_fields;
41 
42   // Check if NULL qf passed in
43   if (!qf) return CEED_ERROR_SUCCESS;
44 
45   CeedDebug256(CeedQFunctionReturnCeed(qf), 1, "---------- CeedOperator Fallback ----------\n");
46   CeedDebug(CeedQFunctionReturnCeed(qf), "Creating fallback CeedQFunction\n");
47 
48   if (qf->source_path) {
49     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
50 
51     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
52     memcpy(source_path_with_name, qf->source_path, path_len);
53     memcpy(&source_path_with_name[path_len], ":", 1);
54     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
55   } else if (qf->user_source) {
56     CeedCall(CeedStringAllocCopy(qf->user_source, &source_path_with_name));
57   } else {
58     CeedCall(CeedCalloc(1, &source_path_with_name));
59   }
60 
61   {
62     CeedInt           vec_length;
63     CeedQFunctionUser f;
64 
65     CeedCall(CeedQFunctionGetVectorLength(qf, &vec_length));
66     CeedCall(CeedQFunctionGetUserFunction(qf, &f));
67     CeedCall(CeedQFunctionCreateInterior(fallback_ceed, vec_length, f, source_path_with_name, qf_fallback));
68   }
69   {
70     CeedQFunctionContext ctx;
71 
72     CeedCall(CeedQFunctionGetContext(qf, &ctx));
73     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
74     CeedCall(CeedQFunctionContextDestroy(&ctx));
75   }
76   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
77   for (CeedInt i = 0; i < num_input_fields; i++) {
78     const char  *field_name;
79     CeedInt      size;
80     CeedEvalMode eval_mode;
81 
82     CeedCall(CeedQFunctionFieldGetData(input_fields[i], &field_name, &size, &eval_mode));
83     CeedCall(CeedQFunctionAddInput(*qf_fallback, field_name, size, eval_mode));
84   }
85   for (CeedInt i = 0; i < num_output_fields; i++) {
86     const char  *field_name;
87     CeedInt      size;
88     CeedEvalMode eval_mode;
89 
90     CeedCall(CeedQFunctionFieldGetData(output_fields[i], &field_name, &size, &eval_mode));
91     CeedCall(CeedQFunctionAddOutput(*qf_fallback, field_name, size, eval_mode));
92   }
93   CeedCall(CeedFree(&source_path_with_name));
94   return CEED_ERROR_SUCCESS;
95 }
96 
97 /**
98   @brief Duplicate a `CeedOperator` with a reference `Ceed` to fallback for advanced `CeedOperator` functionality
99 
100   @param[in,out] op `CeedOperator` to create fallback for
101 
102   @return An error code: 0 - success, otherwise - failure
103 
104   @ref Developer
105 **/
106 static int CeedOperatorCreateFallback(CeedOperator op) {
107   bool         is_composite;
108   Ceed         ceed, ceed_fallback;
109   CeedOperator op_fallback;
110 
111   // Check not already created
112   if (op->op_fallback) return CEED_ERROR_SUCCESS;
113 
114   // Fallback Ceed
115   CeedCall(CeedOperatorGetCeed(op, &ceed));
116   CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
117   CeedCall(CeedDestroy(&ceed));
118   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
119 
120   CeedDebug256(CeedOperatorReturnCeed(op), 1, "---------- CeedOperator Fallback ----------\n");
121   CeedDebug(CeedOperatorReturnCeed(op), "Creating fallback CeedOperator\n");
122 
123   // Clone Op
124   CeedCall(CeedOperatorIsComposite(op, &is_composite));
125   if (is_composite) {
126     CeedInt       num_suboperators;
127     CeedOperator *sub_operators;
128 
129     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
130     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
131     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
132     for (CeedInt i = 0; i < num_suboperators; i++) {
133       CeedOperator op_sub_fallback;
134 
135       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
136       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
137     }
138   } else {
139     bool               is_at_points = false;
140     CeedInt            num_input_fields, num_output_fields;
141     CeedQFunction      qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
142     CeedOperatorField *input_fields, *output_fields;
143 
144     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
145     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
146     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
147     CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
148     if (is_at_points) {
149       CeedVector          points;
150       CeedElemRestriction rstr_points;
151 
152       CeedCall(CeedOperatorCreateAtPoints(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
153       CeedCall(CeedOperatorAtPointsGetPoints(op, &rstr_points, &points));
154       CeedCall(CeedOperatorAtPointsSetPoints(op_fallback, rstr_points, points));
155       CeedCall(CeedVectorDestroy(&points));
156       CeedCall(CeedElemRestrictionDestroy(&rstr_points));
157     } else {
158       CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
159     }
160     CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
161     for (CeedInt i = 0; i < num_input_fields; i++) {
162       const char         *field_name;
163       CeedVector          vec;
164       CeedElemRestriction rstr;
165       CeedBasis           basis;
166 
167       CeedCall(CeedOperatorFieldGetData(input_fields[i], &field_name, &rstr, &basis, &vec));
168       CeedCall(CeedOperatorSetField(op_fallback, field_name, rstr, basis, vec));
169       CeedCall(CeedVectorDestroy(&vec));
170       CeedCall(CeedElemRestrictionDestroy(&rstr));
171       CeedCall(CeedBasisDestroy(&basis));
172     }
173     for (CeedInt i = 0; i < num_output_fields; i++) {
174       const char         *field_name;
175       CeedVector          vec;
176       CeedElemRestriction rstr;
177       CeedBasis           basis;
178 
179       CeedCall(CeedOperatorFieldGetData(output_fields[i], &field_name, &rstr, &basis, &vec));
180       CeedCall(CeedOperatorSetField(op_fallback, field_name, rstr, basis, vec));
181       CeedCall(CeedVectorDestroy(&vec));
182       CeedCall(CeedElemRestrictionDestroy(&rstr));
183       CeedCall(CeedBasisDestroy(&basis));
184     }
185     {
186       CeedQFunctionAssemblyData data;
187 
188       CeedCall(CeedOperatorGetQFunctionAssemblyData(op, &data));
189       CeedCall(CeedQFunctionAssemblyDataReferenceCopy(data, &op_fallback->qf_assembled));
190     }
191     // Cleanup
192     CeedCall(CeedQFunctionDestroy(&qf_fallback));
193     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
194     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
195   }
196   CeedCall(CeedOperatorSetName(op_fallback, op->name));
197   CeedCall(CeedOperatorCheckReady(op_fallback));
198   // Note: No ref-counting here so we don't get caught in a reference loop.
199   //       The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback.
200   op->op_fallback                 = op_fallback;
201   op_fallback->op_fallback_parent = op;
202   CeedCall(CeedDestroy(&ceed_fallback));
203   return CEED_ERROR_SUCCESS;
204 }
205 
206 /**
207   @brief Core logic for assembling operator diagonal or point block diagonal
208 
209   @param[in]  op             `CeedOperator` to assemble diagonal or point block diagonal
210   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
211   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
212   @param[out] assembled      `CeedVector` to store assembled diagonal
213 
214   @return An error code: 0 - success, otherwise - failure
215 
216   @ref Developer
217 **/
218 static inline int CeedSingleOperatorLinearAssembleAddDiagonal_Mesh(CeedOperator op, CeedRequest *request, const bool is_point_block,
219                                                                    CeedVector assembled) {
220   bool is_composite;
221 
222   CeedCall(CeedOperatorIsComposite(op, &is_composite));
223   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
224 
225   // Assemble QFunction
226   CeedInt             layout_qf[3];
227   const CeedScalar   *assembled_qf_array;
228   CeedVector          assembled_qf        = NULL;
229   CeedElemRestriction assembled_elem_rstr = NULL;
230 
231   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
232   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, layout_qf));
233   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
234   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
235 
236   // Get assembly data
237   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
238   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
239   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
240   CeedBasis               *active_bases_in, *active_bases_out;
241   CeedElemRestriction     *active_elem_rstrs_in, *active_elem_rstrs_out;
242   CeedOperatorAssemblyData data;
243 
244   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
245   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
246                                                 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out,
247                                                 &num_output_components));
248   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL));
249   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out));
250 
251   // Loop over all active bases (find matching input/output pairs)
252   for (CeedInt b = 0; b < CeedIntMin(num_active_bases_in, num_active_bases_out); b++) {
253     CeedInt             b_in, b_out, num_elem, num_nodes, num_qpts, num_comp;
254     bool                has_eval_none = false;
255     CeedScalar         *elem_diag_array, *identity = NULL;
256     CeedVector          elem_diag;
257     CeedElemRestriction diag_elem_rstr;
258 
259     if (num_active_bases_in <= num_active_bases_out) {
260       b_in = b;
261       for (b_out = 0; b_out < num_active_bases_out; b_out++) {
262         if (active_bases_in[b_in] == active_bases_out[b_out]) {
263           break;
264         }
265       }
266       if (b_out == num_active_bases_out) {
267         continue;
268       }  // No matching output basis found
269     } else {
270       b_out = b;
271       for (b_in = 0; b_in < num_active_bases_in; b_in++) {
272         if (active_bases_in[b_in] == active_bases_out[b_out]) {
273           break;
274         }
275       }
276       if (b_in == num_active_bases_in) {
277         continue;
278       }  // No matching output basis found
279     }
280     CeedCheck(active_elem_rstrs_in[b_in] == active_elem_rstrs_out[b_out], CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
281               "Cannot assemble operator diagonal with different input and output active element restrictions");
282 
283     // Assemble point block diagonal restriction, if needed
284     if (is_point_block) {
285       CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b_in], &diag_elem_rstr));
286     } else {
287       CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b_in], &diag_elem_rstr));
288     }
289 
290     // Create diagonal vector
291     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
292 
293     // Assemble element operator diagonals
294     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
295     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
296     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
297     CeedCall(CeedBasisGetNumNodes(active_bases_in[b_in], &num_nodes));
298     CeedCall(CeedBasisGetNumComponents(active_bases_in[b_in], &num_comp));
299     if (active_bases_in[b_in] == CEED_BASIS_NONE) num_qpts = num_nodes;
300     else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b_in], &num_qpts));
301 
302     // Construct identity matrix for basis if required
303     for (CeedInt i = 0; i < num_eval_modes_in[b_in]; i++) {
304       has_eval_none = has_eval_none || (eval_modes_in[b_in][i] == CEED_EVAL_NONE);
305     }
306     for (CeedInt i = 0; i < num_eval_modes_out[b_out]; i++) {
307       has_eval_none = has_eval_none || (eval_modes_out[b_out][i] == CEED_EVAL_NONE);
308     }
309     if (has_eval_none) {
310       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
311       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
312     }
313 
314     // Compute the diagonal of B^T D B
315     // Each element
316     for (CeedSize e = 0; e < num_elem; e++) {
317       // Each basis eval mode pair
318       CeedInt      d_out              = 0, q_comp_out;
319       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
320 
321       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b_out]; e_out++) {
322         CeedInt           d_in              = 0, q_comp_in;
323         const CeedScalar *B_t               = NULL;
324         CeedEvalMode      eval_mode_in_prev = CEED_EVAL_NONE;
325 
326         CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b_out], eval_modes_out[b_out][e_out], identity, &B_t));
327         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b_out], eval_modes_out[b_out][e_out], &q_comp_out));
328         if (q_comp_out > 1) {
329           if (e_out == 0 || eval_modes_out[b_out][e_out] != eval_mode_out_prev) d_out = 0;
330           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
331         }
332         eval_mode_out_prev = eval_modes_out[b_out][e_out];
333 
334         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b_in]; e_in++) {
335           const CeedScalar *B = NULL;
336 
337           CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b_in], eval_modes_in[b_in][e_in], identity, &B));
338           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b_in], eval_modes_in[b_in][e_in], &q_comp_in));
339           if (q_comp_in > 1) {
340             if (e_in == 0 || eval_modes_in[b_in][e_in] != eval_mode_in_prev) d_in = 0;
341             else B = &B[(++d_in) * num_qpts * num_nodes];
342           }
343           eval_mode_in_prev = eval_modes_in[b_in][e_in];
344 
345           // Each component
346           for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
347             // Each qpt/node pair
348             for (CeedInt q = 0; q < num_qpts; q++) {
349               if (is_point_block) {
350                 // Point Block Diagonal
351                 for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
352                   const CeedSize c_offset =
353                       (eval_mode_offsets_in[b_in][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
354                   const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
355 
356                   for (CeedInt n = 0; n < num_nodes; n++) {
357                     elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
358                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
359                   }
360                 }
361               } else {
362                 // Diagonal Only
363                 const CeedInt c_offset =
364                     (eval_mode_offsets_in[b_in][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
365                 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
366 
367                 for (CeedInt n = 0; n < num_nodes; n++) {
368                   elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
369                 }
370               }
371             }
372           }
373         }
374       }
375     }
376     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
377 
378     // Assemble local operator diagonal
379     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
380 
381     // Cleanup
382     CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
383     CeedCall(CeedVectorDestroy(&elem_diag));
384     CeedCall(CeedFree(&identity));
385   }
386   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
387   CeedCall(CeedVectorDestroy(&assembled_qf));
388   return CEED_ERROR_SUCCESS;
389 }
390 
391 /**
392   @brief Core logic for assembling operator diagonal or point block diagonal
393 
394   @param[in]  op             `CeedOperator` to assemble diagonal or point block diagonal
395   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
396   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
397   @param[out] assembled      `CeedVector` to store assembled diagonal
398 
399   @return An error code: 0 - success, otherwise - failure
400 
401   @ref Developer
402 **/
403 static inline int CeedSingleOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block,
404                                                               CeedVector assembled) {
405   bool is_at_points;
406 
407   CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
408   CeedCheck(!is_at_points, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "AtPoints operator not supported");
409   CeedCall(CeedSingleOperatorLinearAssembleAddDiagonal_Mesh(op, request, is_point_block, assembled));
410   return CEED_ERROR_SUCCESS;
411 }
412 
413 /**
414   @brief Core logic for assembling composite operator diagonal
415 
416   @param[in]  op             `CeedOperator` to assemble point block diagonal
417   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
418   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
419   @param[out] assembled      `CeedVector` to store assembled diagonal
420 
421   @return An error code: 0 - success, otherwise - failure
422 
423   @ref Developer
424 **/
425 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block,
426                                                                  CeedVector assembled) {
427   CeedInt       num_sub;
428   CeedOperator *suboperators;
429 
430   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
431   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
432   for (CeedInt i = 0; i < num_sub; i++) {
433     if (is_point_block) {
434       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
435     } else {
436       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
437     }
438   }
439   return CEED_ERROR_SUCCESS;
440 }
441 
442 /**
443   @brief Build nonzero pattern for non-composite CeedOperator`.
444 
445   Users should generally use @ref CeedOperatorLinearAssembleSymbolic().
446 
447   @param[in]  op     `CeedOperator` to assemble nonzero pattern
448   @param[in]  offset Offset for number of entries
449   @param[out] rows   Row number for each entry
450   @param[out] cols   Column number for each entry
451 
452   @return An error code: 0 - success, otherwise - failure
453 
454   @ref Developer
455 **/
456 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
457   Ceed                ceed;
458   bool                is_composite;
459   CeedSize            num_nodes_in, num_nodes_out, local_num_entries, count = 0;
460   CeedInt             num_elem_in, elem_size_in, num_comp_in, layout_er_in[3];
461   CeedInt             num_elem_out, elem_size_out, num_comp_out, layout_er_out[3];
462   CeedScalar         *array;
463   const CeedScalar   *elem_dof_a_in, *elem_dof_a_out;
464   CeedVector          index_vec_in, index_vec_out, elem_dof_in, elem_dof_out;
465   CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out;
466 
467   CeedCall(CeedOperatorIsComposite(op, &is_composite));
468   CeedCall(CeedOperatorGetCeed(op, &ceed));
469   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
470 
471   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out));
472   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
473   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
474   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
475   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
476   CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, layout_er_in));
477 
478   // Determine elem_dof relation for input
479   CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in));
480   CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array));
481   for (CeedSize i = 0; i < num_nodes_in; i++) array[i] = i;
482   CeedCall(CeedVectorRestoreArray(index_vec_in, &array));
483   CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in));
484   CeedCall(CeedVectorSetValue(elem_dof_in, 0.0));
485   CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in));
486   CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE));
487   CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in));
488   CeedCall(CeedVectorDestroy(&index_vec_in));
489   CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in));
490 
491   if (elem_rstr_in != elem_rstr_out) {
492     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
493     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
494               "Active input and output operator restrictions must have the same number of elements."
495               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
496               num_elem_in, num_elem_out);
497     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
498     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
499     CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, layout_er_out));
500 
501     // Determine elem_dof relation for output
502     CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out));
503     CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array));
504     for (CeedSize i = 0; i < num_nodes_out; i++) array[i] = i;
505     CeedCall(CeedVectorRestoreArray(index_vec_out, &array));
506     CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out));
507     CeedCall(CeedVectorSetValue(elem_dof_out, 0.0));
508     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out));
509     CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE));
510     CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out));
511     CeedCall(CeedVectorDestroy(&index_vec_out));
512     CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out));
513   } else {
514     num_elem_out     = num_elem_in;
515     elem_size_out    = elem_size_in;
516     num_comp_out     = num_comp_in;
517     layout_er_out[0] = layout_er_in[0];
518     layout_er_out[1] = layout_er_in[1];
519     layout_er_out[2] = layout_er_in[2];
520     elem_dof_a_out   = elem_dof_a_in;
521   }
522   local_num_entries = (CeedSize)elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
523 
524   // Determine i, j locations for element matrices
525   for (CeedInt e = 0; e < num_elem_in; e++) {
526     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
527       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
528         for (CeedInt i = 0; i < elem_size_out; i++) {
529           for (CeedInt j = 0; j < elem_size_in; j++) {
530             const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2];
531             const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2];
532             const CeedInt row                = elem_dof_a_out[elem_dof_index_row];
533             const CeedInt col                = elem_dof_a_in[elem_dof_index_col];
534 
535             rows[offset + count] = row;
536             cols[offset + count] = col;
537             count++;
538           }
539         }
540       }
541     }
542   }
543   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
544   CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in));
545   CeedCall(CeedVectorDestroy(&elem_dof_in));
546   if (elem_rstr_in != elem_rstr_out) {
547     CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out));
548     CeedCall(CeedVectorDestroy(&elem_dof_out));
549   }
550   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
551   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
552   CeedCall(CeedDestroy(&ceed));
553   return CEED_ERROR_SUCCESS;
554 }
555 
556 /**
557   @brief Core logic to assemble `CeedQFunction` and store result internally.
558 
559   Return copied references of stored data to the caller.
560   Caller is responsible for ownership and destruction of the copied references.
561   See also @ref CeedOperatorLinearAssembleQFunction().
562 
563   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
564         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
565 
566   @param[in]  op         `CeedOperator` to assemble `CeedQFunction`
567   @param[in]  use_parent Boolean flag to check for fallback parent implementation
568   @param[out] assembled  `CeedVector` to store assembled `CeedQFunction` at quadrature points
569   @param[out] rstr       `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
570   @param[in]  request    Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
571 
572   @return An error code: 0 - success, otherwise - failure
573 
574   @ref User
575 **/
576 static int CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(CeedOperator op, bool use_parent, CeedVector *assembled, CeedElemRestriction *rstr,
577                                                                  CeedRequest *request) {
578   int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL;
579   CeedOperator op_assemble                                                                           = NULL;
580   CeedOperator op_fallback_parent                                                                    = NULL;
581 
582   CeedCall(CeedOperatorCheckReady(op));
583 
584   // Determine if fallback parent or operator has implementation
585   CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent));
586   if (op_fallback_parent && use_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) {
587     // -- Backend version for op fallback parent is faster, if it exists
588     CeedDebug(CeedOperatorReturnCeed(op), "Using fallback parent for CeedOperatorLinearAssembleQFunctionBuildOrUpdate\n");
589     LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate;
590     op_assemble                   = op_fallback_parent;
591   } else if (op->LinearAssembleQFunctionUpdate) {
592     // -- Backend version for op
593     LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate;
594     op_assemble                   = op;
595   }
596 
597   // Assemble QFunction
598   if (LinearAssembleQFunctionUpdate) {
599     // Backend or fallback parent version
600     CeedQFunctionAssemblyData data;
601     bool                      data_is_setup;
602     CeedVector                assembled_vec  = NULL;
603     CeedElemRestriction       assembled_rstr = NULL;
604 
605     CeedCall(CeedOperatorGetQFunctionAssemblyData(op, &data));
606     CeedCall(CeedQFunctionAssemblyDataIsSetup(data, &data_is_setup));
607     if (data_is_setup) {
608       bool update_needed;
609 
610       CeedCall(CeedQFunctionAssemblyDataGetObjects(data, &assembled_vec, &assembled_rstr));
611       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(data, &update_needed));
612       if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request));
613     } else {
614       CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request));
615       CeedCall(CeedQFunctionAssemblyDataSetObjects(data, assembled_vec, assembled_rstr));
616     }
617     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(data, false));
618 
619     // Copy reference from internally held copy
620     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
621     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
622     CeedCall(CeedVectorDestroy(&assembled_vec));
623     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
624   } else {
625     // Operator fallback
626     CeedOperator op_fallback;
627 
628     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleQFunctionBuildOrUpdate\n");
629     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
630     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
631     else return CeedError(CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
632   }
633   return CEED_ERROR_SUCCESS;
634 }
635 
636 /**
637   @brief Assemble `CeedQFunction` and store result internally, but do not use fallback parent.
638 
639   Return copied references of stored data to the caller.
640   Caller is responsible for ownership and destruction of the copied references.
641   See also @ref CeedOperatorLinearAssembleQFunction().
642 
643   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
644         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
645 
646   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
647   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
648   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
649   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
650 
651   @return An error code: 0 - success, otherwise - failure
652 
653   @ref Developer
654 **/
655 int CeedOperatorFallbackLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr,
656                                                              CeedRequest *request) {
657   return CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(op, false, assembled, rstr, request);
658 }
659 
660 /**
661   @brief Assemble nonzero entries for non-composite `CeedOperator`.
662 
663   Users should generally use @ref CeedOperatorLinearAssemble().
664 
665   @param[in]  op     `CeedOperator` to assemble
666   @param[in]  offset Offset for number of entries
667   @param[out] values Values to assemble into matrix
668 
669   @return An error code: 0 - success, otherwise - failure
670 
671   @ref Developer
672 **/
673 int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
674   bool is_composite, is_at_points;
675 
676   CeedCall(CeedOperatorIsComposite(op, &is_composite));
677   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
678 
679   // Early exit for empty operator
680   {
681     CeedInt num_elem = 0;
682 
683     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
684     if (num_elem == 0) return CEED_ERROR_SUCCESS;
685   }
686 
687   if (op->LinearAssembleSingle) {
688     // Backend version
689     CeedCall(op->LinearAssembleSingle(op, offset, values));
690     return CEED_ERROR_SUCCESS;
691   } else {
692     // Operator fallback
693     CeedOperator op_fallback;
694 
695     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedSingleOperatorAssemble\n");
696     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
697     if (op_fallback) {
698       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
699       return CEED_ERROR_SUCCESS;
700     }
701   }
702 
703   CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
704   CeedCheck(!is_at_points, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
705             "Backend does not implement CeedOperatorLinearAssemble for AtPoints operator");
706 
707   // Assemble QFunction
708   CeedInt             layout_qf[3];
709   const CeedScalar   *assembled_qf_array;
710   CeedVector          assembled_qf        = NULL;
711   CeedElemRestriction assembled_elem_rstr = NULL;
712 
713   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE));
714   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, layout_qf));
715   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
716   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
717 
718   // Get assembly data
719   CeedInt                  num_elem_in, elem_size_in, num_comp_in, num_qpts_in;
720   CeedInt                  num_elem_out, elem_size_out, num_comp_out, num_qpts_out;
721   CeedSize                 local_num_entries, count = 0;
722   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
723   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
724   CeedBasis               *active_bases_in, *active_bases_out, basis_in, basis_out;
725   const CeedScalar       **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out;
726   CeedElemRestriction      elem_rstr_in, elem_rstr_out;
727   CeedRestrictionType      elem_rstr_type_in, elem_rstr_type_out;
728   const bool              *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL;
729   const CeedInt8          *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL;
730   CeedOperatorAssemblyData data;
731 
732   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
733   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out,
734                                                 &num_eval_modes_out, &eval_modes_out, NULL, NULL));
735 
736   CeedCheck(num_active_bases_in == 1 && num_active_bases_out == 1, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
737             "Cannot assemble operator with multiple active bases");
738   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
739             "Cannot assemble operator without inputs/outputs");
740 
741   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out));
742   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
743   basis_in  = active_bases_in[0];
744   basis_out = active_bases_out[0];
745   B_mat_in  = B_mats_in[0];
746   B_mat_out = B_mats_out[0];
747 
748   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
749   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
750   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
751   if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in;
752   else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in));
753 
754   CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in));
755   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
756     CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in));
757   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
758     CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in));
759   }
760 
761   if (elem_rstr_in != elem_rstr_out) {
762     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
763     CeedCheck(num_elem_in == num_elem_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
764               "Active input and output operator restrictions must have the same number of elements."
765               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
766               num_elem_in, num_elem_out);
767     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
768     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
769     if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out;
770     else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out));
771     CeedCheck(num_qpts_in == num_qpts_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
772               "Active input and output bases must have the same number of quadrature points."
773               " Input has %" CeedInt_FMT " points; output has %" CeedInt_FMT "points.",
774               num_qpts_in, num_qpts_out);
775 
776     CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out));
777     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
778       CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out));
779     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
780       CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out));
781     }
782   } else {
783     num_elem_out  = num_elem_in;
784     elem_size_out = elem_size_in;
785     num_comp_out  = num_comp_in;
786     num_qpts_out  = num_qpts_in;
787 
788     elem_rstr_orients_out      = elem_rstr_orients_in;
789     elem_rstr_curl_orients_out = elem_rstr_curl_orients_in;
790   }
791   local_num_entries = (CeedSize)elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
792 
793   // Loop over elements and put in data structure
794   // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
795   CeedTensorContract contract;
796   CeedScalar        *vals, *BTD_mat = NULL, *elem_mat = NULL, *elem_mat_b = NULL;
797 
798   CeedCall(CeedBasisGetTensorContract(basis_in, &contract));
799   CeedCall(CeedCalloc(elem_size_out * num_qpts_in * num_eval_modes_in[0], &BTD_mat));
800   CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat));
801   if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b));
802 
803   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
804   for (CeedSize e = 0; e < num_elem_in; e++) {
805     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
806       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
807         // Compute B^T*D
808         for (CeedSize n = 0; n < elem_size_out; n++) {
809           for (CeedSize q = 0; q < num_qpts_in; q++) {
810             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
811               const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in;
812               CeedScalar     sum       = 0.0;
813 
814               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
815                 const CeedSize b_out_index     = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n;
816                 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out;
817                 const CeedSize qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
818 
819                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
820               }
821               BTD_mat[btd_index] = sum;
822             }
823           }
824         }
825 
826         // Form element matrix itself (for each block component)
827         if (contract) {
828           CeedCall(CeedTensorContractApply(contract, 1, num_qpts_in * num_eval_modes_in[0], elem_size_in, elem_size_out, BTD_mat, CEED_NOTRANSPOSE,
829                                            false, B_mat_in, elem_mat));
830         } else {
831           Ceed ceed;
832 
833           CeedCall(CeedOperatorGetCeed(op, &ceed));
834           CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size_out, elem_size_in, num_qpts_in * num_eval_modes_in[0]));
835           CeedCall(CeedDestroy(&ceed));
836         }
837 
838         // Transform the element matrix if required
839         if (elem_rstr_orients_out) {
840           const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out];
841 
842           for (CeedInt i = 0; i < elem_size_out; i++) {
843             const double orient = elem_orients[i] ? -1.0 : 1.0;
844 
845             for (CeedInt j = 0; j < elem_size_in; j++) {
846               elem_mat[i * elem_size_in + j] *= orient;
847             }
848           }
849         } else if (elem_rstr_curl_orients_out) {
850           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out];
851 
852           // T^T*(B^T*D*B)
853           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
854           for (CeedInt i = 0; i < elem_size_out; i++) {
855             for (CeedInt j = 0; j < elem_size_in; j++) {
856               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] +
857                                                (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) +
858                                                (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0);
859             }
860           }
861         }
862         if (elem_rstr_orients_in) {
863           const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in];
864 
865           for (CeedInt i = 0; i < elem_size_out; i++) {
866             for (CeedInt j = 0; j < elem_size_in; j++) {
867               elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0;
868             }
869           }
870         } else if (elem_rstr_curl_orients_in) {
871           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in];
872 
873           // (B^T*D*B)*T
874           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
875           for (CeedInt i = 0; i < elem_size_out; i++) {
876             for (CeedInt j = 0; j < elem_size_in; j++) {
877               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] +
878                                                (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) +
879                                                (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0);
880             }
881           }
882         }
883 
884         // Put element matrix in coordinate data structure
885         for (CeedInt i = 0; i < elem_size_out; i++) {
886           for (CeedInt j = 0; j < elem_size_in; j++) {
887             vals[offset + count] = elem_mat[i * elem_size_in + j];
888             count++;
889           }
890         }
891       }
892     }
893   }
894   CeedCheck(count == local_num_entries, CeedOperatorReturnCeed(op), CEED_ERROR_MAJOR, "Error computing entries");
895   CeedCall(CeedVectorRestoreArray(values, &vals));
896 
897   // Cleanup
898   CeedCall(CeedFree(&BTD_mat));
899   CeedCall(CeedFree(&elem_mat));
900   CeedCall(CeedFree(&elem_mat_b));
901   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
902     CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in));
903   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
904     CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in));
905   }
906   if (elem_rstr_in != elem_rstr_out) {
907     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
908       CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out));
909     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
910       CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out));
911     }
912   }
913   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
914   CeedCall(CeedVectorDestroy(&assembled_qf));
915   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
916   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
917   return CEED_ERROR_SUCCESS;
918 }
919 
920 /**
921   @brief Count number of entries for assembled `CeedOperator`
922 
923   @param[in]  op          `CeedOperator` to assemble
924   @param[out] num_entries Number of entries in assembled representation
925 
926   @return An error code: 0 - success, otherwise - failure
927 
928   @ref Utility
929 **/
930 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) {
931   bool                is_composite;
932   CeedInt             num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out;
933   CeedElemRestriction rstr_in, rstr_out;
934 
935   CeedCall(CeedOperatorIsComposite(op, &is_composite));
936   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
937 
938   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out));
939   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in));
940   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in));
941   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in));
942   if (rstr_in != rstr_out) {
943     CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out));
944     CeedCheck(num_elem_in == num_elem_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
945               "Active input and output operator restrictions must have the same number of elements."
946               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
947               num_elem_in, num_elem_out);
948     CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out));
949     CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out));
950   } else {
951     num_elem_out  = num_elem_in;
952     elem_size_out = elem_size_in;
953     num_comp_out  = num_comp_in;
954   }
955   CeedCall(CeedElemRestrictionDestroy(&rstr_in));
956   CeedCall(CeedElemRestrictionDestroy(&rstr_out));
957   *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in;
958   return CEED_ERROR_SUCCESS;
959 }
960 
961 /**
962   @brief Common code for creating a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator`
963 
964   @param[in]  op_fine      Fine grid `CeedOperator`
965   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
966   @param[in]  rstr_coarse  Coarse grid `CeedElemRestriction`
967   @param[in]  basis_coarse Coarse grid active vector `CeedBasis`
968   @param[in]  basis_c_to_f `CeedBasis` for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction operators
969   @param[out] op_coarse    Coarse grid `CeedOperator`
970   @param[out] op_prolong   Coarse to fine `CeedOperator`, or `NULL`
971   @param[out] op_restrict  Fine to coarse `CeedOperator`, or `NULL`
972 
973   @return An error code: 0 - success, otherwise - failure
974 
975   @ref Developer
976 **/
977 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
978                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
979   bool                is_composite;
980   Ceed                ceed;
981   CeedInt             num_comp, num_input_fields, num_output_fields;
982   CeedVector          mult_vec         = NULL;
983   CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL;
984   CeedOperatorField  *input_fields, *output_fields;
985 
986   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
987 
988   // Check for composite operator
989   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
990   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
991 
992   // Coarse Grid
993   {
994     bool is_at_points;
995 
996     CeedCall(CeedOperatorIsAtPoints(op_fine, &is_at_points));
997     if (is_at_points) {
998       CeedVector          point_coords;
999       CeedElemRestriction rstr_points;
1000 
1001       CeedCall(CeedOperatorCreateAtPoints(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
1002       CeedCall(CeedOperatorAtPointsGetPoints(op_fine, &rstr_points, &point_coords));
1003       CeedCall(CeedOperatorAtPointsSetPoints(*op_coarse, rstr_points, point_coords));
1004       CeedCall(CeedVectorDestroy(&point_coords));
1005       CeedCall(CeedElemRestrictionDestroy(&rstr_points));
1006     } else {
1007       CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
1008     }
1009   }
1010   CeedCall(CeedOperatorGetFields(op_fine, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
1011   // -- Clone input fields
1012   for (CeedInt i = 0; i < num_input_fields; i++) {
1013     const char         *field_name;
1014     CeedVector          vec;
1015     CeedElemRestriction rstr  = NULL;
1016     CeedBasis           basis = NULL;
1017 
1018     CeedCall(CeedOperatorFieldGetName(input_fields[i], &field_name));
1019     CeedCall(CeedOperatorFieldGetVector(input_fields[i], &vec));
1020     if (vec == CEED_VECTOR_ACTIVE) {
1021       CeedCall(CeedElemRestrictionReferenceCopy(rstr_coarse, &rstr));
1022       CeedCall(CeedBasisReferenceCopy(basis_coarse, &basis));
1023       if (!rstr_fine) CeedCall(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_fine));
1024     } else {
1025       CeedCall(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr));
1026       CeedCall(CeedOperatorFieldGetBasis(input_fields[i], &basis));
1027     }
1028     CeedCall(CeedOperatorSetField(*op_coarse, field_name, rstr, basis, vec));
1029     CeedCall(CeedVectorDestroy(&vec));
1030     CeedCall(CeedElemRestrictionDestroy(&rstr));
1031     CeedCall(CeedBasisDestroy(&basis));
1032   }
1033   // -- Clone output fields
1034   for (CeedInt i = 0; i < num_output_fields; i++) {
1035     const char         *field_name;
1036     CeedVector          vec;
1037     CeedElemRestriction rstr  = NULL;
1038     CeedBasis           basis = NULL;
1039 
1040     CeedCall(CeedOperatorFieldGetName(output_fields[i], &field_name));
1041     CeedCall(CeedOperatorFieldGetVector(output_fields[i], &vec));
1042     if (vec == CEED_VECTOR_ACTIVE) {
1043       CeedCall(CeedElemRestrictionReferenceCopy(rstr_coarse, &rstr));
1044       CeedCall(CeedBasisReferenceCopy(basis_coarse, &basis));
1045       if (!rstr_fine) CeedCall(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_fine));
1046     } else {
1047       CeedCall(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr));
1048       CeedCall(CeedOperatorFieldGetBasis(output_fields[i], &basis));
1049     }
1050     CeedCall(CeedOperatorSetField(*op_coarse, field_name, rstr, basis, vec));
1051     CeedCall(CeedVectorDestroy(&vec));
1052     CeedCall(CeedElemRestrictionDestroy(&rstr));
1053     CeedCall(CeedBasisDestroy(&basis));
1054   }
1055   // -- Clone QFunctionAssemblyData
1056   {
1057     CeedQFunctionAssemblyData fine_data;
1058 
1059     CeedCall(CeedOperatorGetQFunctionAssemblyData(op_fine, &fine_data));
1060     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(fine_data, &(*op_coarse)->qf_assembled));
1061   }
1062 
1063   // Multiplicity vector
1064   if (op_restrict || op_prolong) {
1065     CeedVector          mult_e_vec;
1066     CeedRestrictionType rstr_type;
1067 
1068     CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type));
1069     CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED,
1070               "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported");
1071     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
1072     CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine));
1073     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
1074     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
1075     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
1076     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
1077     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
1078     CeedCall(CeedVectorDestroy(&mult_e_vec));
1079     CeedCall(CeedVectorReciprocal(mult_vec));
1080   }
1081 
1082   // Clone name
1083   bool   has_name = op_fine->name;
1084   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
1085   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
1086 
1087   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
1088   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
1089             "Prolongation or restriction operator creation requires coarse-to-fine basis");
1090 
1091   // Restriction/Prolongation Operators
1092   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
1093 
1094   // Restriction
1095   if (op_restrict) {
1096     CeedInt             *num_comp_r_data;
1097     CeedQFunctionContext ctx_r;
1098     CeedQFunction        qf_restrict;
1099 
1100     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
1101     CeedCall(CeedCalloc(1, &num_comp_r_data));
1102     num_comp_r_data[0] = num_comp;
1103     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
1104     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
1105     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
1106     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
1107     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
1108     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
1109     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
1110     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
1111 
1112     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
1113     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
1114     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
1115     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
1116 
1117     // Set name
1118     char *restriction_name;
1119 
1120     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
1121     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
1122     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
1123     CeedCall(CeedFree(&restriction_name));
1124 
1125     // Check
1126     CeedCall(CeedOperatorCheckReady(*op_restrict));
1127 
1128     // Cleanup
1129     CeedCall(CeedQFunctionDestroy(&qf_restrict));
1130   }
1131 
1132   // Prolongation
1133   if (op_prolong) {
1134     CeedInt             *num_comp_p_data;
1135     CeedQFunctionContext ctx_p;
1136     CeedQFunction        qf_prolong;
1137 
1138     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
1139     CeedCall(CeedCalloc(1, &num_comp_p_data));
1140     num_comp_p_data[0] = num_comp;
1141     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
1142     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
1143     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
1144     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
1145     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
1146     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
1147     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
1148     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
1149 
1150     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
1151     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
1152     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
1153     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
1154 
1155     // Set name
1156     char *prolongation_name;
1157 
1158     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
1159     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
1160     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
1161     CeedCall(CeedFree(&prolongation_name));
1162 
1163     // Check
1164     CeedCall(CeedOperatorCheckReady(*op_prolong));
1165 
1166     // Cleanup
1167     CeedCall(CeedQFunctionDestroy(&qf_prolong));
1168   }
1169 
1170   // Check
1171   CeedCall(CeedOperatorCheckReady(*op_coarse));
1172 
1173   // Cleanup
1174   CeedCall(CeedDestroy(&ceed));
1175   CeedCall(CeedVectorDestroy(&mult_vec));
1176   CeedCall(CeedElemRestrictionDestroy(&rstr_fine));
1177   CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine));
1178   CeedCall(CeedBasisDestroy(&basis_c_to_f));
1179   return CEED_ERROR_SUCCESS;
1180 }
1181 
1182 /**
1183   @brief Build 1D mass matrix and Laplacian with perturbation
1184 
1185   @param[in]  interp_1d   Interpolation matrix in one dimension
1186   @param[in]  grad_1d     Gradient matrix in one dimension
1187   @param[in]  q_weight_1d Quadrature weights in one dimension
1188   @param[in]  P_1d        Number of basis nodes in one dimension
1189   @param[in]  Q_1d        Number of quadrature points in one dimension
1190   @param[in]  dim         Dimension of basis
1191   @param[out] mass        Assembled mass matrix in one dimension
1192   @param[out] laplace     Assembled perturbed Laplacian in one dimension
1193 
1194   @return An error code: 0 - success, otherwise - failure
1195 
1196   @ref Developer
1197 **/
1198 CeedPragmaOptimizeOff
1199 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
1200                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
1201   for (CeedInt i = 0; i < P_1d; i++) {
1202     for (CeedInt j = 0; j < P_1d; j++) {
1203       CeedScalar sum = 0.0;
1204       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
1205       mass[i + j * P_1d] = sum;
1206     }
1207   }
1208   // -- Laplacian
1209   for (CeedInt i = 0; i < P_1d; i++) {
1210     for (CeedInt j = 0; j < P_1d; j++) {
1211       CeedScalar sum = 0.0;
1212 
1213       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
1214       laplace[i + j * P_1d] = sum;
1215     }
1216   }
1217   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
1218   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
1219   return CEED_ERROR_SUCCESS;
1220 }
1221 CeedPragmaOptimizeOn
1222 
1223 /// @}
1224 
1225 /// ----------------------------------------------------------------------------
1226 /// CeedOperator Backend API
1227 /// ----------------------------------------------------------------------------
1228 /// @addtogroup CeedOperatorBackend
1229 /// @{
1230 
1231 /**
1232   @brief Select correct basis matrix pointer based on @ref CeedEvalMode
1233 
1234   @param[in]  basis     `CeedBasis` from which to get the basis matrix
1235   @param[in]  eval_mode Current basis evaluation mode
1236   @param[in]  identity  Pointer to identity matrix
1237   @param[out] basis_ptr `CeedBasis` pointer to set
1238 
1239   @ref Backend
1240 **/
1241 int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
1242   switch (eval_mode) {
1243     case CEED_EVAL_NONE:
1244       *basis_ptr = identity;
1245       break;
1246     case CEED_EVAL_INTERP:
1247       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
1248       break;
1249     case CEED_EVAL_GRAD:
1250       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
1251       break;
1252     case CEED_EVAL_DIV:
1253       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
1254       break;
1255     case CEED_EVAL_CURL:
1256       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
1257       break;
1258     case CEED_EVAL_WEIGHT:
1259       break;  // Caught by QF Assembly
1260   }
1261   assert(*basis_ptr != NULL);
1262   return CEED_ERROR_SUCCESS;
1263 }
1264 
1265 /**
1266   @brief Create point block restriction for active `CeedOperatorField`
1267 
1268   @param[in]  rstr             Original `CeedElemRestriction` for active field
1269   @param[out] point_block_rstr Address of the variable where the newly created `CeedElemRestriction` will be stored
1270 
1271   @return An error code: 0 - success, otherwise - failure
1272 
1273   @ref Backend
1274 **/
1275 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) {
1276   Ceed           ceed;
1277   CeedInt        num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets;
1278   CeedSize       l_size;
1279   const CeedInt *offsets;
1280 
1281   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
1282   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
1283 
1284   // Expand offsets
1285   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
1286   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
1287   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
1288   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
1289   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
1290   shift = num_comp;
1291   if (comp_stride != 1) shift *= num_comp;
1292   CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets));
1293   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
1294     point_block_offsets[i] = offsets[i] * shift;
1295   }
1296 
1297   // Create new restriction
1298   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
1299                                      point_block_offsets, point_block_rstr));
1300 
1301   // Cleanup
1302   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
1303   CeedCall(CeedDestroy(&ceed));
1304   return CEED_ERROR_SUCCESS;
1305 }
1306 
1307 /**
1308   @brief Get `CeedQFunctionAssemblyData`
1309 
1310   @param[in]  op   `CeedOperator` to assemble
1311   @param[out] data `CeedQFunctionAssemblyData`
1312 
1313   @return An error code: 0 - success, otherwise - failure
1314 
1315   @ref Backend
1316 **/
1317 int CeedOperatorGetQFunctionAssemblyData(CeedOperator op, CeedQFunctionAssemblyData *data) {
1318   if (!op->qf_assembled) {
1319     CeedQFunctionAssemblyData data;
1320 
1321     CeedCall(CeedQFunctionAssemblyDataCreate(op->ceed, &data));
1322     op->qf_assembled = data;
1323   }
1324   *data = op->qf_assembled;
1325   return CEED_ERROR_SUCCESS;
1326 }
1327 
1328 /**
1329   @brief Create object holding `CeedQFunction` assembly data for `CeedOperator`
1330 
1331   @param[in]  ceed `Ceed` object used to create the `CeedQFunctionAssemblyData`
1332   @param[out] data Address of the variable where the newly created `CeedQFunctionAssemblyData` will be stored
1333 
1334   @return An error code: 0 - success, otherwise - failure
1335 
1336   @ref Backend
1337 **/
1338 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
1339   CeedCall(CeedCalloc(1, data));
1340   (*data)->ref_count = 1;
1341   (*data)->ceed      = ceed;
1342   CeedCall(CeedReference(ceed));
1343   return CEED_ERROR_SUCCESS;
1344 }
1345 
1346 /**
1347   @brief Increment the reference counter for a `CeedQFunctionAssemblyData`
1348 
1349   @param[in,out] data `CeedQFunctionAssemblyData` to increment the reference counter
1350 
1351   @return An error code: 0 - success, otherwise - failure
1352 
1353   @ref Backend
1354 **/
1355 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
1356   data->ref_count++;
1357   return CEED_ERROR_SUCCESS;
1358 }
1359 
1360 /**
1361   @brief Set re-use of `CeedQFunctionAssemblyData`
1362 
1363   @param[in,out] data       `CeedQFunctionAssemblyData` to mark for reuse
1364   @param[in]     reuse_data Boolean flag indicating data re-use
1365 
1366   @return An error code: 0 - success, otherwise - failure
1367 
1368   @ref Backend
1369 **/
1370 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
1371   data->reuse_data        = reuse_data;
1372   data->needs_data_update = true;
1373   return CEED_ERROR_SUCCESS;
1374 }
1375 
1376 /**
1377   @brief Mark `CeedQFunctionAssemblyData` as stale
1378 
1379   @param[in,out] data              `CeedQFunctionAssemblyData` to mark as stale
1380   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
1381 
1382   @return An error code: 0 - success, otherwise - failure
1383 
1384   @ref Backend
1385 **/
1386 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
1387   data->needs_data_update = needs_data_update;
1388   return CEED_ERROR_SUCCESS;
1389 }
1390 
1391 /**
1392   @brief Determine if `CeedQFunctionAssemblyData` needs update
1393 
1394   @param[in]  data             `CeedQFunctionAssemblyData` to mark as stale
1395   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1396 
1397   @return An error code: 0 - success, otherwise - failure
1398 
1399   @ref Backend
1400 **/
1401 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
1402   *is_update_needed = !data->reuse_data || data->needs_data_update;
1403   return CEED_ERROR_SUCCESS;
1404 }
1405 
1406 /**
1407   @brief Copy the pointer to a `CeedQFunctionAssemblyData`.
1408 
1409   Both pointers should be destroyed with @ref CeedQFunctionAssemblyDataDestroy().
1410 
1411   Note: If the value of ` *data_copy` passed to this function is non-`NULL` , then it is assumed that ` *data_copy` is a pointer to a `CeedQFunctionAssemblyData`.
1412         This `CeedQFunctionAssemblyData` will be destroyed if ` *data_copy` is the only reference to this `CeedQFunctionAssemblyData`.
1413 
1414   @param[in]     data      `CeedQFunctionAssemblyData` to copy reference to
1415   @param[in,out] data_copy Variable to store copied reference
1416 
1417   @return An error code: 0 - success, otherwise - failure
1418 
1419   @ref Backend
1420 **/
1421 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1422   CeedCall(CeedQFunctionAssemblyDataReference(data));
1423   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1424   *data_copy = data;
1425   return CEED_ERROR_SUCCESS;
1426 }
1427 
1428 /**
1429   @brief Get setup status for internal objects for `CeedQFunctionAssemblyData`
1430 
1431   @param[in]  data     `CeedQFunctionAssemblyData` to retrieve status
1432   @param[out] is_setup Boolean flag for setup status
1433 
1434   @return An error code: 0 - success, otherwise - failure
1435 
1436   @ref Backend
1437 **/
1438 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1439   *is_setup = data->is_setup;
1440   return CEED_ERROR_SUCCESS;
1441 }
1442 
1443 /**
1444   @brief Set internal objects for `CeedQFunctionAssemblyData`
1445 
1446   @param[in,out] data `CeedQFunctionAssemblyData` to set objects
1447   @param[in]     vec  `CeedVector` to store assembled `CeedQFunction` at quadrature points
1448   @param[in]     rstr `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
1449 
1450   @return An error code: 0 - success, otherwise - failure
1451 
1452   @ref Backend
1453 **/
1454 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1455   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1456   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1457 
1458   data->is_setup = true;
1459   return CEED_ERROR_SUCCESS;
1460 }
1461 
1462 /**
1463   @brief Get internal objects for `CeedQFunctionAssemblyData`
1464 
1465   @param[in,out] data `CeedQFunctionAssemblyData` to set objects
1466   @param[out]    vec  `CeedVector` to store assembled `CeedQFunction` at quadrature points
1467   @param[out]    rstr `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
1468 
1469   @return An error code: 0 - success, otherwise - failure
1470 
1471   @ref Backend
1472 **/
1473 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1474   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1475 
1476   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1477   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1478   return CEED_ERROR_SUCCESS;
1479 }
1480 
1481 /**
1482   @brief Destroy `CeedQFunctionAssemblyData`
1483 
1484   @param[in,out] data  `CeedQFunctionAssemblyData` to destroy
1485 
1486   @return An error code: 0 - success, otherwise - failure
1487 
1488   @ref Backend
1489 **/
1490 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1491   if (!*data || --(*data)->ref_count > 0) {
1492     *data = NULL;
1493     return CEED_ERROR_SUCCESS;
1494   }
1495   CeedCall(CeedDestroy(&(*data)->ceed));
1496   CeedCall(CeedVectorDestroy(&(*data)->vec));
1497   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1498 
1499   CeedCall(CeedFree(data));
1500   return CEED_ERROR_SUCCESS;
1501 }
1502 
1503 /**
1504   @brief Get `CeedOperatorAssemblyData`
1505 
1506   @param[in]  op   `CeedOperator` to assemble
1507   @param[out] data `CeedOperatorAssemblyData`
1508 
1509   @return An error code: 0 - success, otherwise - failure
1510 
1511   @ref Backend
1512 **/
1513 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1514   if (!op->op_assembled) {
1515     CeedOperatorAssemblyData data;
1516 
1517     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1518     op->op_assembled = data;
1519   }
1520   *data = op->op_assembled;
1521   return CEED_ERROR_SUCCESS;
1522 }
1523 
1524 /**
1525   @brief Create object holding `CeedOperator` assembly data.
1526 
1527   The `CeedOperatorAssemblyData` holds an array with references to every active `CeedBasis` used in the `CeedOperator`.
1528   An array with references to the corresponding active `CeedElemRestriction` is also stored.
1529   For each active `CeedBasis, the `CeedOperatorAssemblyData` holds an array of all input and output @ref CeedEvalMode for this `CeedBasis`.
1530   The `CeedOperatorAssemblyData` holds an array of offsets for indexing into the assembled `CeedQFunction` arrays to the row representing each @ref CeedEvalMode.
1531   The number of input columns across all active bases for the assembled `CeedQFunction` is also stored.
1532   Lastly, the `CeedOperatorAssembly` data holds assembled matrices representing the full action of the `CeedBasis` for all @ref CeedEvalMode.
1533 
1534   @param[in]  ceed `Ceed` object used to create the `CeedOperatorAssemblyData`
1535   @param[in]  op   `CeedOperator` to be assembled
1536   @param[out] data Address of the variable where the newly created `CeedOperatorAssemblyData` will be stored
1537 
1538   @return An error code: 0 - success, otherwise - failure
1539 
1540   @ref Backend
1541 **/
1542 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1543   CeedInt             num_active_bases_in = 0, num_active_bases_out = 0, offset = 0;
1544   CeedInt             num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL;
1545   CeedSize          **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1546   CeedEvalMode      **eval_modes_in = NULL, **eval_modes_out = NULL;
1547   CeedQFunctionField *qf_fields;
1548   CeedQFunction       qf;
1549   CeedOperatorField  *op_fields;
1550   bool                is_composite;
1551 
1552   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1553   CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators.");
1554 
1555   // Allocate
1556   CeedCall(CeedCalloc(1, data));
1557   (*data)->ceed = ceed;
1558   CeedCall(CeedReference(ceed));
1559 
1560   // Build OperatorAssembly data
1561   CeedCall(CeedOperatorGetQFunction(op, &qf));
1562 
1563   // Determine active input basis
1564   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1565   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1566   for (CeedInt i = 0; i < num_input_fields; i++) {
1567     CeedVector vec;
1568 
1569     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1570     if (vec == CEED_VECTOR_ACTIVE) {
1571       CeedInt      index = -1, num_comp, q_comp;
1572       CeedEvalMode eval_mode;
1573       CeedBasis    basis_in = NULL;
1574 
1575       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1576       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1577       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1578       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1579       for (CeedInt i = 0; i < num_active_bases_in; i++) {
1580         if ((*data)->active_bases_in[i] == basis_in) index = i;
1581       }
1582       if (index == -1) {
1583         CeedElemRestriction elem_rstr_in;
1584 
1585         index = num_active_bases_in;
1586         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in));
1587         (*data)->active_bases_in[num_active_bases_in] = NULL;
1588         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in]));
1589         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in));
1590         (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL;
1591         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1592         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in]));
1593         CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
1594         CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in));
1595         num_eval_modes_in[index] = 0;
1596         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in));
1597         eval_modes_in[index] = NULL;
1598         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in));
1599         eval_mode_offsets_in[index] = NULL;
1600         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in));
1601         (*data)->assembled_bases_in[index] = NULL;
1602         num_active_bases_in++;
1603       }
1604       if (eval_mode != CEED_EVAL_WEIGHT) {
1605         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1606         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1607         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1608         for (CeedInt d = 0; d < q_comp; d++) {
1609           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1610           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1611           offset += num_comp;
1612         }
1613         num_eval_modes_in[index] += q_comp;
1614       }
1615       CeedCall(CeedBasisDestroy(&basis_in));
1616     }
1617     CeedCall(CeedVectorDestroy(&vec));
1618   }
1619 
1620   // Determine active output basis
1621   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1622   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1623   offset = 0;
1624   for (CeedInt i = 0; i < num_output_fields; i++) {
1625     CeedVector vec;
1626 
1627     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1628     if (vec == CEED_VECTOR_ACTIVE) {
1629       CeedInt      index = -1, num_comp, q_comp;
1630       CeedEvalMode eval_mode;
1631       CeedBasis    basis_out = NULL;
1632 
1633       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1634       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1635       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1636       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1637       for (CeedInt i = 0; i < num_active_bases_out; i++) {
1638         if ((*data)->active_bases_out[i] == basis_out) index = i;
1639       }
1640       if (index == -1) {
1641         CeedElemRestriction elem_rstr_out;
1642 
1643         index = num_active_bases_out;
1644         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out));
1645         (*data)->active_bases_out[num_active_bases_out] = NULL;
1646         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out]));
1647         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out));
1648         (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL;
1649         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1650         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out]));
1651         CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
1652         CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out));
1653         num_eval_modes_out[index] = 0;
1654         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out));
1655         eval_modes_out[index] = NULL;
1656         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out));
1657         eval_mode_offsets_out[index] = NULL;
1658         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out));
1659         (*data)->assembled_bases_out[index] = NULL;
1660         num_active_bases_out++;
1661       }
1662       if (eval_mode != CEED_EVAL_WEIGHT) {
1663         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1664         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1665         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1666         for (CeedInt d = 0; d < q_comp; d++) {
1667           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1668           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1669           offset += num_comp;
1670         }
1671         num_eval_modes_out[index] += q_comp;
1672       }
1673       CeedCall(CeedBasisDestroy(&basis_out));
1674     }
1675     CeedCall(CeedVectorDestroy(&vec));
1676   }
1677   CeedCall(CeedQFunctionDestroy(&qf));
1678   (*data)->num_active_bases_in   = num_active_bases_in;
1679   (*data)->num_eval_modes_in     = num_eval_modes_in;
1680   (*data)->eval_modes_in         = eval_modes_in;
1681   (*data)->eval_mode_offsets_in  = eval_mode_offsets_in;
1682   (*data)->num_active_bases_out  = num_active_bases_out;
1683   (*data)->num_eval_modes_out    = num_eval_modes_out;
1684   (*data)->eval_modes_out        = eval_modes_out;
1685   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1686   (*data)->num_output_components = offset;
1687   return CEED_ERROR_SUCCESS;
1688 }
1689 
1690 /**
1691   @brief Get `CeedOperator` @ref CeedEvalMode for assembly.
1692 
1693   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1694 
1695   @param[in]  data                  `CeedOperatorAssemblyData`
1696   @param[out] num_active_bases_in   Total number of active bases for input
1697   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input @ref CeedEvalMode, or `NULL`.
1698                                       `eval_modes_in[0]` holds an array of eval modes for the first active `CeedBasis`.
1699   @param[out] eval_modes_in         Pointer to hold arrays of input @ref CeedEvalMode, or `NULL`
1700   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point
1701   @param[out] num_active_bases_out  Total number of active bases for output
1702   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output @ref CeedEvalMode, or `NULL`
1703   @param[out] eval_modes_out        Pointer to hold arrays of output @ref CeedEvalMode, or `NULL`
1704   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1705   @param[out] num_output_components The number of columns in the assembled `CeedQFunction` matrix for each quadrature point, including contributions of all active bases
1706 
1707   @return An error code: 0 - success, otherwise - failure
1708 
1709   @ref Backend
1710 **/
1711 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in,
1712                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out,
1713                                          CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out,
1714                                          CeedSize *num_output_components) {
1715   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1716   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1717   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1718   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1719   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1720   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1721   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1722   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1723   if (num_output_components) *num_output_components = data->num_output_components;
1724   return CEED_ERROR_SUCCESS;
1725 }
1726 
1727 /**
1728   @brief Get `CeedOperator` `CeedBasis` data for assembly.
1729 
1730   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1731 
1732   @param[in]  data                 `CeedOperatorAssemblyData`
1733   @param[out] num_active_bases_in  Number of active input bases, or `NULL`
1734   @param[out] active_bases_in      Pointer to hold active input `CeedBasis`, or `NULL`
1735   @param[out] assembled_bases_in   Pointer to hold assembled active input `B` , or `NULL`
1736   @param[out] num_active_bases_out Number of active output bases, or `NULL`
1737   @param[out] active_bases_out     Pointer to hold active output `CeedBasis`, or `NULL`
1738   @param[out] assembled_bases_out  Pointer to hold assembled active output `B` , or `NULL`
1739 
1740   @return An error code: 0 - success, otherwise - failure
1741 
1742   @ref Backend
1743 **/
1744 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in,
1745                                      const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out,
1746                                      const CeedScalar ***assembled_bases_out) {
1747   // Assemble B_in, B_out if needed
1748   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1749     CeedInt num_qpts;
1750 
1751     if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts));
1752     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts));
1753     for (CeedInt b = 0; b < data->num_active_bases_in; b++) {
1754       bool        has_eval_none = false;
1755       CeedInt     num_nodes;
1756       CeedScalar *B_in = NULL, *identity = NULL;
1757 
1758       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes));
1759       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1760 
1761       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1762         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1763       }
1764       if (has_eval_none) {
1765         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1766         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1767           identity[i * num_nodes + i] = 1.0;
1768         }
1769       }
1770 
1771       for (CeedInt q = 0; q < num_qpts; q++) {
1772         for (CeedInt n = 0; n < num_nodes; n++) {
1773           CeedInt      d_in              = 0, q_comp_in;
1774           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1775 
1776           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1777             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1778             const CeedScalar *B  = NULL;
1779 
1780             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B));
1781             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in));
1782             if (q_comp_in > 1) {
1783               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1784               else B = &B[(++d_in) * num_qpts * num_nodes];
1785             }
1786             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1787             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1788           }
1789         }
1790       }
1791       if (identity) CeedCall(CeedFree(&identity));
1792       data->assembled_bases_in[b] = B_in;
1793     }
1794   }
1795 
1796   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1797     CeedInt num_qpts;
1798 
1799     if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts));
1800     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts));
1801     for (CeedInt b = 0; b < data->num_active_bases_out; b++) {
1802       bool        has_eval_none = false;
1803       CeedInt     num_nodes;
1804       CeedScalar *B_out = NULL, *identity = NULL;
1805 
1806       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes));
1807       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1808 
1809       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1810         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1811       }
1812       if (has_eval_none) {
1813         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1814         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1815           identity[i * num_nodes + i] = 1.0;
1816         }
1817       }
1818 
1819       for (CeedInt q = 0; q < num_qpts; q++) {
1820         for (CeedInt n = 0; n < num_nodes; n++) {
1821           CeedInt      d_out              = 0, q_comp_out;
1822           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1823 
1824           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1825             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1826             const CeedScalar *B  = NULL;
1827 
1828             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B));
1829             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out));
1830             if (q_comp_out > 1) {
1831               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1832               else B = &B[(++d_out) * num_qpts * num_nodes];
1833             }
1834             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1835             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1836           }
1837         }
1838       }
1839       if (identity) CeedCall(CeedFree(&identity));
1840       data->assembled_bases_out[b] = B_out;
1841     }
1842   }
1843 
1844   // Pass out assembled data
1845   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1846   if (active_bases_in) *active_bases_in = data->active_bases_in;
1847   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1848   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1849   if (active_bases_out) *active_bases_out = data->active_bases_out;
1850   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1851   return CEED_ERROR_SUCCESS;
1852 }
1853 
1854 /**
1855   @brief Get `CeedOperator` `CeedBasis` data for assembly.
1856 
1857   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1858 
1859   @param[in]  data                      `CeedOperatorAssemblyData`
1860   @param[out] num_active_elem_rstrs_in  Number of active input element restrictions, or `NULL`
1861   @param[out] active_elem_rstrs_in      Pointer to hold active input `CeedElemRestriction`, or `NULL`
1862   @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or `NULL`
1863   @param[out] active_elem_rstrs_out     Pointer to hold active output `CeedElemRestriction`, or `NULL`
1864 
1865   @return An error code: 0 - success, otherwise - failure
1866 
1867   @ref Backend
1868 **/
1869 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in,
1870                                                 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out,
1871                                                 CeedElemRestriction **active_elem_rstrs_out) {
1872   if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in;
1873   if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in;
1874   if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out;
1875   if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out;
1876   return CEED_ERROR_SUCCESS;
1877 }
1878 
1879 /**
1880   @brief Destroy `CeedOperatorAssemblyData`
1881 
1882   @param[in,out] data `CeedOperatorAssemblyData` to destroy
1883 
1884   @return An error code: 0 - success, otherwise - failure
1885 
1886   @ref Backend
1887 **/
1888 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1889   if (!*data) {
1890     *data = NULL;
1891     return CEED_ERROR_SUCCESS;
1892   }
1893   CeedCall(CeedDestroy(&(*data)->ceed));
1894   for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) {
1895     CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b]));
1896     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b]));
1897     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1898     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1899     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1900   }
1901   for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) {
1902     CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b]));
1903     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b]));
1904     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1905     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1906     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1907   }
1908   CeedCall(CeedFree(&(*data)->active_bases_in));
1909   CeedCall(CeedFree(&(*data)->active_bases_out));
1910   CeedCall(CeedFree(&(*data)->active_elem_rstrs_in));
1911   CeedCall(CeedFree(&(*data)->active_elem_rstrs_out));
1912   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1913   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1914   CeedCall(CeedFree(&(*data)->eval_modes_in));
1915   CeedCall(CeedFree(&(*data)->eval_modes_out));
1916   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1917   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1918   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1919   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1920 
1921   CeedCall(CeedFree(data));
1922   return CEED_ERROR_SUCCESS;
1923 }
1924 
1925 /**
1926   @brief Retrieve fallback `CeedOperator` with a reference `Ceed` for advanced `CeedOperator` functionality
1927 
1928   @param[in]  op          `CeedOperator` to retrieve fallback for
1929   @param[out] op_fallback Fallback `CeedOperator`
1930 
1931   @return An error code: 0 - success, otherwise - failure
1932 
1933   @ref Backend
1934 **/
1935 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
1936   // Create if needed
1937   if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op));
1938   if (op->op_fallback) {
1939     bool is_debug;
1940     Ceed ceed;
1941 
1942     CeedCall(CeedOperatorGetCeed(op, &ceed));
1943     CeedCall(CeedIsDebug(ceed, &is_debug));
1944     if (is_debug) {
1945       Ceed        ceed_fallback;
1946       const char *resource, *resource_fallback, *op_name;
1947 
1948       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
1949       CeedCall(CeedGetResource(ceed, &resource));
1950       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
1951       CeedCall(CeedOperatorGetName(op, &op_name));
1952 
1953       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n");
1954       CeedDebug(ceed, "Falling back from Operator with backend %s at address %p to Operator with backend %s at address %p for CeedOperator \"%s\"\n",
1955                 resource, op, resource_fallback, op->op_fallback, op_name);
1956       CeedCall(CeedDestroy(&ceed_fallback));
1957     }
1958     CeedCall(CeedDestroy(&ceed));
1959   }
1960   *op_fallback = op->op_fallback;
1961   return CEED_ERROR_SUCCESS;
1962 }
1963 
1964 /**
1965   @brief Get the parent `CeedOperator` for a fallback `CeedOperator`
1966 
1967   @param[in]  op     `CeedOperator` context
1968   @param[out] parent Variable to store parent `CeedOperator` context
1969 
1970   @return An error code: 0 - success, otherwise - failure
1971 
1972   @ref Backend
1973 **/
1974 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) {
1975   *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL;
1976   return CEED_ERROR_SUCCESS;
1977 }
1978 
1979 /**
1980   @brief Get the `Ceed` context of the parent `CeedOperator` for a fallback `CeedOperator`
1981 
1982   @param[in]  op     `CeedOperator` context
1983   @param[out] parent Variable to store parent `Ceed` context
1984 
1985   @return An error code: 0 - success, otherwise - failure
1986 
1987   @ref Backend
1988 **/
1989 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) {
1990   *parent = NULL;
1991   if (op->op_fallback_parent) CeedCall(CeedReferenceCopy(op->op_fallback_parent->ceed, parent));
1992   else CeedCall(CeedReferenceCopy(CeedOperatorReturnCeed(op), parent));
1993   return CEED_ERROR_SUCCESS;
1994 }
1995 
1996 /// @}
1997 
1998 /// ----------------------------------------------------------------------------
1999 /// CeedOperator Public API
2000 /// ----------------------------------------------------------------------------
2001 /// @addtogroup CeedOperatorUser
2002 /// @{
2003 
2004 /**
2005   @brief Assemble a linear `CeedQFunction` associated with a `CeedOperator`.
2006 
2007   This returns a `CeedVector` containing a matrix at each quadrature point providing the action of the `CeedQFunction` associated with the `CeedOperator`.
2008   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices representing the action of the `CeedQFunction` for a corresponding quadrature point on an element.
2009 
2010   Inputs and outputs are in the order provided by the user when adding `CeedOperator` fields.
2011   For example, a `CeedQFunction` with inputs `u` and `gradu` and outputs `gradv` and `v` , provided in that order, would result in an assembled `CeedQFunction` that consists of `(1 + dim) x (dim + 1)` matrices at each quadrature point acting on the input ` [u, du_0, du_1]` and producing the output `[dv_0, dv_1, v]`.
2012 
2013   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2014 
2015   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2016   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
2017   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
2018   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2019 
2020   @return An error code: 0 - success, otherwise - failure
2021 
2022   @ref User
2023 **/
2024 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
2025   CeedCall(CeedOperatorCheckReady(op));
2026 
2027   if (op->LinearAssembleQFunction) {
2028     // Backend version
2029     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
2030   } else {
2031     // Operator fallback
2032     CeedOperator op_fallback;
2033 
2034     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleQFunction\n");
2035     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2036     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
2037     else return CeedError(CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
2038   }
2039   return CEED_ERROR_SUCCESS;
2040 }
2041 
2042 /**
2043   @brief Assemble `CeedQFunction` and store result internally.
2044 
2045   Return copied references of stored data to the caller.
2046   Caller is responsible for ownership and destruction of the copied references.
2047   See also @ref CeedOperatorLinearAssembleQFunction().
2048 
2049   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
2050         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
2051 
2052   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2053   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
2054   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
2055   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2056 
2057   @return An error code: 0 - success, otherwise - failure
2058 
2059   @ref User
2060 **/
2061 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
2062   return CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(op, true, assembled, rstr, request);
2063 }
2064 
2065 /**
2066   @brief Assemble the diagonal of a square linear `CeedOperator`
2067 
2068   This overwrites a `CeedVector` with the diagonal of a linear `CeedOperator`.
2069 
2070   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2071 
2072   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2073 
2074   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2075   @param[out] assembled `CeedVector` to store assembled `CeedOperator` diagonal
2076   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2077 
2078   @return An error code: 0 - success, otherwise - failure
2079 
2080   @ref User
2081 **/
2082 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2083   bool     is_composite;
2084   CeedSize input_size = 0, output_size = 0;
2085 
2086   CeedCall(CeedOperatorCheckReady(op));
2087   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2088 
2089   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2090   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2091 
2092   // Early exit for empty operator
2093   if (!is_composite) {
2094     CeedInt num_elem = 0;
2095 
2096     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2097     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2098   }
2099 
2100   if (op->LinearAssembleDiagonal) {
2101     // Backend version
2102     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
2103     return CEED_ERROR_SUCCESS;
2104   } else if (op->LinearAssembleAddDiagonal) {
2105     // Backend version with zeroing first
2106     CeedCall(CeedVectorSetValue(assembled, 0.0));
2107     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
2108     return CEED_ERROR_SUCCESS;
2109   } else if (is_composite) {
2110     // Default to summing contributions of suboperators
2111     CeedCall(CeedVectorSetValue(assembled, 0.0));
2112     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
2113     return CEED_ERROR_SUCCESS;
2114   } else {
2115     // Operator fallback
2116     CeedOperator op_fallback;
2117 
2118     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleDiagonal\n");
2119     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2120     if (op_fallback) {
2121       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
2122       return CEED_ERROR_SUCCESS;
2123     }
2124   }
2125   // Default interface implementation
2126   CeedCall(CeedVectorSetValue(assembled, 0.0));
2127   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
2128   return CEED_ERROR_SUCCESS;
2129 }
2130 
2131 /**
2132   @brief Assemble the diagonal of a square linear `CeedOperator`.
2133 
2134   This sums into a `CeedVector` the diagonal of a linear `CeedOperator`.
2135 
2136   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2137 
2138   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2139 
2140   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2141   @param[out] assembled `CeedVector` to store assembled `CeedOperator` diagonal
2142   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2143 
2144   @return An error code: 0 - success, otherwise - failure
2145 
2146   @ref User
2147 **/
2148 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2149   bool     is_composite;
2150   CeedSize input_size = 0, output_size = 0;
2151 
2152   CeedCall(CeedOperatorCheckReady(op));
2153   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2154 
2155   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2156   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2157 
2158   // Early exit for empty operator
2159   if (!is_composite) {
2160     CeedInt num_elem = 0;
2161 
2162     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2163     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2164   }
2165 
2166   if (op->LinearAssembleAddDiagonal) {
2167     // Backend version
2168     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
2169     return CEED_ERROR_SUCCESS;
2170   } else if (is_composite) {
2171     // Default to summing contributions of suboperators
2172     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
2173     return CEED_ERROR_SUCCESS;
2174   } else {
2175     // Operator fallback
2176     CeedOperator op_fallback;
2177 
2178     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleAddDiagonal\n");
2179     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2180     if (op_fallback) {
2181       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
2182       return CEED_ERROR_SUCCESS;
2183     }
2184   }
2185   // Default interface implementation
2186   CeedCall(CeedSingleOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
2187   return CEED_ERROR_SUCCESS;
2188 }
2189 
2190 /**
2191    @brief Fully assemble the point-block diagonal pattern of a linear `CeedOperator`.
2192 
2193    Expected to be used in conjunction with @ref CeedOperatorLinearAssemblePointBlockDiagonal().
2194 
2195    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2196    Note that the `(i, j)` pairs are unique.
2197    This function returns the number of entries and their `(i, j)` locations, while @ref CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in the same ordering.
2198 
2199    This will generally be slow unless your operator is low-order.
2200 
2201    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2202 
2203    @param[in]  op          `CeedOperator` to assemble
2204    @param[out] num_entries Number of entries in coordinate nonzero pattern
2205    @param[out] rows        Row number for each entry
2206    @param[out] cols        Column number for each entry
2207 
2208    @ref User
2209 **/
2210 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2211   bool          is_composite;
2212   CeedInt       num_active_components, num_sub_operators;
2213   CeedOperator *sub_operators;
2214 
2215   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2216 
2217   CeedSize input_size = 0, output_size = 0;
2218   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2219   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2220 
2221   if (is_composite) {
2222     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_operators));
2223     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2224   } else {
2225     sub_operators     = &op;
2226     num_sub_operators = 1;
2227   }
2228 
2229   // Verify operator can be assembled correctly
2230   {
2231     CeedOperatorAssemblyData data;
2232     CeedInt                  num_active_elem_rstrs, comp_stride;
2233     CeedElemRestriction     *active_elem_rstrs;
2234 
2235     // Get initial values to check against
2236     CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data));
2237     CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
2238     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride));
2239     CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components));
2240 
2241     // Verify that all active element restrictions have same component stride and number of components
2242     for (CeedInt k = 0; k < num_sub_operators; k++) {
2243       CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data));
2244       CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
2245       for (CeedInt i = 0; i < num_active_elem_rstrs; i++) {
2246         CeedInt comp_stride_sub, num_active_components_sub;
2247 
2248         CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub));
2249         CeedCheck(comp_stride == comp_stride_sub, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION,
2250                   "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub);
2251         CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub));
2252         CeedCheck(num_active_components == num_active_components_sub, CeedOperatorReturnCeed(op), CEED_ERROR_INCOMPATIBLE,
2253                   "All suboperators must have the same number of output components."
2254                   " Previous: %" CeedInt_FMT " Current: %" CeedInt_FMT,
2255                   num_active_components, num_active_components_sub);
2256       }
2257     }
2258   }
2259   *num_entries = input_size * num_active_components;
2260   CeedCall(CeedCalloc(*num_entries, rows));
2261   CeedCall(CeedCalloc(*num_entries, cols));
2262 
2263   for (CeedInt o = 0; o < num_sub_operators; o++) {
2264     CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr;
2265     CeedInt             comp_stride, num_elem, elem_size;
2266     const CeedInt      *offsets, *point_block_offsets;
2267 
2268     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr));
2269     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride));
2270     CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem));
2271     CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size));
2272     CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets));
2273 
2274     CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr));
2275     CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets));
2276 
2277     for (CeedSize i = 0; i < num_elem * elem_size; i++) {
2278       for (CeedInt c_out = 0; c_out < num_active_components; c_out++) {
2279         for (CeedInt c_in = 0; c_in < num_active_components; c_in++) {
2280           (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride;
2281           (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride;
2282         }
2283       }
2284     }
2285 
2286     CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets));
2287     CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets));
2288     CeedCall(CeedElemRestrictionDestroy(&active_elem_rstr));
2289     CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr));
2290   }
2291   return CEED_ERROR_SUCCESS;
2292 }
2293 
2294 /**
2295   @brief Assemble the point block diagonal of a square linear `CeedOperator`.
2296 
2297   This overwrites a `CeedVector` with the point block diagonal of a linear `CeedOperator`.
2298 
2299   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2300 
2301   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2302 
2303   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2304   @param[out] assembled `CeedVector` to store assembled `CeedOperator` point block diagonal, provided in row-major form with an `num_comp * num_comp` block at each node.
2305                           The dimensions of this vector are derived from the active vector for the `CeedOperator`.
2306                           The array has shape `[nodes, component out, component in]`.
2307   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2308 
2309   @return An error code: 0 - success, otherwise - failure
2310 
2311   @ref User
2312 **/
2313 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2314   bool     is_composite;
2315   CeedSize input_size = 0, output_size = 0;
2316 
2317   CeedCall(CeedOperatorCheckReady(op));
2318   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2319 
2320   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2321   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2322 
2323   // Early exit for empty operator
2324   if (!is_composite) {
2325     CeedInt num_elem = 0;
2326 
2327     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2328     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2329   }
2330 
2331   if (op->LinearAssemblePointBlockDiagonal) {
2332     // Backend version
2333     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
2334     return CEED_ERROR_SUCCESS;
2335   } else if (op->LinearAssembleAddPointBlockDiagonal) {
2336     // Backend version with zeroing first
2337     CeedCall(CeedVectorSetValue(assembled, 0.0));
2338     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2339     return CEED_ERROR_SUCCESS;
2340   } else {
2341     // Operator fallback
2342     CeedOperator op_fallback;
2343 
2344     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssemblePointBlockDiagonal\n");
2345     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2346     if (op_fallback) {
2347       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
2348       return CEED_ERROR_SUCCESS;
2349     }
2350   }
2351   // Default interface implementation
2352   CeedCall(CeedVectorSetValue(assembled, 0.0));
2353   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2354   return CEED_ERROR_SUCCESS;
2355 }
2356 
2357 /**
2358   @brief Assemble the point block diagonal of a square linear `CeedOperator`.
2359 
2360   This sums into a `CeedVector` with the point block diagonal of a linear `CeedOperator`.
2361 
2362   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2363 
2364   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2365 
2366   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2367   @param[out] assembled `CeedVector` to store assembled CeedOperator point block diagonal, provided in row-major form with an `num_comp * num_comp` block at each node.
2368                           The dimensions of this vector are derived from the active vector for the `CeedOperator`.
2369                           The array has shape `[nodes, component out, component in]`.
2370   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2371 
2372   @return An error code: 0 - success, otherwise - failure
2373 
2374   @ref User
2375 **/
2376 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2377   bool     is_composite;
2378   CeedSize input_size = 0, output_size = 0;
2379 
2380   CeedCall(CeedOperatorCheckReady(op));
2381   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2382 
2383   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2384   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2385 
2386   // Early exit for empty operator
2387   if (!is_composite) {
2388     CeedInt num_elem = 0;
2389 
2390     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2391     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2392   }
2393 
2394   if (op->LinearAssembleAddPointBlockDiagonal) {
2395     // Backend version
2396     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
2397     return CEED_ERROR_SUCCESS;
2398   } else {
2399     // Operator fallback
2400     CeedOperator op_fallback;
2401 
2402     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleAddPointBlockDiagonal\n");
2403     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2404     if (op_fallback) {
2405       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
2406       return CEED_ERROR_SUCCESS;
2407     }
2408   }
2409   // Default interface implementation
2410   if (is_composite) {
2411     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
2412   } else {
2413     CeedCall(CeedSingleOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
2414   }
2415   return CEED_ERROR_SUCCESS;
2416 }
2417 
2418 /**
2419    @brief Fully assemble the nonzero pattern of a linear `CeedOperator`.
2420 
2421    Expected to be used in conjunction with @ref CeedOperatorLinearAssemble().
2422 
2423    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2424    Note that the `(i, j)` pairs are not unique and may repeat.
2425    This function returns the number of entries and their `(i, j)` locations, while @ref CeedOperatorLinearAssemble() provides the values in the same ordering.
2426 
2427    This will generally be slow unless your operator is low-order.
2428 
2429    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2430 
2431    @param[in]  op          `CeedOperator` to assemble
2432    @param[out] num_entries Number of entries in coordinate nonzero pattern
2433    @param[out] rows        Row number for each entry
2434    @param[out] cols        Column number for each entry
2435 
2436    @ref User
2437 **/
2438 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2439   bool          is_composite;
2440   CeedInt       num_suboperators, offset = 0;
2441   CeedSize      single_entries;
2442   CeedOperator *sub_operators;
2443 
2444   CeedCall(CeedOperatorCheckReady(op));
2445   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2446 
2447   if (op->LinearAssembleSymbolic) {
2448     // Backend version
2449     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
2450     return CEED_ERROR_SUCCESS;
2451   } else {
2452     // Operator fallback
2453     CeedOperator op_fallback;
2454 
2455     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleSymbolic\n");
2456     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2457     if (op_fallback) {
2458       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
2459       return CEED_ERROR_SUCCESS;
2460     }
2461   }
2462 
2463   // Default interface implementation
2464 
2465   // Count entries and allocate rows, cols arrays
2466   *num_entries = 0;
2467   if (is_composite) {
2468     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2469     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2470     for (CeedInt k = 0; k < num_suboperators; ++k) {
2471       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2472       *num_entries += single_entries;
2473     }
2474   } else {
2475     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
2476     *num_entries += single_entries;
2477   }
2478   CeedCall(CeedCalloc(*num_entries, rows));
2479   CeedCall(CeedCalloc(*num_entries, cols));
2480 
2481   // Assemble nonzero locations
2482   if (is_composite) {
2483     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2484     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2485     for (CeedInt k = 0; k < num_suboperators; ++k) {
2486       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
2487       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2488       offset += single_entries;
2489     }
2490   } else {
2491     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
2492   }
2493   return CEED_ERROR_SUCCESS;
2494 }
2495 
2496 /**
2497    @brief Fully assemble the nonzero entries of a linear operator.
2498 
2499    Expected to be used in conjunction with @ref CeedOperatorLinearAssembleSymbolic().
2500 
2501    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2502    Note that the `(i, j)` pairs are not unique and may repeat.
2503    This function returns the values of the nonzero entries to be added, their `(i, j)` locations are provided by @ref CeedOperatorLinearAssembleSymbolic().
2504 
2505    This will generally be slow unless your operator is low-order.
2506 
2507    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2508 
2509    @param[in]  op     `CeedOperator` to assemble
2510    @param[out] values Values to assemble into matrix
2511 
2512    @ref User
2513 **/
2514 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
2515   bool          is_composite;
2516   CeedInt       num_suboperators, offset = 0;
2517   CeedSize      single_entries = 0;
2518   CeedOperator *sub_operators;
2519 
2520   CeedCall(CeedOperatorCheckReady(op));
2521   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2522 
2523   // Early exit for empty operator
2524   if (!is_composite) {
2525     CeedInt num_elem = 0;
2526 
2527     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2528     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2529   }
2530 
2531   if (op->LinearAssemble) {
2532     // Backend version
2533     CeedCall(op->LinearAssemble(op, values));
2534     return CEED_ERROR_SUCCESS;
2535   } else if (is_composite) {
2536     // Default to summing contributions of suboperators
2537     CeedCall(CeedVectorSetValue(values, 0.0));
2538     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2539     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2540     for (CeedInt k = 0; k < num_suboperators; k++) {
2541       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2542       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2543       offset += single_entries;
2544     }
2545     return CEED_ERROR_SUCCESS;
2546   } else if (op->LinearAssembleSingle) {
2547     CeedCall(CeedVectorSetValue(values, 0.0));
2548     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2549     return CEED_ERROR_SUCCESS;
2550   } else {
2551     // Operator fallback
2552     CeedOperator op_fallback;
2553 
2554     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssemble\n");
2555     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2556     if (op_fallback) {
2557       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2558       return CEED_ERROR_SUCCESS;
2559     }
2560   }
2561 
2562   // Default to interface version if non-composite and no fallback
2563   CeedCall(CeedVectorSetValue(values, 0.0));
2564   CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2565   return CEED_ERROR_SUCCESS;
2566 }
2567 
2568 /**
2569   @brief Get the multiplicity of nodes across sub-operators in a composite `CeedOperator`.
2570 
2571   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2572 
2573   @param[in]  op               Composite `CeedOperator`
2574   @param[in]  num_skip_indices Number of sub-operators to skip
2575   @param[in]  skip_indices     Array of indices of sub-operators to skip
2576   @param[out] mult             Vector to store multiplicity (of size `l_size` )
2577 
2578   @return An error code: 0 - success, otherwise - failure
2579 
2580   @ref User
2581 **/
2582 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2583   Ceed                ceed;
2584   CeedInt             num_suboperators;
2585   CeedSize            l_vec_len;
2586   CeedScalar         *mult_array;
2587   CeedVector          ones_l_vec;
2588   CeedElemRestriction elem_rstr, mult_elem_rstr;
2589   CeedOperator       *sub_operators;
2590 
2591   CeedCall(CeedOperatorCheckReady(op));
2592 
2593   // Zero mult vector
2594   CeedCall(CeedVectorSetValue(mult, 0.0));
2595 
2596   // Get suboperators
2597   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2598   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2599   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2600 
2601   // Work vector
2602   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2603   CeedCall(CeedOperatorGetCeed(op, &ceed));
2604   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2605   CeedCall(CeedDestroy(&ceed));
2606   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2607   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2608 
2609   // Compute multiplicity across suboperators
2610   for (CeedInt i = 0; i < num_suboperators; i++) {
2611     const CeedScalar *sub_mult_array;
2612     CeedVector        sub_mult_l_vec, ones_e_vec;
2613 
2614     // -- Check for suboperator to skip
2615     for (CeedInt j = 0; j < num_skip_indices; j++) {
2616       if (skip_indices[j] == i) continue;
2617     }
2618 
2619     // -- Sub operator multiplicity
2620     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2621     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr));
2622     CeedCall(CeedElemRestrictionDestroy(&elem_rstr));
2623     CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2624     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2625     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2626     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2627     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2628     // ---- Flag every node present in the current suboperator
2629     for (CeedSize j = 0; j < l_vec_len; j++) {
2630       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2631     }
2632     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2633     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2634     CeedCall(CeedVectorDestroy(&ones_e_vec));
2635     CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr));
2636   }
2637   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2638   CeedCall(CeedVectorDestroy(&ones_l_vec));
2639   return CEED_ERROR_SUCCESS;
2640 }
2641 
2642 /**
2643   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator`, creating the prolongation basis from the fine and coarse grid interpolation.
2644 
2645   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2646 
2647   @param[in]  op_fine      Fine grid `CeedOperator`
2648   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2649   @param[in]  rstr_coarse  Coarse grid `CeedElemRestriction`
2650   @param[in]  basis_coarse Coarse grid active vector `CeedBasis`
2651   @param[out] op_coarse    Coarse grid `CeedOperator`
2652   @param[out] op_prolong   Coarse to fine `CeedOperator`, or `NULL`
2653   @param[out] op_restrict  Fine to coarse `CeedOperator`, or `NULL`
2654 
2655   @return An error code: 0 - success, otherwise - failure
2656 
2657   @ref User
2658 **/
2659 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2660                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2661   CeedBasis basis_c_to_f = NULL;
2662 
2663   CeedCall(CeedOperatorCheckReady(op_fine));
2664 
2665   // Build prolongation matrix, if required
2666   if (op_prolong || op_restrict) {
2667     CeedBasis basis_fine;
2668 
2669     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2670     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2671     CeedCall(CeedBasisDestroy(&basis_fine));
2672   }
2673 
2674   // Core code
2675   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2676   return CEED_ERROR_SUCCESS;
2677 }
2678 
2679 /**
2680   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator` with a tensor basis for the active basis.
2681 
2682   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2683 
2684   @param[in]  op_fine       Fine grid `CeedOperator`
2685   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2686   @param[in]  rstr_coarse   Coarse grid `CeedElemRestriction`
2687   @param[in]  basis_coarse  Coarse grid active vector `CeedBasis`
2688   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction `CeedOperator`
2689   @param[out] op_coarse     Coarse grid `CeedOperator`
2690   @param[out] op_prolong    Coarse to fine `CeedOperator`, or `NULL`
2691   @param[out] op_restrict   Fine to coarse `CeedOperator`, or `NULL`
2692 
2693   @return An error code: 0 - success, otherwise - failure
2694 
2695   @ref User
2696 **/
2697 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2698                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2699                                              CeedOperator *op_restrict) {
2700   Ceed      ceed;
2701   CeedInt   Q_f, Q_c;
2702   CeedBasis basis_fine, basis_c_to_f = NULL;
2703 
2704   CeedCall(CeedOperatorCheckReady(op_fine));
2705   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2706 
2707   // Check for compatible quadrature spaces
2708   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2709   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2710   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2711   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION,
2712             "Bases must have compatible quadrature spaces."
2713             " Fine grid: %" CeedInt_FMT " points, Coarse grid: %" CeedInt_FMT " points",
2714             Q_f, Q_c);
2715 
2716   // Create coarse to fine basis, if required
2717   if (op_prolong || op_restrict) {
2718     CeedInt     dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2719     CeedScalar *q_ref, *q_weight, *grad;
2720 
2721     // Check if interpolation matrix is provided
2722     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2723               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2724     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2725     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2726     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2727     CeedCall(CeedBasisDestroy(&basis_fine));
2728     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2729     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2730     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2731     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2732     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2733     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2734     CeedCall(CeedFree(&q_ref));
2735     CeedCall(CeedFree(&q_weight));
2736     CeedCall(CeedFree(&grad));
2737   }
2738 
2739   // Core code
2740   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2741   CeedCall(CeedDestroy(&ceed));
2742   return CEED_ERROR_SUCCESS;
2743 }
2744 
2745 /**
2746   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator` with a non-tensor basis for the active vector
2747 
2748   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2749 
2750   @param[in]  op_fine       Fine grid `CeedOperator`
2751   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2752   @param[in]  rstr_coarse   Coarse grid `CeedElemRestriction`
2753   @param[in]  basis_coarse  Coarse grid active vector `CeedBasis`
2754   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction `CeedOperator`
2755   @param[out] op_coarse     Coarse grid `CeedOperator`
2756   @param[out] op_prolong    Coarse to fine `CeedOperator`, or `NULL`
2757   @param[out] op_restrict   Fine to coarse `CeedOperator`, or `NULL`
2758 
2759   @return An error code: 0 - success, otherwise - failure
2760 
2761   @ref User
2762 **/
2763 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2764                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2765                                        CeedOperator *op_restrict) {
2766   Ceed      ceed;
2767   CeedInt   Q_f, Q_c;
2768   CeedBasis basis_fine, basis_c_to_f = NULL;
2769 
2770   CeedCall(CeedOperatorCheckReady(op_fine));
2771   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2772 
2773   // Check for compatible quadrature spaces
2774   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2775   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2776   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2777   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2778 
2779   // Coarse to fine basis
2780   if (op_prolong || op_restrict) {
2781     CeedInt          dim, num_comp, num_nodes_c, num_nodes_f;
2782     CeedScalar      *q_ref, *q_weight, *grad;
2783     CeedElemTopology topo;
2784 
2785     // Check if interpolation matrix is provided
2786     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2787               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2788     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2789     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2790     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2791     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2792     CeedCall(CeedBasisDestroy(&basis_fine));
2793     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2794     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2795     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2796     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2797     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2798     CeedCall(CeedFree(&q_ref));
2799     CeedCall(CeedFree(&q_weight));
2800     CeedCall(CeedFree(&grad));
2801   }
2802 
2803   // Core code
2804   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2805   CeedCall(CeedDestroy(&ceed));
2806   return CEED_ERROR_SUCCESS;
2807 }
2808 
2809 /**
2810   @brief Build a FDM based approximate inverse for each element for a `CeedOperator`.
2811 
2812   This returns a `CeedOperator` and `CeedVector` to apply a Fast Diagonalization Method based approximate inverse.
2813   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2814   The assembled `CeedQFunction` is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T \hat S V\f$.
2815   The `CeedOperator` must be linear and non-composite.
2816   The associated `CeedQFunction` must therefore also be linear.
2817 
2818   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2819 
2820   @param[in]  op      `CeedOperator` to create element inverses
2821   @param[out] fdm_inv `CeedOperator` to apply the action of a FDM based inverse for each element
2822   @param[in]  request Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2823 
2824   @return An error code: 0 - success, otherwise - failure
2825 
2826   @ref User
2827 **/
2828 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2829   Ceed                 ceed, ceed_parent;
2830   bool                 interp = false, grad = false, is_tensor_basis = true;
2831   CeedInt              num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2832   CeedScalar          *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg;
2833   const CeedScalar    *interp_1d, *grad_1d, *q_weight_1d;
2834   CeedVector           q_data;
2835   CeedElemRestriction  rstr  = NULL, rstr_qd_i;
2836   CeedBasis            basis = NULL, fdm_basis;
2837   CeedQFunctionContext ctx_fdm;
2838   CeedQFunctionField  *qf_fields;
2839   CeedQFunction        qf, qf_fdm;
2840   CeedOperatorField   *op_fields;
2841 
2842   CeedCall(CeedOperatorCheckReady(op));
2843 
2844   if (op->CreateFDMElementInverse) {
2845     // Backend version
2846     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2847     return CEED_ERROR_SUCCESS;
2848   } else {
2849     // Operator fallback
2850     CeedOperator op_fallback;
2851 
2852     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorCreateFDMElementInverse\n");
2853     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2854     if (op_fallback) {
2855       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2856       return CEED_ERROR_SUCCESS;
2857     }
2858   }
2859 
2860   // Default interface implementation
2861   CeedCall(CeedOperatorGetCeed(op, &ceed));
2862   CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent));
2863   CeedCall(CeedOperatorGetQFunction(op, &qf));
2864 
2865   // Determine active input basis
2866   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2867   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2868   for (CeedInt i = 0; i < num_input_fields; i++) {
2869     CeedVector vec;
2870 
2871     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2872     if (vec == CEED_VECTOR_ACTIVE) {
2873       CeedEvalMode eval_mode;
2874 
2875       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2876       interp = interp || eval_mode == CEED_EVAL_INTERP;
2877       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2878       if (!basis) CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2879       if (!rstr) CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2880     }
2881     CeedCall(CeedVectorDestroy(&vec));
2882   }
2883   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2884   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2885   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2886   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2887   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2888   CeedCall(CeedBasisGetDimension(basis, &dim));
2889   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2890   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2891 
2892   // Build and diagonalize 1D Mass and Laplacian
2893   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2894   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2895   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2896   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2897   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2898   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2899   CeedCall(CeedCalloc(P_1d, &lambda));
2900   // -- Build matrices
2901   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2902   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2903   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2904   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2905 
2906   // -- Diagonalize
2907   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2908   CeedCall(CeedFree(&mass));
2909   CeedCall(CeedFree(&laplace));
2910   for (CeedInt i = 0; i < P_1d; i++) {
2911     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2912   }
2913   CeedCall(CeedFree(&x));
2914 
2915   {
2916     CeedInt             layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2917     CeedScalar          max_norm = 0;
2918     const CeedScalar   *assembled_array, *q_weight_array;
2919     CeedVector          assembled = NULL, q_weight;
2920     CeedElemRestriction rstr_qf   = NULL;
2921 
2922     // Assemble QFunction
2923     CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2924     CeedCall(CeedElemRestrictionGetELayout(rstr_qf, layout));
2925     CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2926     CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2927 
2928     // Calculate element averages
2929     CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2930     CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2931     CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2932     CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2933     CeedCall(CeedCalloc(num_elem, &elem_avg));
2934     const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2935 
2936     for (CeedInt e = 0; e < num_elem; e++) {
2937       CeedInt count = 0;
2938 
2939       for (CeedInt q = 0; q < num_qpts; q++) {
2940         for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2941           if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2942             elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2943             count++;
2944           }
2945         }
2946       }
2947       if (count) {
2948         elem_avg[e] /= count;
2949       } else {
2950         elem_avg[e] = 1.0;
2951       }
2952     }
2953     CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2954     CeedCall(CeedVectorDestroy(&assembled));
2955     CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2956     CeedCall(CeedVectorDestroy(&q_weight));
2957   }
2958 
2959   // Build FDM diagonal
2960   {
2961     CeedScalar *q_data_array, *fdm_diagonal;
2962 
2963     CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2964     const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2965     for (CeedInt c = 0; c < num_comp; c++) {
2966       for (CeedInt n = 0; n < num_nodes; n++) {
2967         if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2968         if (grad) {
2969           for (CeedInt d = 0; d < dim; d++) {
2970             CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2971             fdm_diagonal[c * num_nodes + n] += lambda[i];
2972           }
2973         }
2974         if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2975       }
2976     }
2977     CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2978     CeedCall(CeedVectorSetValue(q_data, 0.0));
2979     CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2980     for (CeedInt e = 0; e < num_elem; e++) {
2981       for (CeedInt c = 0; c < num_comp; c++) {
2982         for (CeedInt n = 0; n < num_nodes; n++) {
2983           q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2984         }
2985       }
2986     }
2987     CeedCall(CeedFree(&elem_avg));
2988     CeedCall(CeedFree(&fdm_diagonal));
2989     CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2990   }
2991 
2992   // Setup FDM operator
2993   // -- Basis
2994   {
2995     CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2996 
2997     CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2998     CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2999     CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
3000     CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
3001     CeedCall(CeedFree(&fdm_interp));
3002     CeedCall(CeedFree(&grad_dummy));
3003     CeedCall(CeedFree(&q_ref_dummy));
3004     CeedCall(CeedFree(&q_weight_dummy));
3005     CeedCall(CeedFree(&lambda));
3006   }
3007 
3008   // -- Restriction
3009   {
3010     CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp};
3011     CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp,
3012                                               (CeedSize)num_elem * (CeedSize)num_comp * (CeedSize)num_nodes, strides, &rstr_qd_i));
3013   }
3014 
3015   // -- QFunction
3016   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
3017   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
3018   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
3019   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
3020   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
3021 
3022   // -- QFunction context
3023   {
3024     CeedInt *num_comp_data;
3025 
3026     CeedCall(CeedCalloc(1, &num_comp_data));
3027     num_comp_data[0] = num_comp;
3028     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
3029     CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
3030   }
3031   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
3032   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
3033 
3034   // -- Operator
3035   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
3036   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
3037   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data));
3038   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
3039 
3040   // Cleanup
3041   CeedCall(CeedDestroy(&ceed));
3042   CeedCall(CeedDestroy(&ceed_parent));
3043   CeedCall(CeedVectorDestroy(&q_data));
3044   CeedCall(CeedElemRestrictionDestroy(&rstr));
3045   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
3046   CeedCall(CeedBasisDestroy(&basis));
3047   CeedCall(CeedBasisDestroy(&fdm_basis));
3048   CeedCall(CeedQFunctionDestroy(&qf));
3049   CeedCall(CeedQFunctionDestroy(&qf_fdm));
3050   return CEED_ERROR_SUCCESS;
3051 }
3052 
3053 /// @}
3054