xref: /libCEED/interface/ceed-preconditioning.c (revision f36e753100926c940e21f506e95c0b0794520501)
1 // Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a `CeedQFunction` with a reference `Ceed` to fallback for advanced `CeedOperator` functionality
28 
29   @param[in]  fallback_ceed `Ceed` on which to create fallback `CeedQFunction`
30   @param[in]  qf            `CeedQFunction` to create fallback for
31   @param[out] qf_fallback   Fallback `CeedQFunction`
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   char               *source_path_with_name = NULL;
39   CeedInt             num_input_fields, num_output_fields;
40   CeedQFunctionField *input_fields, *output_fields;
41 
42   // Check if NULL qf passed in
43   if (!qf) return CEED_ERROR_SUCCESS;
44 
45   CeedDebug(CeedQFunctionReturnCeed(qf), "Creating fallback CeedQFunction\n");
46 
47   if (qf->source_path) {
48     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
49 
50     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
51     memcpy(source_path_with_name, qf->source_path, path_len);
52     memcpy(&source_path_with_name[path_len], ":", 1);
53     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
54   } else if (qf->user_source) {
55     CeedCall(CeedStringAllocCopy(qf->user_source, &source_path_with_name));
56   } else {
57     CeedCall(CeedCalloc(1, &source_path_with_name));
58   }
59 
60   {
61     CeedInt           vec_length;
62     CeedQFunctionUser f;
63 
64     CeedCall(CeedQFunctionGetVectorLength(qf, &vec_length));
65     CeedCall(CeedQFunctionGetUserFunction(qf, &f));
66     CeedCall(CeedQFunctionCreateInterior(fallback_ceed, vec_length, f, source_path_with_name, qf_fallback));
67   }
68   {
69     CeedQFunctionContext ctx;
70 
71     CeedCall(CeedQFunctionGetContext(qf, &ctx));
72     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
73     CeedCall(CeedQFunctionContextDestroy(&ctx));
74   }
75   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
76   for (CeedInt i = 0; i < num_input_fields; i++) {
77     const char  *field_name;
78     CeedInt      size;
79     CeedEvalMode eval_mode;
80 
81     CeedCall(CeedQFunctionFieldGetData(input_fields[i], &field_name, &size, &eval_mode));
82     CeedCall(CeedQFunctionAddInput(*qf_fallback, field_name, size, eval_mode));
83   }
84   for (CeedInt i = 0; i < num_output_fields; i++) {
85     const char  *field_name;
86     CeedInt      size;
87     CeedEvalMode eval_mode;
88 
89     CeedCall(CeedQFunctionFieldGetData(output_fields[i], &field_name, &size, &eval_mode));
90     CeedCall(CeedQFunctionAddOutput(*qf_fallback, field_name, size, eval_mode));
91   }
92   CeedCall(CeedFree(&source_path_with_name));
93   return CEED_ERROR_SUCCESS;
94 }
95 
96 /**
97   @brief Duplicate a `CeedOperator` with a reference `Ceed` to fallback for advanced `CeedOperator` functionality
98 
99   @param[in,out] op `CeedOperator` to create fallback for
100 
101   @return An error code: 0 - success, otherwise - failure
102 
103   @ref Developer
104 **/
105 static int CeedOperatorCreateFallback(CeedOperator op) {
106   bool         is_composite;
107   Ceed         ceed, ceed_fallback;
108   CeedOperator op_fallback;
109 
110   // Check not already created
111   if (op->op_fallback) return CEED_ERROR_SUCCESS;
112 
113   // Fallback Ceed
114   CeedCall(CeedOperatorGetCeed(op, &ceed));
115   CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
116   CeedCall(CeedDestroy(&ceed));
117   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
118 
119   CeedDebug(CeedOperatorReturnCeed(op), "Creating fallback CeedOperator\n");
120 
121   // Clone Op
122   CeedCall(CeedOperatorIsComposite(op, &is_composite));
123   if (is_composite) {
124     CeedInt       num_suboperators;
125     CeedOperator *sub_operators;
126 
127     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
128     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
129     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
130     for (CeedInt i = 0; i < num_suboperators; i++) {
131       CeedOperator op_sub_fallback;
132 
133       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
134       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
135     }
136   } else {
137     bool               is_at_points = false;
138     CeedInt            num_input_fields, num_output_fields;
139     CeedQFunction      qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
140     CeedOperatorField *input_fields, *output_fields;
141 
142     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
143     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
144     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
145     CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
146     if (is_at_points) {
147       CeedVector          points;
148       CeedElemRestriction rstr_points;
149 
150       CeedCall(CeedOperatorCreateAtPoints(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
151       CeedCall(CeedOperatorAtPointsGetPoints(op, &rstr_points, &points));
152       CeedCall(CeedOperatorAtPointsSetPoints(op_fallback, rstr_points, points));
153       CeedCall(CeedVectorDestroy(&points));
154       CeedCall(CeedElemRestrictionDestroy(&rstr_points));
155     } else {
156       CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
157     }
158     CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
159     for (CeedInt i = 0; i < num_input_fields; i++) {
160       const char         *field_name;
161       CeedVector          vec;
162       CeedElemRestriction rstr;
163       CeedBasis           basis;
164 
165       CeedCall(CeedOperatorFieldGetData(input_fields[i], &field_name, &rstr, &basis, &vec));
166       CeedCall(CeedOperatorSetField(op_fallback, field_name, rstr, basis, vec));
167       CeedCall(CeedVectorDestroy(&vec));
168       CeedCall(CeedElemRestrictionDestroy(&rstr));
169       CeedCall(CeedBasisDestroy(&basis));
170     }
171     for (CeedInt i = 0; i < num_output_fields; i++) {
172       const char         *field_name;
173       CeedVector          vec;
174       CeedElemRestriction rstr;
175       CeedBasis           basis;
176 
177       CeedCall(CeedOperatorFieldGetData(output_fields[i], &field_name, &rstr, &basis, &vec));
178       CeedCall(CeedOperatorSetField(op_fallback, field_name, rstr, basis, vec));
179       CeedCall(CeedVectorDestroy(&vec));
180       CeedCall(CeedElemRestrictionDestroy(&rstr));
181       CeedCall(CeedBasisDestroy(&basis));
182     }
183     {
184       CeedQFunctionAssemblyData data;
185 
186       CeedCall(CeedOperatorGetQFunctionAssemblyData(op, &data));
187       CeedCall(CeedQFunctionAssemblyDataReferenceCopy(data, &op_fallback->qf_assembled));
188     }
189     // Cleanup
190     CeedCall(CeedQFunctionDestroy(&qf_fallback));
191     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
192     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
193   }
194   CeedCall(CeedOperatorSetName(op_fallback, op->name));
195   CeedCall(CeedOperatorCheckReady(op_fallback));
196   // Note: No ref-counting here so we don't get caught in a reference loop.
197   //       The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback.
198   op->op_fallback                 = op_fallback;
199   op_fallback->op_fallback_parent = op;
200   CeedCall(CeedDestroy(&ceed_fallback));
201   return CEED_ERROR_SUCCESS;
202 }
203 
204 /**
205   @brief Core logic for assembling operator diagonal or point block diagonal
206 
207   @param[in]  op             `CeedOperator` to assemble diagonal or point block diagonal
208   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
209   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
210   @param[out] assembled      `CeedVector` to store assembled diagonal
211 
212   @return An error code: 0 - success, otherwise - failure
213 
214   @ref Developer
215 **/
216 static inline int CeedSingleOperatorLinearAssembleAddDiagonal_Mesh(CeedOperator op, CeedRequest *request, const bool is_point_block,
217                                                                    CeedVector assembled) {
218   bool is_composite;
219 
220   CeedCall(CeedOperatorIsComposite(op, &is_composite));
221   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
222 
223   // Assemble QFunction
224   CeedInt             layout_qf[3];
225   const CeedScalar   *assembled_qf_array;
226   CeedVector          assembled_qf        = NULL;
227   CeedElemRestriction assembled_elem_rstr = NULL;
228 
229   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
230   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, layout_qf));
231   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
232   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
233 
234   // Get assembly data
235   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
236   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
237   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
238   CeedBasis               *active_bases_in, *active_bases_out;
239   CeedElemRestriction     *active_elem_rstrs_in, *active_elem_rstrs_out;
240   CeedOperatorAssemblyData data;
241 
242   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
243   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
244                                                 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out,
245                                                 &num_output_components));
246   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL));
247   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out));
248 
249   // Loop over all active bases (find matching input/output pairs)
250   for (CeedInt b = 0; b < CeedIntMin(num_active_bases_in, num_active_bases_out); b++) {
251     CeedInt             b_in, b_out, num_elem, num_nodes, num_qpts, num_comp;
252     bool                has_eval_none = false;
253     CeedScalar         *elem_diag_array, *identity = NULL;
254     CeedVector          elem_diag;
255     CeedElemRestriction diag_elem_rstr;
256 
257     if (num_active_bases_in <= num_active_bases_out) {
258       b_in = b;
259       for (b_out = 0; b_out < num_active_bases_out; b_out++) {
260         if (active_bases_in[b_in] == active_bases_out[b_out]) {
261           break;
262         }
263       }
264       if (b_out == num_active_bases_out) {
265         continue;
266       }  // No matching output basis found
267     } else {
268       b_out = b;
269       for (b_in = 0; b_in < num_active_bases_in; b_in++) {
270         if (active_bases_in[b_in] == active_bases_out[b_out]) {
271           break;
272         }
273       }
274       if (b_in == num_active_bases_in) {
275         continue;
276       }  // No matching output basis found
277     }
278     CeedCheck(active_elem_rstrs_in[b_in] == active_elem_rstrs_out[b_out], CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
279               "Cannot assemble operator diagonal with different input and output active element restrictions");
280 
281     // Assemble point block diagonal restriction, if needed
282     if (is_point_block) {
283       CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b_in], &diag_elem_rstr));
284     } else {
285       CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b_in], &diag_elem_rstr));
286     }
287 
288     // Create diagonal vector
289     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
290 
291     // Assemble element operator diagonals
292     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
293     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
294     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
295     CeedCall(CeedBasisGetNumNodes(active_bases_in[b_in], &num_nodes));
296     CeedCall(CeedBasisGetNumComponents(active_bases_in[b_in], &num_comp));
297     if (active_bases_in[b_in] == CEED_BASIS_NONE) num_qpts = num_nodes;
298     else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b_in], &num_qpts));
299 
300     // Construct identity matrix for basis if required
301     for (CeedInt i = 0; i < num_eval_modes_in[b_in]; i++) {
302       has_eval_none = has_eval_none || (eval_modes_in[b_in][i] == CEED_EVAL_NONE);
303     }
304     for (CeedInt i = 0; i < num_eval_modes_out[b_out]; i++) {
305       has_eval_none = has_eval_none || (eval_modes_out[b_out][i] == CEED_EVAL_NONE);
306     }
307     if (has_eval_none) {
308       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
309       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
310     }
311 
312     // Compute the diagonal of B^T D B
313     // Each element
314     for (CeedSize e = 0; e < num_elem; e++) {
315       // Each basis eval mode pair
316       CeedInt      d_out              = 0, q_comp_out;
317       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
318 
319       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b_out]; e_out++) {
320         CeedInt           d_in              = 0, q_comp_in;
321         const CeedScalar *B_t               = NULL;
322         CeedEvalMode      eval_mode_in_prev = CEED_EVAL_NONE;
323 
324         CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b_out], eval_modes_out[b_out][e_out], identity, &B_t));
325         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b_out], eval_modes_out[b_out][e_out], &q_comp_out));
326         if (q_comp_out > 1) {
327           if (e_out == 0 || eval_modes_out[b_out][e_out] != eval_mode_out_prev) d_out = 0;
328           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
329         }
330         eval_mode_out_prev = eval_modes_out[b_out][e_out];
331 
332         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b_in]; e_in++) {
333           const CeedScalar *B = NULL;
334 
335           CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b_in], eval_modes_in[b_in][e_in], identity, &B));
336           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b_in], eval_modes_in[b_in][e_in], &q_comp_in));
337           if (q_comp_in > 1) {
338             if (e_in == 0 || eval_modes_in[b_in][e_in] != eval_mode_in_prev) d_in = 0;
339             else B = &B[(++d_in) * num_qpts * num_nodes];
340           }
341           eval_mode_in_prev = eval_modes_in[b_in][e_in];
342 
343           // Each component
344           for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
345             // Each qpt/node pair
346             for (CeedInt q = 0; q < num_qpts; q++) {
347               if (is_point_block) {
348                 // Point Block Diagonal
349                 for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
350                   const CeedSize c_offset =
351                       (eval_mode_offsets_in[b_in][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
352                   const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
353 
354                   for (CeedInt n = 0; n < num_nodes; n++) {
355                     elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
356                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
357                   }
358                 }
359               } else {
360                 // Diagonal Only
361                 const CeedInt c_offset =
362                     (eval_mode_offsets_in[b_in][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
363                 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
364 
365                 for (CeedInt n = 0; n < num_nodes; n++) {
366                   elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
367                 }
368               }
369             }
370           }
371         }
372       }
373     }
374     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
375 
376     // Assemble local operator diagonal
377     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
378 
379     // Cleanup
380     CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
381     CeedCall(CeedVectorDestroy(&elem_diag));
382     CeedCall(CeedFree(&identity));
383   }
384   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
385   CeedCall(CeedVectorDestroy(&assembled_qf));
386   return CEED_ERROR_SUCCESS;
387 }
388 
389 /**
390   @brief Core logic for assembling operator diagonal or point block diagonal
391 
392   @param[in]  op             `CeedOperator` to assemble diagonal or point block diagonal
393   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
394   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
395   @param[out] assembled      `CeedVector` to store assembled diagonal
396 
397   @return An error code: 0 - success, otherwise - failure
398 
399   @ref Developer
400 **/
401 static inline int CeedSingleOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block,
402                                                               CeedVector assembled) {
403   bool is_at_points;
404 
405   CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
406   CeedCheck(!is_at_points, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "AtPoints operator not supported");
407   CeedCall(CeedSingleOperatorLinearAssembleAddDiagonal_Mesh(op, request, is_point_block, assembled));
408   return CEED_ERROR_SUCCESS;
409 }
410 
411 /**
412   @brief Core logic for assembling composite operator diagonal
413 
414   @param[in]  op             `CeedOperator` to assemble point block diagonal
415   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
416   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
417   @param[out] assembled      `CeedVector` to store assembled diagonal
418 
419   @return An error code: 0 - success, otherwise - failure
420 
421   @ref Developer
422 **/
423 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block,
424                                                                  CeedVector assembled) {
425   CeedInt       num_sub;
426   CeedOperator *suboperators;
427 
428   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
429   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
430   for (CeedInt i = 0; i < num_sub; i++) {
431     if (is_point_block) {
432       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
433     } else {
434       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
435     }
436   }
437   return CEED_ERROR_SUCCESS;
438 }
439 
440 /**
441   @brief Build nonzero pattern for non-composite CeedOperator`.
442 
443   Users should generally use @ref CeedOperatorLinearAssembleSymbolic().
444 
445   @param[in]  op     `CeedOperator` to assemble nonzero pattern
446   @param[in]  offset Offset for number of entries
447   @param[out] rows   Row number for each entry
448   @param[out] cols   Column number for each entry
449 
450   @return An error code: 0 - success, otherwise - failure
451 
452   @ref Developer
453 **/
454 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
455   Ceed                ceed;
456   bool                is_composite;
457   CeedSize            num_nodes_in, num_nodes_out, local_num_entries, count = 0;
458   CeedInt             num_elem_in, elem_size_in, num_comp_in, layout_er_in[3];
459   CeedInt             num_elem_out, elem_size_out, num_comp_out, layout_er_out[3];
460   CeedScalar         *array;
461   const CeedScalar   *elem_dof_a_in, *elem_dof_a_out;
462   CeedVector          index_vec_in, index_vec_out, elem_dof_in, elem_dof_out;
463   CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out;
464 
465   CeedCall(CeedOperatorIsComposite(op, &is_composite));
466   CeedCall(CeedOperatorGetCeed(op, &ceed));
467   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
468 
469   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out));
470   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
471   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
472   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
473   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
474   CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, layout_er_in));
475 
476   // Determine elem_dof relation for input
477   CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in));
478   CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array));
479   for (CeedSize i = 0; i < num_nodes_in; i++) array[i] = i;
480   CeedCall(CeedVectorRestoreArray(index_vec_in, &array));
481   CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in));
482   CeedCall(CeedVectorSetValue(elem_dof_in, 0.0));
483   CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in));
484   CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE));
485   CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in));
486   CeedCall(CeedVectorDestroy(&index_vec_in));
487   CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in));
488 
489   if (elem_rstr_in != elem_rstr_out) {
490     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
491     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
492               "Active input and output operator restrictions must have the same number of elements."
493               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
494               num_elem_in, num_elem_out);
495     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
496     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
497     CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, layout_er_out));
498 
499     // Determine elem_dof relation for output
500     CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out));
501     CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array));
502     for (CeedSize i = 0; i < num_nodes_out; i++) array[i] = i;
503     CeedCall(CeedVectorRestoreArray(index_vec_out, &array));
504     CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out));
505     CeedCall(CeedVectorSetValue(elem_dof_out, 0.0));
506     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out));
507     CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE));
508     CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out));
509     CeedCall(CeedVectorDestroy(&index_vec_out));
510     CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out));
511   } else {
512     num_elem_out     = num_elem_in;
513     elem_size_out    = elem_size_in;
514     num_comp_out     = num_comp_in;
515     layout_er_out[0] = layout_er_in[0];
516     layout_er_out[1] = layout_er_in[1];
517     layout_er_out[2] = layout_er_in[2];
518     elem_dof_a_out   = elem_dof_a_in;
519   }
520   local_num_entries = (CeedSize)elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
521 
522   // Determine i, j locations for element matrices
523   for (CeedInt e = 0; e < num_elem_in; e++) {
524     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
525       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
526         for (CeedInt i = 0; i < elem_size_out; i++) {
527           for (CeedInt j = 0; j < elem_size_in; j++) {
528             const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2];
529             const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2];
530             const CeedInt row                = elem_dof_a_out[elem_dof_index_row];
531             const CeedInt col                = elem_dof_a_in[elem_dof_index_col];
532 
533             rows[offset + count] = row;
534             cols[offset + count] = col;
535             count++;
536           }
537         }
538       }
539     }
540   }
541   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
542   CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in));
543   CeedCall(CeedVectorDestroy(&elem_dof_in));
544   if (elem_rstr_in != elem_rstr_out) {
545     CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out));
546     CeedCall(CeedVectorDestroy(&elem_dof_out));
547   }
548   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
549   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
550   CeedCall(CeedDestroy(&ceed));
551   return CEED_ERROR_SUCCESS;
552 }
553 
554 /**
555   @brief Core logic to assemble `CeedQFunction` and store result internally.
556 
557   Return copied references of stored data to the caller.
558   Caller is responsible for ownership and destruction of the copied references.
559   See also @ref CeedOperatorLinearAssembleQFunction().
560 
561   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
562         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
563 
564   @param[in]  op         `CeedOperator` to assemble `CeedQFunction`
565   @param[in]  use_parent Boolean flag to check for fallback parent implementation
566   @param[out] assembled  `CeedVector` to store assembled `CeedQFunction` at quadrature points
567   @param[out] rstr       `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
568   @param[in]  request    Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
569 
570   @return An error code: 0 - success, otherwise - failure
571 
572   @ref User
573 **/
574 static int CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(CeedOperator op, bool use_parent, CeedVector *assembled, CeedElemRestriction *rstr,
575                                                                  CeedRequest *request) {
576   int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL;
577   CeedOperator op_assemble                                                                           = NULL;
578   CeedOperator op_fallback_parent                                                                    = NULL;
579 
580   CeedCall(CeedOperatorCheckReady(op));
581 
582   // Determine if fallback parent or operator has implementation
583   CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent));
584   if (op_fallback_parent && use_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) {
585     // -- Backend version for op fallback parent is faster, if it exists
586     CeedDebug(CeedOperatorReturnCeed(op), "Using fallback parent for CeedOperatorLinearAssembleQFunctionBuildOrUpdate\n");
587     LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate;
588     op_assemble                   = op_fallback_parent;
589   } else if (op->LinearAssembleQFunctionUpdate) {
590     // -- Backend version for op
591     LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate;
592     op_assemble                   = op;
593   }
594 
595   // Assemble QFunction
596   if (LinearAssembleQFunctionUpdate) {
597     // Backend or fallback parent version
598     CeedQFunctionAssemblyData data;
599     bool                      data_is_setup;
600     CeedVector                assembled_vec  = NULL;
601     CeedElemRestriction       assembled_rstr = NULL;
602 
603     CeedCall(CeedOperatorGetQFunctionAssemblyData(op, &data));
604     CeedCall(CeedQFunctionAssemblyDataIsSetup(data, &data_is_setup));
605     if (data_is_setup) {
606       bool update_needed;
607 
608       CeedCall(CeedQFunctionAssemblyDataGetObjects(data, &assembled_vec, &assembled_rstr));
609       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(data, &update_needed));
610       if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request));
611     } else {
612       CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request));
613       CeedCall(CeedQFunctionAssemblyDataSetObjects(data, assembled_vec, assembled_rstr));
614     }
615     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(data, false));
616 
617     // Copy reference from internally held copy
618     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
619     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
620     CeedCall(CeedVectorDestroy(&assembled_vec));
621     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
622   } else {
623     // Operator fallback
624     CeedOperator op_fallback;
625 
626     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleQFunctionBuildOrUpdate\n");
627     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
628     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
629     else return CeedError(CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
630   }
631   return CEED_ERROR_SUCCESS;
632 }
633 
634 /**
635   @brief Assemble `CeedQFunction` and store result internally, but do not use fallback parent.
636 
637   Return copied references of stored data to the caller.
638   Caller is responsible for ownership and destruction of the copied references.
639   See also @ref CeedOperatorLinearAssembleQFunction().
640 
641   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
642         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
643 
644   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
645   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
646   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
647   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
648 
649   @return An error code: 0 - success, otherwise - failure
650 
651   @ref Developer
652 **/
653 int CeedOperatorFallbackLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr,
654                                                              CeedRequest *request) {
655   return CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(op, false, assembled, rstr, request);
656 }
657 
658 /**
659   @brief Assemble nonzero entries for non-composite `CeedOperator`.
660 
661   Users should generally use @ref CeedOperatorLinearAssemble().
662 
663   @param[in]  op     `CeedOperator` to assemble
664   @param[in]  offset Offset for number of entries
665   @param[out] values Values to assemble into matrix
666 
667   @return An error code: 0 - success, otherwise - failure
668 
669   @ref Developer
670 **/
671 int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
672   bool is_composite, is_at_points;
673 
674   CeedCall(CeedOperatorIsComposite(op, &is_composite));
675   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
676 
677   // Early exit for empty operator
678   {
679     CeedInt num_elem = 0;
680 
681     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
682     if (num_elem == 0) return CEED_ERROR_SUCCESS;
683   }
684 
685   if (op->LinearAssembleSingle) {
686     // Backend version
687     CeedCall(op->LinearAssembleSingle(op, offset, values));
688     return CEED_ERROR_SUCCESS;
689   } else {
690     // Operator fallback
691     CeedOperator op_fallback;
692 
693     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedSingleOperatorAssemble\n");
694     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
695     if (op_fallback) {
696       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
697       return CEED_ERROR_SUCCESS;
698     }
699   }
700 
701   CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
702   CeedCheck(!is_at_points, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
703             "Backend does not implement CeedOperatorLinearAssemble for AtPoints operator");
704 
705   // Assemble QFunction
706   CeedInt             layout_qf[3];
707   const CeedScalar   *assembled_qf_array;
708   CeedVector          assembled_qf        = NULL;
709   CeedElemRestriction assembled_elem_rstr = NULL;
710 
711   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE));
712   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, layout_qf));
713   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
714   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
715 
716   // Get assembly data
717   CeedInt                  num_elem_in, elem_size_in, num_comp_in, num_qpts_in;
718   CeedInt                  num_elem_out, elem_size_out, num_comp_out, num_qpts_out;
719   CeedSize                 local_num_entries, count = 0;
720   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
721   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
722   CeedBasis               *active_bases_in, *active_bases_out, basis_in, basis_out;
723   const CeedScalar       **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out;
724   CeedElemRestriction      elem_rstr_in, elem_rstr_out;
725   CeedRestrictionType      elem_rstr_type_in, elem_rstr_type_out;
726   const bool              *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL;
727   const CeedInt8          *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL;
728   CeedOperatorAssemblyData data;
729 
730   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
731   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out,
732                                                 &num_eval_modes_out, &eval_modes_out, NULL, NULL));
733 
734   CeedCheck(num_active_bases_in == 1 && num_active_bases_out == 1, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
735             "Cannot assemble operator with multiple active bases");
736   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
737             "Cannot assemble operator without inputs/outputs");
738 
739   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out));
740   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
741   basis_in  = active_bases_in[0];
742   basis_out = active_bases_out[0];
743   B_mat_in  = B_mats_in[0];
744   B_mat_out = B_mats_out[0];
745 
746   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
747   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
748   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
749   if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in;
750   else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in));
751 
752   CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in));
753   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
754     CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in));
755   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
756     CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in));
757   }
758 
759   if (elem_rstr_in != elem_rstr_out) {
760     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
761     CeedCheck(num_elem_in == num_elem_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
762               "Active input and output operator restrictions must have the same number of elements."
763               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
764               num_elem_in, num_elem_out);
765     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
766     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
767     if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out;
768     else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out));
769     CeedCheck(num_qpts_in == num_qpts_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
770               "Active input and output bases must have the same number of quadrature points."
771               " Input has %" CeedInt_FMT " points; output has %" CeedInt_FMT "points.",
772               num_qpts_in, num_qpts_out);
773 
774     CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out));
775     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
776       CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out));
777     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
778       CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out));
779     }
780   } else {
781     num_elem_out  = num_elem_in;
782     elem_size_out = elem_size_in;
783     num_comp_out  = num_comp_in;
784     num_qpts_out  = num_qpts_in;
785 
786     elem_rstr_orients_out      = elem_rstr_orients_in;
787     elem_rstr_curl_orients_out = elem_rstr_curl_orients_in;
788   }
789   local_num_entries = (CeedSize)elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
790 
791   // Loop over elements and put in data structure
792   // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
793   CeedTensorContract contract;
794   CeedScalar        *vals, *BTD_mat = NULL, *elem_mat = NULL, *elem_mat_b = NULL;
795 
796   CeedCall(CeedBasisGetTensorContract(basis_in, &contract));
797   CeedCall(CeedCalloc(elem_size_out * num_qpts_in * num_eval_modes_in[0], &BTD_mat));
798   CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat));
799   if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b));
800 
801   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
802   for (CeedSize e = 0; e < num_elem_in; e++) {
803     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
804       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
805         // Compute B^T*D
806         for (CeedSize n = 0; n < elem_size_out; n++) {
807           for (CeedSize q = 0; q < num_qpts_in; q++) {
808             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
809               const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in;
810               CeedScalar     sum       = 0.0;
811 
812               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
813                 const CeedSize b_out_index     = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n;
814                 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out;
815                 const CeedSize qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
816 
817                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
818               }
819               BTD_mat[btd_index] = sum;
820             }
821           }
822         }
823 
824         // Form element matrix itself (for each block component)
825         if (contract) {
826           CeedCall(CeedTensorContractApply(contract, 1, num_qpts_in * num_eval_modes_in[0], elem_size_in, elem_size_out, BTD_mat, CEED_NOTRANSPOSE,
827                                            false, B_mat_in, elem_mat));
828         } else {
829           Ceed ceed;
830 
831           CeedCall(CeedOperatorGetCeed(op, &ceed));
832           CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size_out, elem_size_in, num_qpts_in * num_eval_modes_in[0]));
833           CeedCall(CeedDestroy(&ceed));
834         }
835 
836         // Transform the element matrix if required
837         if (elem_rstr_orients_out) {
838           const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out];
839 
840           for (CeedInt i = 0; i < elem_size_out; i++) {
841             const double orient = elem_orients[i] ? -1.0 : 1.0;
842 
843             for (CeedInt j = 0; j < elem_size_in; j++) {
844               elem_mat[i * elem_size_in + j] *= orient;
845             }
846           }
847         } else if (elem_rstr_curl_orients_out) {
848           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out];
849 
850           // T^T*(B^T*D*B)
851           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
852           for (CeedInt i = 0; i < elem_size_out; i++) {
853             for (CeedInt j = 0; j < elem_size_in; j++) {
854               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] +
855                                                (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) +
856                                                (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0);
857             }
858           }
859         }
860         if (elem_rstr_orients_in) {
861           const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in];
862 
863           for (CeedInt i = 0; i < elem_size_out; i++) {
864             for (CeedInt j = 0; j < elem_size_in; j++) {
865               elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0;
866             }
867           }
868         } else if (elem_rstr_curl_orients_in) {
869           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in];
870 
871           // (B^T*D*B)*T
872           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
873           for (CeedInt i = 0; i < elem_size_out; i++) {
874             for (CeedInt j = 0; j < elem_size_in; j++) {
875               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] +
876                                                (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) +
877                                                (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0);
878             }
879           }
880         }
881 
882         // Put element matrix in coordinate data structure
883         for (CeedInt i = 0; i < elem_size_out; i++) {
884           for (CeedInt j = 0; j < elem_size_in; j++) {
885             vals[offset + count] = elem_mat[i * elem_size_in + j];
886             count++;
887           }
888         }
889       }
890     }
891   }
892   CeedCheck(count == local_num_entries, CeedOperatorReturnCeed(op), CEED_ERROR_MAJOR, "Error computing entries");
893   CeedCall(CeedVectorRestoreArray(values, &vals));
894 
895   // Cleanup
896   CeedCall(CeedFree(&BTD_mat));
897   CeedCall(CeedFree(&elem_mat));
898   CeedCall(CeedFree(&elem_mat_b));
899   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
900     CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in));
901   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
902     CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in));
903   }
904   if (elem_rstr_in != elem_rstr_out) {
905     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
906       CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out));
907     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
908       CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out));
909     }
910   }
911   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
912   CeedCall(CeedVectorDestroy(&assembled_qf));
913   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
914   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
915   return CEED_ERROR_SUCCESS;
916 }
917 
918 /**
919   @brief Count number of entries for assembled `CeedOperator`
920 
921   @param[in]  op          `CeedOperator` to assemble
922   @param[out] num_entries Number of entries in assembled representation
923 
924   @return An error code: 0 - success, otherwise - failure
925 
926   @ref Utility
927 **/
928 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) {
929   bool                is_composite;
930   CeedInt             num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out;
931   CeedElemRestriction rstr_in, rstr_out;
932 
933   CeedCall(CeedOperatorIsComposite(op, &is_composite));
934   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
935 
936   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out));
937   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in));
938   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in));
939   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in));
940   if (rstr_in != rstr_out) {
941     CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out));
942     CeedCheck(num_elem_in == num_elem_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
943               "Active input and output operator restrictions must have the same number of elements."
944               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
945               num_elem_in, num_elem_out);
946     CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out));
947     CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out));
948   } else {
949     num_elem_out  = num_elem_in;
950     elem_size_out = elem_size_in;
951     num_comp_out  = num_comp_in;
952   }
953   CeedCall(CeedElemRestrictionDestroy(&rstr_in));
954   CeedCall(CeedElemRestrictionDestroy(&rstr_out));
955   *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in;
956   return CEED_ERROR_SUCCESS;
957 }
958 
959 /**
960   @brief Common code for creating a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator`
961 
962   @param[in]  op_fine      Fine grid `CeedOperator`
963   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
964   @param[in]  rstr_coarse  Coarse grid `CeedElemRestriction`
965   @param[in]  basis_coarse Coarse grid active vector `CeedBasis`
966   @param[in]  basis_c_to_f `CeedBasis` for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction operators
967   @param[out] op_coarse    Coarse grid `CeedOperator`
968   @param[out] op_prolong   Coarse to fine `CeedOperator`, or `NULL`
969   @param[out] op_restrict  Fine to coarse `CeedOperator`, or `NULL`
970 
971   @return An error code: 0 - success, otherwise - failure
972 
973   @ref Developer
974 **/
975 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
976                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
977   bool                is_composite;
978   Ceed                ceed;
979   CeedInt             num_comp, num_input_fields, num_output_fields;
980   CeedVector          mult_vec         = NULL;
981   CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL;
982   CeedOperatorField  *input_fields, *output_fields;
983 
984   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
985 
986   // Check for composite operator
987   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
988   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
989 
990   // Coarse Grid
991   {
992     bool is_at_points;
993 
994     CeedCall(CeedOperatorIsAtPoints(op_fine, &is_at_points));
995     if (is_at_points) {
996       CeedVector          point_coords;
997       CeedElemRestriction rstr_points;
998 
999       CeedCall(CeedOperatorCreateAtPoints(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
1000       CeedCall(CeedOperatorAtPointsGetPoints(op_fine, &rstr_points, &point_coords));
1001       CeedCall(CeedOperatorAtPointsSetPoints(*op_coarse, rstr_points, point_coords));
1002       CeedCall(CeedVectorDestroy(&point_coords));
1003       CeedCall(CeedElemRestrictionDestroy(&rstr_points));
1004     } else {
1005       CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
1006     }
1007   }
1008   CeedCall(CeedOperatorGetFields(op_fine, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
1009   // -- Clone input fields
1010   for (CeedInt i = 0; i < num_input_fields; i++) {
1011     const char         *field_name;
1012     CeedVector          vec;
1013     CeedElemRestriction rstr  = NULL;
1014     CeedBasis           basis = NULL;
1015 
1016     CeedCall(CeedOperatorFieldGetName(input_fields[i], &field_name));
1017     CeedCall(CeedOperatorFieldGetVector(input_fields[i], &vec));
1018     if (vec == CEED_VECTOR_ACTIVE) {
1019       CeedCall(CeedElemRestrictionReferenceCopy(rstr_coarse, &rstr));
1020       CeedCall(CeedBasisReferenceCopy(basis_coarse, &basis));
1021       if (!rstr_fine) CeedCall(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_fine));
1022     } else {
1023       CeedCall(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr));
1024       CeedCall(CeedOperatorFieldGetBasis(input_fields[i], &basis));
1025     }
1026     CeedCall(CeedOperatorSetField(*op_coarse, field_name, rstr, basis, vec));
1027     CeedCall(CeedVectorDestroy(&vec));
1028     CeedCall(CeedElemRestrictionDestroy(&rstr));
1029     CeedCall(CeedBasisDestroy(&basis));
1030   }
1031   // -- Clone output fields
1032   for (CeedInt i = 0; i < num_output_fields; i++) {
1033     const char         *field_name;
1034     CeedVector          vec;
1035     CeedElemRestriction rstr  = NULL;
1036     CeedBasis           basis = NULL;
1037 
1038     CeedCall(CeedOperatorFieldGetName(output_fields[i], &field_name));
1039     CeedCall(CeedOperatorFieldGetVector(output_fields[i], &vec));
1040     if (vec == CEED_VECTOR_ACTIVE) {
1041       CeedCall(CeedElemRestrictionReferenceCopy(rstr_coarse, &rstr));
1042       CeedCall(CeedBasisReferenceCopy(basis_coarse, &basis));
1043       if (!rstr_fine) CeedCall(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_fine));
1044     } else {
1045       CeedCall(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr));
1046       CeedCall(CeedOperatorFieldGetBasis(output_fields[i], &basis));
1047     }
1048     CeedCall(CeedOperatorSetField(*op_coarse, field_name, rstr, basis, vec));
1049     CeedCall(CeedVectorDestroy(&vec));
1050     CeedCall(CeedElemRestrictionDestroy(&rstr));
1051     CeedCall(CeedBasisDestroy(&basis));
1052   }
1053   // -- Clone QFunctionAssemblyData
1054   {
1055     CeedQFunctionAssemblyData fine_data;
1056 
1057     CeedCall(CeedOperatorGetQFunctionAssemblyData(op_fine, &fine_data));
1058     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(fine_data, &(*op_coarse)->qf_assembled));
1059   }
1060 
1061   // Multiplicity vector
1062   if (op_restrict || op_prolong) {
1063     CeedVector          mult_e_vec;
1064     CeedRestrictionType rstr_type;
1065 
1066     CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type));
1067     CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED,
1068               "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported");
1069     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
1070     CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine));
1071     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
1072     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
1073     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
1074     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
1075     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
1076     CeedCall(CeedVectorDestroy(&mult_e_vec));
1077     CeedCall(CeedVectorReciprocal(mult_vec));
1078   }
1079 
1080   // Clone name
1081   bool   has_name = op_fine->name;
1082   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
1083   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
1084 
1085   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
1086   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
1087             "Prolongation or restriction operator creation requires coarse-to-fine basis");
1088 
1089   // Restriction/Prolongation Operators
1090   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
1091 
1092   // Restriction
1093   if (op_restrict) {
1094     CeedInt             *num_comp_r_data;
1095     CeedQFunctionContext ctx_r;
1096     CeedQFunction        qf_restrict;
1097 
1098     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
1099     CeedCall(CeedCalloc(1, &num_comp_r_data));
1100     num_comp_r_data[0] = num_comp;
1101     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
1102     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
1103     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
1104     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
1105     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
1106     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
1107     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
1108     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
1109 
1110     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
1111     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
1112     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
1113     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
1114 
1115     // Set name
1116     char *restriction_name;
1117 
1118     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
1119     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
1120     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
1121     CeedCall(CeedFree(&restriction_name));
1122 
1123     // Check
1124     CeedCall(CeedOperatorCheckReady(*op_restrict));
1125 
1126     // Cleanup
1127     CeedCall(CeedQFunctionDestroy(&qf_restrict));
1128   }
1129 
1130   // Prolongation
1131   if (op_prolong) {
1132     CeedInt             *num_comp_p_data;
1133     CeedQFunctionContext ctx_p;
1134     CeedQFunction        qf_prolong;
1135 
1136     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
1137     CeedCall(CeedCalloc(1, &num_comp_p_data));
1138     num_comp_p_data[0] = num_comp;
1139     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
1140     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
1141     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
1142     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
1143     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
1144     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
1145     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
1146     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
1147 
1148     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
1149     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
1150     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
1151     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
1152 
1153     // Set name
1154     char *prolongation_name;
1155 
1156     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
1157     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
1158     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
1159     CeedCall(CeedFree(&prolongation_name));
1160 
1161     // Check
1162     CeedCall(CeedOperatorCheckReady(*op_prolong));
1163 
1164     // Cleanup
1165     CeedCall(CeedQFunctionDestroy(&qf_prolong));
1166   }
1167 
1168   // Check
1169   CeedCall(CeedOperatorCheckReady(*op_coarse));
1170 
1171   // Cleanup
1172   CeedCall(CeedDestroy(&ceed));
1173   CeedCall(CeedVectorDestroy(&mult_vec));
1174   CeedCall(CeedElemRestrictionDestroy(&rstr_fine));
1175   CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine));
1176   CeedCall(CeedBasisDestroy(&basis_c_to_f));
1177   return CEED_ERROR_SUCCESS;
1178 }
1179 
1180 /**
1181   @brief Build 1D mass matrix and Laplacian with perturbation
1182 
1183   @param[in]  interp_1d   Interpolation matrix in one dimension
1184   @param[in]  grad_1d     Gradient matrix in one dimension
1185   @param[in]  q_weight_1d Quadrature weights in one dimension
1186   @param[in]  P_1d        Number of basis nodes in one dimension
1187   @param[in]  Q_1d        Number of quadrature points in one dimension
1188   @param[in]  dim         Dimension of basis
1189   @param[out] mass        Assembled mass matrix in one dimension
1190   @param[out] laplace     Assembled perturbed Laplacian in one dimension
1191 
1192   @return An error code: 0 - success, otherwise - failure
1193 
1194   @ref Developer
1195 **/
1196 CeedPragmaOptimizeOff
1197 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
1198                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
1199   for (CeedInt i = 0; i < P_1d; i++) {
1200     for (CeedInt j = 0; j < P_1d; j++) {
1201       CeedScalar sum = 0.0;
1202       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
1203       mass[i + j * P_1d] = sum;
1204     }
1205   }
1206   // -- Laplacian
1207   for (CeedInt i = 0; i < P_1d; i++) {
1208     for (CeedInt j = 0; j < P_1d; j++) {
1209       CeedScalar sum = 0.0;
1210 
1211       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
1212       laplace[i + j * P_1d] = sum;
1213     }
1214   }
1215   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
1216   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
1217   return CEED_ERROR_SUCCESS;
1218 }
1219 CeedPragmaOptimizeOn
1220 
1221 /// @}
1222 
1223 /// ----------------------------------------------------------------------------
1224 /// CeedOperator Backend API
1225 /// ----------------------------------------------------------------------------
1226 /// @addtogroup CeedOperatorBackend
1227 /// @{
1228 
1229 /**
1230   @brief Select correct basis matrix pointer based on @ref CeedEvalMode
1231 
1232   @param[in]  basis     `CeedBasis` from which to get the basis matrix
1233   @param[in]  eval_mode Current basis evaluation mode
1234   @param[in]  identity  Pointer to identity matrix
1235   @param[out] basis_ptr `CeedBasis` pointer to set
1236 
1237   @ref Backend
1238 **/
1239 int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
1240   switch (eval_mode) {
1241     case CEED_EVAL_NONE:
1242       *basis_ptr = identity;
1243       break;
1244     case CEED_EVAL_INTERP:
1245       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
1246       break;
1247     case CEED_EVAL_GRAD:
1248       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
1249       break;
1250     case CEED_EVAL_DIV:
1251       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
1252       break;
1253     case CEED_EVAL_CURL:
1254       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
1255       break;
1256     case CEED_EVAL_WEIGHT:
1257       break;  // Caught by QF Assembly
1258   }
1259   assert(*basis_ptr != NULL);
1260   return CEED_ERROR_SUCCESS;
1261 }
1262 
1263 /**
1264   @brief Create point block restriction for active `CeedOperatorField`
1265 
1266   @param[in]  rstr             Original `CeedElemRestriction` for active field
1267   @param[out] point_block_rstr Address of the variable where the newly created `CeedElemRestriction` will be stored
1268 
1269   @return An error code: 0 - success, otherwise - failure
1270 
1271   @ref Backend
1272 **/
1273 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) {
1274   Ceed           ceed;
1275   CeedInt        num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets;
1276   CeedSize       l_size;
1277   const CeedInt *offsets;
1278 
1279   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
1280   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
1281 
1282   // Expand offsets
1283   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
1284   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
1285   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
1286   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
1287   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
1288   shift = num_comp;
1289   if (comp_stride != 1) shift *= num_comp;
1290   CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets));
1291   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
1292     point_block_offsets[i] = offsets[i] * shift;
1293   }
1294 
1295   // Create new restriction
1296   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
1297                                      point_block_offsets, point_block_rstr));
1298 
1299   // Cleanup
1300   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
1301   CeedCall(CeedDestroy(&ceed));
1302   return CEED_ERROR_SUCCESS;
1303 }
1304 
1305 /**
1306   @brief Get `CeedQFunctionAssemblyData`
1307 
1308   @param[in]  op   `CeedOperator` to assemble
1309   @param[out] data `CeedQFunctionAssemblyData`
1310 
1311   @return An error code: 0 - success, otherwise - failure
1312 
1313   @ref Backend
1314 **/
1315 int CeedOperatorGetQFunctionAssemblyData(CeedOperator op, CeedQFunctionAssemblyData *data) {
1316   if (!op->qf_assembled) {
1317     CeedQFunctionAssemblyData data;
1318 
1319     CeedCall(CeedQFunctionAssemblyDataCreate(op->ceed, &data));
1320     op->qf_assembled = data;
1321   }
1322   *data = op->qf_assembled;
1323   return CEED_ERROR_SUCCESS;
1324 }
1325 
1326 /**
1327   @brief Create object holding `CeedQFunction` assembly data for `CeedOperator`
1328 
1329   @param[in]  ceed `Ceed` object used to create the `CeedQFunctionAssemblyData`
1330   @param[out] data Address of the variable where the newly created `CeedQFunctionAssemblyData` will be stored
1331 
1332   @return An error code: 0 - success, otherwise - failure
1333 
1334   @ref Backend
1335 **/
1336 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
1337   CeedCall(CeedCalloc(1, data));
1338   (*data)->ref_count = 1;
1339   (*data)->ceed      = ceed;
1340   CeedCall(CeedReference(ceed));
1341   return CEED_ERROR_SUCCESS;
1342 }
1343 
1344 /**
1345   @brief Increment the reference counter for a `CeedQFunctionAssemblyData`
1346 
1347   @param[in,out] data `CeedQFunctionAssemblyData` to increment the reference counter
1348 
1349   @return An error code: 0 - success, otherwise - failure
1350 
1351   @ref Backend
1352 **/
1353 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
1354   data->ref_count++;
1355   return CEED_ERROR_SUCCESS;
1356 }
1357 
1358 /**
1359   @brief Set re-use of `CeedQFunctionAssemblyData`
1360 
1361   @param[in,out] data       `CeedQFunctionAssemblyData` to mark for reuse
1362   @param[in]     reuse_data Boolean flag indicating data re-use
1363 
1364   @return An error code: 0 - success, otherwise - failure
1365 
1366   @ref Backend
1367 **/
1368 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
1369   data->reuse_data        = reuse_data;
1370   data->needs_data_update = true;
1371   return CEED_ERROR_SUCCESS;
1372 }
1373 
1374 /**
1375   @brief Mark `CeedQFunctionAssemblyData` as stale
1376 
1377   @param[in,out] data              `CeedQFunctionAssemblyData` to mark as stale
1378   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
1379 
1380   @return An error code: 0 - success, otherwise - failure
1381 
1382   @ref Backend
1383 **/
1384 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
1385   data->needs_data_update = needs_data_update;
1386   return CEED_ERROR_SUCCESS;
1387 }
1388 
1389 /**
1390   @brief Determine if `CeedQFunctionAssemblyData` needs update
1391 
1392   @param[in]  data             `CeedQFunctionAssemblyData` to mark as stale
1393   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1394 
1395   @return An error code: 0 - success, otherwise - failure
1396 
1397   @ref Backend
1398 **/
1399 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
1400   *is_update_needed = !data->reuse_data || data->needs_data_update;
1401   return CEED_ERROR_SUCCESS;
1402 }
1403 
1404 /**
1405   @brief Copy the pointer to a `CeedQFunctionAssemblyData`.
1406 
1407   Both pointers should be destroyed with @ref CeedQFunctionAssemblyDataDestroy().
1408 
1409   Note: If the value of ` *data_copy` passed to this function is non-`NULL` , then it is assumed that ` *data_copy` is a pointer to a `CeedQFunctionAssemblyData`.
1410         This `CeedQFunctionAssemblyData` will be destroyed if ` *data_copy` is the only reference to this `CeedQFunctionAssemblyData`.
1411 
1412   @param[in]     data      `CeedQFunctionAssemblyData` to copy reference to
1413   @param[in,out] data_copy Variable to store copied reference
1414 
1415   @return An error code: 0 - success, otherwise - failure
1416 
1417   @ref Backend
1418 **/
1419 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1420   CeedCall(CeedQFunctionAssemblyDataReference(data));
1421   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1422   *data_copy = data;
1423   return CEED_ERROR_SUCCESS;
1424 }
1425 
1426 /**
1427   @brief Get setup status for internal objects for `CeedQFunctionAssemblyData`
1428 
1429   @param[in]  data     `CeedQFunctionAssemblyData` to retrieve status
1430   @param[out] is_setup Boolean flag for setup status
1431 
1432   @return An error code: 0 - success, otherwise - failure
1433 
1434   @ref Backend
1435 **/
1436 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1437   *is_setup = data->is_setup;
1438   return CEED_ERROR_SUCCESS;
1439 }
1440 
1441 /**
1442   @brief Set internal objects for `CeedQFunctionAssemblyData`
1443 
1444   @param[in,out] data `CeedQFunctionAssemblyData` to set objects
1445   @param[in]     vec  `CeedVector` to store assembled `CeedQFunction` at quadrature points
1446   @param[in]     rstr `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
1447 
1448   @return An error code: 0 - success, otherwise - failure
1449 
1450   @ref Backend
1451 **/
1452 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1453   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1454   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1455 
1456   data->is_setup = true;
1457   return CEED_ERROR_SUCCESS;
1458 }
1459 
1460 /**
1461   @brief Get internal objects for `CeedQFunctionAssemblyData`
1462 
1463   @param[in,out] data `CeedQFunctionAssemblyData` to set objects
1464   @param[out]    vec  `CeedVector` to store assembled `CeedQFunction` at quadrature points
1465   @param[out]    rstr `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
1466 
1467   @return An error code: 0 - success, otherwise - failure
1468 
1469   @ref Backend
1470 **/
1471 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1472   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1473 
1474   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1475   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1476   return CEED_ERROR_SUCCESS;
1477 }
1478 
1479 /**
1480   @brief Destroy `CeedQFunctionAssemblyData`
1481 
1482   @param[in,out] data  `CeedQFunctionAssemblyData` to destroy
1483 
1484   @return An error code: 0 - success, otherwise - failure
1485 
1486   @ref Backend
1487 **/
1488 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1489   if (!*data || --(*data)->ref_count > 0) {
1490     *data = NULL;
1491     return CEED_ERROR_SUCCESS;
1492   }
1493   CeedCall(CeedDestroy(&(*data)->ceed));
1494   CeedCall(CeedVectorDestroy(&(*data)->vec));
1495   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1496 
1497   CeedCall(CeedFree(data));
1498   return CEED_ERROR_SUCCESS;
1499 }
1500 
1501 /**
1502   @brief Get `CeedOperatorAssemblyData`
1503 
1504   @param[in]  op   `CeedOperator` to assemble
1505   @param[out] data `CeedOperatorAssemblyData`
1506 
1507   @return An error code: 0 - success, otherwise - failure
1508 
1509   @ref Backend
1510 **/
1511 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1512   if (!op->op_assembled) {
1513     CeedOperatorAssemblyData data;
1514 
1515     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1516     op->op_assembled = data;
1517   }
1518   *data = op->op_assembled;
1519   return CEED_ERROR_SUCCESS;
1520 }
1521 
1522 /**
1523   @brief Create object holding `CeedOperator` assembly data.
1524 
1525   The `CeedOperatorAssemblyData` holds an array with references to every active `CeedBasis` used in the `CeedOperator`.
1526   An array with references to the corresponding active `CeedElemRestriction` is also stored.
1527   For each active `CeedBasis, the `CeedOperatorAssemblyData` holds an array of all input and output @ref CeedEvalMode for this `CeedBasis`.
1528   The `CeedOperatorAssemblyData` holds an array of offsets for indexing into the assembled `CeedQFunction` arrays to the row representing each @ref CeedEvalMode.
1529   The number of input columns across all active bases for the assembled `CeedQFunction` is also stored.
1530   Lastly, the `CeedOperatorAssembly` data holds assembled matrices representing the full action of the `CeedBasis` for all @ref CeedEvalMode.
1531 
1532   @param[in]  ceed `Ceed` object used to create the `CeedOperatorAssemblyData`
1533   @param[in]  op   `CeedOperator` to be assembled
1534   @param[out] data Address of the variable where the newly created `CeedOperatorAssemblyData` will be stored
1535 
1536   @return An error code: 0 - success, otherwise - failure
1537 
1538   @ref Backend
1539 **/
1540 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1541   CeedInt             num_active_bases_in = 0, num_active_bases_out = 0, offset = 0;
1542   CeedInt             num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL;
1543   CeedSize          **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1544   CeedEvalMode      **eval_modes_in = NULL, **eval_modes_out = NULL;
1545   CeedQFunctionField *qf_fields;
1546   CeedQFunction       qf;
1547   CeedOperatorField  *op_fields;
1548   bool                is_composite;
1549 
1550   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1551   CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators.");
1552 
1553   // Allocate
1554   CeedCall(CeedCalloc(1, data));
1555   (*data)->ceed = ceed;
1556   CeedCall(CeedReference(ceed));
1557 
1558   // Build OperatorAssembly data
1559   CeedCall(CeedOperatorGetQFunction(op, &qf));
1560 
1561   // Determine active input basis
1562   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1563   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1564   for (CeedInt i = 0; i < num_input_fields; i++) {
1565     CeedVector vec;
1566 
1567     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1568     if (vec == CEED_VECTOR_ACTIVE) {
1569       CeedInt      index = -1, num_comp, q_comp;
1570       CeedEvalMode eval_mode;
1571       CeedBasis    basis_in = NULL;
1572 
1573       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1574       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1575       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1576       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1577       for (CeedInt i = 0; i < num_active_bases_in; i++) {
1578         if ((*data)->active_bases_in[i] == basis_in) index = i;
1579       }
1580       if (index == -1) {
1581         CeedElemRestriction elem_rstr_in;
1582 
1583         index = num_active_bases_in;
1584         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in));
1585         (*data)->active_bases_in[num_active_bases_in] = NULL;
1586         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in]));
1587         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in));
1588         (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL;
1589         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1590         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in]));
1591         CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
1592         CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in));
1593         num_eval_modes_in[index] = 0;
1594         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in));
1595         eval_modes_in[index] = NULL;
1596         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in));
1597         eval_mode_offsets_in[index] = NULL;
1598         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in));
1599         (*data)->assembled_bases_in[index] = NULL;
1600         num_active_bases_in++;
1601       }
1602       if (eval_mode != CEED_EVAL_WEIGHT) {
1603         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1604         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1605         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1606         for (CeedInt d = 0; d < q_comp; d++) {
1607           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1608           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1609           offset += num_comp;
1610         }
1611         num_eval_modes_in[index] += q_comp;
1612       }
1613       CeedCall(CeedBasisDestroy(&basis_in));
1614     }
1615     CeedCall(CeedVectorDestroy(&vec));
1616   }
1617 
1618   // Determine active output basis
1619   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1620   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1621   offset = 0;
1622   for (CeedInt i = 0; i < num_output_fields; i++) {
1623     CeedVector vec;
1624 
1625     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1626     if (vec == CEED_VECTOR_ACTIVE) {
1627       CeedInt      index = -1, num_comp, q_comp;
1628       CeedEvalMode eval_mode;
1629       CeedBasis    basis_out = NULL;
1630 
1631       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1632       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1633       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1634       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1635       for (CeedInt i = 0; i < num_active_bases_out; i++) {
1636         if ((*data)->active_bases_out[i] == basis_out) index = i;
1637       }
1638       if (index == -1) {
1639         CeedElemRestriction elem_rstr_out;
1640 
1641         index = num_active_bases_out;
1642         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out));
1643         (*data)->active_bases_out[num_active_bases_out] = NULL;
1644         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out]));
1645         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out));
1646         (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL;
1647         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1648         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out]));
1649         CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
1650         CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out));
1651         num_eval_modes_out[index] = 0;
1652         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out));
1653         eval_modes_out[index] = NULL;
1654         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out));
1655         eval_mode_offsets_out[index] = NULL;
1656         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out));
1657         (*data)->assembled_bases_out[index] = NULL;
1658         num_active_bases_out++;
1659       }
1660       if (eval_mode != CEED_EVAL_WEIGHT) {
1661         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1662         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1663         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1664         for (CeedInt d = 0; d < q_comp; d++) {
1665           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1666           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1667           offset += num_comp;
1668         }
1669         num_eval_modes_out[index] += q_comp;
1670       }
1671       CeedCall(CeedBasisDestroy(&basis_out));
1672     }
1673     CeedCall(CeedVectorDestroy(&vec));
1674   }
1675   CeedCall(CeedQFunctionDestroy(&qf));
1676   (*data)->num_active_bases_in   = num_active_bases_in;
1677   (*data)->num_eval_modes_in     = num_eval_modes_in;
1678   (*data)->eval_modes_in         = eval_modes_in;
1679   (*data)->eval_mode_offsets_in  = eval_mode_offsets_in;
1680   (*data)->num_active_bases_out  = num_active_bases_out;
1681   (*data)->num_eval_modes_out    = num_eval_modes_out;
1682   (*data)->eval_modes_out        = eval_modes_out;
1683   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1684   (*data)->num_output_components = offset;
1685   return CEED_ERROR_SUCCESS;
1686 }
1687 
1688 /**
1689   @brief Get `CeedOperator` @ref CeedEvalMode for assembly.
1690 
1691   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1692 
1693   @param[in]  data                  `CeedOperatorAssemblyData`
1694   @param[out] num_active_bases_in   Total number of active bases for input
1695   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input @ref CeedEvalMode, or `NULL`.
1696                                       `eval_modes_in[0]` holds an array of eval modes for the first active `CeedBasis`.
1697   @param[out] eval_modes_in         Pointer to hold arrays of input @ref CeedEvalMode, or `NULL`
1698   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point
1699   @param[out] num_active_bases_out  Total number of active bases for output
1700   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output @ref CeedEvalMode, or `NULL`
1701   @param[out] eval_modes_out        Pointer to hold arrays of output @ref CeedEvalMode, or `NULL`
1702   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1703   @param[out] num_output_components The number of columns in the assembled `CeedQFunction` matrix for each quadrature point, including contributions of all active bases
1704 
1705   @return An error code: 0 - success, otherwise - failure
1706 
1707   @ref Backend
1708 **/
1709 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in,
1710                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out,
1711                                          CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out,
1712                                          CeedSize *num_output_components) {
1713   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1714   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1715   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1716   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1717   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1718   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1719   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1720   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1721   if (num_output_components) *num_output_components = data->num_output_components;
1722   return CEED_ERROR_SUCCESS;
1723 }
1724 
1725 /**
1726   @brief Get `CeedOperator` `CeedBasis` data for assembly.
1727 
1728   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1729 
1730   @param[in]  data                 `CeedOperatorAssemblyData`
1731   @param[out] num_active_bases_in  Number of active input bases, or `NULL`
1732   @param[out] active_bases_in      Pointer to hold active input `CeedBasis`, or `NULL`
1733   @param[out] assembled_bases_in   Pointer to hold assembled active input `B` , or `NULL`
1734   @param[out] num_active_bases_out Number of active output bases, or `NULL`
1735   @param[out] active_bases_out     Pointer to hold active output `CeedBasis`, or `NULL`
1736   @param[out] assembled_bases_out  Pointer to hold assembled active output `B` , or `NULL`
1737 
1738   @return An error code: 0 - success, otherwise - failure
1739 
1740   @ref Backend
1741 **/
1742 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in,
1743                                      const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out,
1744                                      const CeedScalar ***assembled_bases_out) {
1745   // Assemble B_in, B_out if needed
1746   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1747     CeedInt num_qpts;
1748 
1749     if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts));
1750     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts));
1751     for (CeedInt b = 0; b < data->num_active_bases_in; b++) {
1752       bool        has_eval_none = false;
1753       CeedInt     num_nodes;
1754       CeedScalar *B_in = NULL, *identity = NULL;
1755 
1756       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes));
1757       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1758 
1759       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1760         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1761       }
1762       if (has_eval_none) {
1763         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1764         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1765           identity[i * num_nodes + i] = 1.0;
1766         }
1767       }
1768 
1769       for (CeedInt q = 0; q < num_qpts; q++) {
1770         for (CeedInt n = 0; n < num_nodes; n++) {
1771           CeedInt      d_in              = 0, q_comp_in;
1772           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1773 
1774           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1775             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1776             const CeedScalar *B  = NULL;
1777 
1778             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B));
1779             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in));
1780             if (q_comp_in > 1) {
1781               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1782               else B = &B[(++d_in) * num_qpts * num_nodes];
1783             }
1784             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1785             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1786           }
1787         }
1788       }
1789       if (identity) CeedCall(CeedFree(&identity));
1790       data->assembled_bases_in[b] = B_in;
1791     }
1792   }
1793 
1794   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1795     CeedInt num_qpts;
1796 
1797     if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts));
1798     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts));
1799     for (CeedInt b = 0; b < data->num_active_bases_out; b++) {
1800       bool        has_eval_none = false;
1801       CeedInt     num_nodes;
1802       CeedScalar *B_out = NULL, *identity = NULL;
1803 
1804       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes));
1805       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1806 
1807       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1808         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1809       }
1810       if (has_eval_none) {
1811         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1812         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1813           identity[i * num_nodes + i] = 1.0;
1814         }
1815       }
1816 
1817       for (CeedInt q = 0; q < num_qpts; q++) {
1818         for (CeedInt n = 0; n < num_nodes; n++) {
1819           CeedInt      d_out              = 0, q_comp_out;
1820           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1821 
1822           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1823             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1824             const CeedScalar *B  = NULL;
1825 
1826             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B));
1827             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out));
1828             if (q_comp_out > 1) {
1829               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1830               else B = &B[(++d_out) * num_qpts * num_nodes];
1831             }
1832             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1833             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1834           }
1835         }
1836       }
1837       if (identity) CeedCall(CeedFree(&identity));
1838       data->assembled_bases_out[b] = B_out;
1839     }
1840   }
1841 
1842   // Pass out assembled data
1843   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1844   if (active_bases_in) *active_bases_in = data->active_bases_in;
1845   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1846   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1847   if (active_bases_out) *active_bases_out = data->active_bases_out;
1848   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1849   return CEED_ERROR_SUCCESS;
1850 }
1851 
1852 /**
1853   @brief Get `CeedOperator` `CeedBasis` data for assembly.
1854 
1855   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1856 
1857   @param[in]  data                      `CeedOperatorAssemblyData`
1858   @param[out] num_active_elem_rstrs_in  Number of active input element restrictions, or `NULL`
1859   @param[out] active_elem_rstrs_in      Pointer to hold active input `CeedElemRestriction`, or `NULL`
1860   @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or `NULL`
1861   @param[out] active_elem_rstrs_out     Pointer to hold active output `CeedElemRestriction`, or `NULL`
1862 
1863   @return An error code: 0 - success, otherwise - failure
1864 
1865   @ref Backend
1866 **/
1867 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in,
1868                                                 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out,
1869                                                 CeedElemRestriction **active_elem_rstrs_out) {
1870   if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in;
1871   if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in;
1872   if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out;
1873   if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out;
1874   return CEED_ERROR_SUCCESS;
1875 }
1876 
1877 /**
1878   @brief Destroy `CeedOperatorAssemblyData`
1879 
1880   @param[in,out] data `CeedOperatorAssemblyData` to destroy
1881 
1882   @return An error code: 0 - success, otherwise - failure
1883 
1884   @ref Backend
1885 **/
1886 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1887   if (!*data) {
1888     *data = NULL;
1889     return CEED_ERROR_SUCCESS;
1890   }
1891   CeedCall(CeedDestroy(&(*data)->ceed));
1892   for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) {
1893     CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b]));
1894     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b]));
1895     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1896     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1897     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1898   }
1899   for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) {
1900     CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b]));
1901     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b]));
1902     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1903     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1904     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1905   }
1906   CeedCall(CeedFree(&(*data)->active_bases_in));
1907   CeedCall(CeedFree(&(*data)->active_bases_out));
1908   CeedCall(CeedFree(&(*data)->active_elem_rstrs_in));
1909   CeedCall(CeedFree(&(*data)->active_elem_rstrs_out));
1910   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1911   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1912   CeedCall(CeedFree(&(*data)->eval_modes_in));
1913   CeedCall(CeedFree(&(*data)->eval_modes_out));
1914   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1915   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1916   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1917   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1918 
1919   CeedCall(CeedFree(data));
1920   return CEED_ERROR_SUCCESS;
1921 }
1922 
1923 /**
1924   @brief Retrieve fallback `CeedOperator` with a reference `Ceed` for advanced `CeedOperator` functionality
1925 
1926   @param[in]  op          `CeedOperator` to retrieve fallback for
1927   @param[out] op_fallback Fallback `CeedOperator`
1928 
1929   @return An error code: 0 - success, otherwise - failure
1930 
1931   @ref Backend
1932 **/
1933 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
1934   // Create if needed
1935   if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op));
1936   if (op->op_fallback) {
1937     bool is_debug;
1938     Ceed ceed;
1939 
1940     CeedCall(CeedOperatorGetCeed(op, &ceed));
1941     CeedCall(CeedIsDebug(ceed, &is_debug));
1942     if (is_debug) {
1943       Ceed        ceed_fallback;
1944       const char *resource, *resource_fallback, *op_name;
1945 
1946       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
1947       CeedCall(CeedGetResource(ceed, &resource));
1948       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
1949       CeedCall(CeedOperatorGetName(op, &op_name));
1950 
1951       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n");
1952       CeedDebug(ceed, "Falling back from Operator with backend %s at address %p to Operator with backend %s at address %p for CeedOperator \"%s\"\n",
1953                 resource, op, resource_fallback, op->op_fallback, op_name);
1954       CeedCall(CeedDestroy(&ceed_fallback));
1955     }
1956     CeedCall(CeedDestroy(&ceed));
1957   }
1958   *op_fallback = op->op_fallback;
1959   return CEED_ERROR_SUCCESS;
1960 }
1961 
1962 /**
1963   @brief Get the parent `CeedOperator` for a fallback `CeedOperator`
1964 
1965   @param[in]  op     `CeedOperator` context
1966   @param[out] parent Variable to store parent `CeedOperator` context
1967 
1968   @return An error code: 0 - success, otherwise - failure
1969 
1970   @ref Backend
1971 **/
1972 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) {
1973   *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL;
1974   return CEED_ERROR_SUCCESS;
1975 }
1976 
1977 /**
1978   @brief Get the `Ceed` context of the parent `CeedOperator` for a fallback `CeedOperator`
1979 
1980   @param[in]  op     `CeedOperator` context
1981   @param[out] parent Variable to store parent `Ceed` context
1982 
1983   @return An error code: 0 - success, otherwise - failure
1984 
1985   @ref Backend
1986 **/
1987 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) {
1988   *parent = NULL;
1989   if (op->op_fallback_parent) CeedCall(CeedReferenceCopy(op->op_fallback_parent->ceed, parent));
1990   else CeedCall(CeedReferenceCopy(CeedOperatorReturnCeed(op), parent));
1991   return CEED_ERROR_SUCCESS;
1992 }
1993 
1994 /// @}
1995 
1996 /// ----------------------------------------------------------------------------
1997 /// CeedOperator Public API
1998 /// ----------------------------------------------------------------------------
1999 /// @addtogroup CeedOperatorUser
2000 /// @{
2001 
2002 /**
2003   @brief Assemble a linear `CeedQFunction` associated with a `CeedOperator`.
2004 
2005   This returns a `CeedVector` containing a matrix at each quadrature point providing the action of the `CeedQFunction` associated with the `CeedOperator`.
2006   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices representing the action of the `CeedQFunction` for a corresponding quadrature point on an element.
2007 
2008   Inputs and outputs are in the order provided by the user when adding `CeedOperator` fields.
2009   For example, a `CeedQFunction` with inputs `u` and `gradu` and outputs `gradv` and `v` , provided in that order, would result in an assembled `CeedQFunction` that consists of `(1 + dim) x (dim + 1)` matrices at each quadrature point acting on the input ` [u, du_0, du_1]` and producing the output `[dv_0, dv_1, v]`.
2010 
2011   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2012 
2013   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2014   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
2015   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
2016   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2017 
2018   @return An error code: 0 - success, otherwise - failure
2019 
2020   @ref User
2021 **/
2022 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
2023   CeedCall(CeedOperatorCheckReady(op));
2024 
2025   if (op->LinearAssembleQFunction) {
2026     // Backend version
2027     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
2028   } else {
2029     // Operator fallback
2030     CeedOperator op_fallback;
2031 
2032     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleQFunction\n");
2033     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2034     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
2035     else return CeedError(CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
2036   }
2037   return CEED_ERROR_SUCCESS;
2038 }
2039 
2040 /**
2041   @brief Assemble `CeedQFunction` and store result internally.
2042 
2043   Return copied references of stored data to the caller.
2044   Caller is responsible for ownership and destruction of the copied references.
2045   See also @ref CeedOperatorLinearAssembleQFunction().
2046 
2047   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
2048         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
2049 
2050   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2051   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
2052   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
2053   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2054 
2055   @return An error code: 0 - success, otherwise - failure
2056 
2057   @ref User
2058 **/
2059 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
2060   return CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(op, true, assembled, rstr, request);
2061 }
2062 
2063 /**
2064   @brief Assemble the diagonal of a square linear `CeedOperator`
2065 
2066   This overwrites a `CeedVector` with the diagonal of a linear `CeedOperator`.
2067 
2068   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2069 
2070   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2071 
2072   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2073   @param[out] assembled `CeedVector` to store assembled `CeedOperator` diagonal
2074   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2075 
2076   @return An error code: 0 - success, otherwise - failure
2077 
2078   @ref User
2079 **/
2080 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2081   bool     is_composite;
2082   CeedSize input_size = 0, output_size = 0;
2083 
2084   CeedCall(CeedOperatorCheckReady(op));
2085   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2086 
2087   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2088   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2089 
2090   // Early exit for empty operator
2091   if (!is_composite) {
2092     CeedInt num_elem = 0;
2093 
2094     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2095     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2096   }
2097 
2098   if (op->LinearAssembleDiagonal) {
2099     // Backend version
2100     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
2101     return CEED_ERROR_SUCCESS;
2102   } else if (op->LinearAssembleAddDiagonal) {
2103     // Backend version with zeroing first
2104     CeedCall(CeedVectorSetValue(assembled, 0.0));
2105     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
2106     return CEED_ERROR_SUCCESS;
2107   } else if (is_composite) {
2108     // Default to summing contributions of suboperators
2109     CeedCall(CeedVectorSetValue(assembled, 0.0));
2110     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
2111     return CEED_ERROR_SUCCESS;
2112   } else {
2113     // Operator fallback
2114     CeedOperator op_fallback;
2115 
2116     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleDiagonal\n");
2117     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2118     if (op_fallback) {
2119       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
2120       return CEED_ERROR_SUCCESS;
2121     }
2122   }
2123   // Default interface implementation
2124   CeedCall(CeedVectorSetValue(assembled, 0.0));
2125   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
2126   return CEED_ERROR_SUCCESS;
2127 }
2128 
2129 /**
2130   @brief Assemble the diagonal of a square linear `CeedOperator`.
2131 
2132   This sums into a `CeedVector` the diagonal of a linear `CeedOperator`.
2133 
2134   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2135 
2136   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2137 
2138   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2139   @param[out] assembled `CeedVector` to store assembled `CeedOperator` diagonal
2140   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2141 
2142   @return An error code: 0 - success, otherwise - failure
2143 
2144   @ref User
2145 **/
2146 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2147   bool     is_composite;
2148   CeedSize input_size = 0, output_size = 0;
2149 
2150   CeedCall(CeedOperatorCheckReady(op));
2151   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2152 
2153   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2154   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2155 
2156   // Early exit for empty operator
2157   if (!is_composite) {
2158     CeedInt num_elem = 0;
2159 
2160     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2161     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2162   }
2163 
2164   if (op->LinearAssembleAddDiagonal) {
2165     // Backend version
2166     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
2167     return CEED_ERROR_SUCCESS;
2168   } else if (is_composite) {
2169     // Default to summing contributions of suboperators
2170     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
2171     return CEED_ERROR_SUCCESS;
2172   } else {
2173     // Operator fallback
2174     CeedOperator op_fallback;
2175 
2176     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleAddDiagonal\n");
2177     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2178     if (op_fallback) {
2179       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
2180       return CEED_ERROR_SUCCESS;
2181     }
2182   }
2183   // Default interface implementation
2184   CeedCall(CeedSingleOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
2185   return CEED_ERROR_SUCCESS;
2186 }
2187 
2188 /**
2189    @brief Fully assemble the point-block diagonal pattern of a linear `CeedOperator`.
2190 
2191    Expected to be used in conjunction with @ref CeedOperatorLinearAssemblePointBlockDiagonal().
2192 
2193    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2194    Note that the `(i, j)` pairs are unique.
2195    This function returns the number of entries and their `(i, j)` locations, while @ref CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in the same ordering.
2196 
2197    This will generally be slow unless your operator is low-order.
2198 
2199    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2200 
2201    @param[in]  op          `CeedOperator` to assemble
2202    @param[out] num_entries Number of entries in coordinate nonzero pattern
2203    @param[out] rows        Row number for each entry
2204    @param[out] cols        Column number for each entry
2205 
2206    @ref User
2207 **/
2208 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2209   bool          is_composite;
2210   CeedInt       num_active_components, num_sub_operators;
2211   CeedOperator *sub_operators;
2212 
2213   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2214 
2215   CeedSize input_size = 0, output_size = 0;
2216   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2217   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2218 
2219   if (is_composite) {
2220     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_operators));
2221     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2222   } else {
2223     sub_operators     = &op;
2224     num_sub_operators = 1;
2225   }
2226 
2227   // Verify operator can be assembled correctly
2228   {
2229     CeedOperatorAssemblyData data;
2230     CeedInt                  num_active_elem_rstrs, comp_stride;
2231     CeedElemRestriction     *active_elem_rstrs;
2232 
2233     // Get initial values to check against
2234     CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data));
2235     CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
2236     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride));
2237     CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components));
2238 
2239     // Verify that all active element restrictions have same component stride and number of components
2240     for (CeedInt k = 0; k < num_sub_operators; k++) {
2241       CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data));
2242       CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
2243       for (CeedInt i = 0; i < num_active_elem_rstrs; i++) {
2244         CeedInt comp_stride_sub, num_active_components_sub;
2245 
2246         CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub));
2247         CeedCheck(comp_stride == comp_stride_sub, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION,
2248                   "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub);
2249         CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub));
2250         CeedCheck(num_active_components == num_active_components_sub, CeedOperatorReturnCeed(op), CEED_ERROR_INCOMPATIBLE,
2251                   "All suboperators must have the same number of output components."
2252                   " Previous: %" CeedInt_FMT " Current: %" CeedInt_FMT,
2253                   num_active_components, num_active_components_sub);
2254       }
2255     }
2256   }
2257   *num_entries = input_size * num_active_components;
2258   CeedCall(CeedCalloc(*num_entries, rows));
2259   CeedCall(CeedCalloc(*num_entries, cols));
2260 
2261   for (CeedInt o = 0; o < num_sub_operators; o++) {
2262     CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr;
2263     CeedInt             comp_stride, num_elem, elem_size;
2264     const CeedInt      *offsets, *point_block_offsets;
2265 
2266     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr));
2267     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride));
2268     CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem));
2269     CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size));
2270     CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets));
2271 
2272     CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr));
2273     CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets));
2274 
2275     for (CeedSize i = 0; i < num_elem * elem_size; i++) {
2276       for (CeedInt c_out = 0; c_out < num_active_components; c_out++) {
2277         for (CeedInt c_in = 0; c_in < num_active_components; c_in++) {
2278           (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride;
2279           (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride;
2280         }
2281       }
2282     }
2283 
2284     CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets));
2285     CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets));
2286     CeedCall(CeedElemRestrictionDestroy(&active_elem_rstr));
2287     CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr));
2288   }
2289   return CEED_ERROR_SUCCESS;
2290 }
2291 
2292 /**
2293   @brief Assemble the point block diagonal of a square linear `CeedOperator`.
2294 
2295   This overwrites a `CeedVector` with the point block diagonal of a linear `CeedOperator`.
2296 
2297   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2298 
2299   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2300 
2301   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2302   @param[out] assembled `CeedVector` to store assembled `CeedOperator` point block diagonal, provided in row-major form with an `num_comp * num_comp` block at each node.
2303                           The dimensions of this vector are derived from the active vector for the `CeedOperator`.
2304                           The array has shape `[nodes, component out, component in]`.
2305   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2306 
2307   @return An error code: 0 - success, otherwise - failure
2308 
2309   @ref User
2310 **/
2311 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2312   bool     is_composite;
2313   CeedSize input_size = 0, output_size = 0;
2314 
2315   CeedCall(CeedOperatorCheckReady(op));
2316   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2317 
2318   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2319   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2320 
2321   // Early exit for empty operator
2322   if (!is_composite) {
2323     CeedInt num_elem = 0;
2324 
2325     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2326     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2327   }
2328 
2329   if (op->LinearAssemblePointBlockDiagonal) {
2330     // Backend version
2331     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
2332     return CEED_ERROR_SUCCESS;
2333   } else if (op->LinearAssembleAddPointBlockDiagonal) {
2334     // Backend version with zeroing first
2335     CeedCall(CeedVectorSetValue(assembled, 0.0));
2336     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2337     return CEED_ERROR_SUCCESS;
2338   } else {
2339     // Operator fallback
2340     CeedOperator op_fallback;
2341 
2342     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssemblePointBlockDiagonal\n");
2343     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2344     if (op_fallback) {
2345       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
2346       return CEED_ERROR_SUCCESS;
2347     }
2348   }
2349   // Default interface implementation
2350   CeedCall(CeedVectorSetValue(assembled, 0.0));
2351   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2352   return CEED_ERROR_SUCCESS;
2353 }
2354 
2355 /**
2356   @brief Assemble the point block diagonal of a square linear `CeedOperator`.
2357 
2358   This sums into a `CeedVector` with the point block diagonal of a linear `CeedOperator`.
2359 
2360   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2361 
2362   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2363 
2364   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2365   @param[out] assembled `CeedVector` to store assembled CeedOperator point block diagonal, provided in row-major form with an `num_comp * num_comp` block at each node.
2366                           The dimensions of this vector are derived from the active vector for the `CeedOperator`.
2367                           The array has shape `[nodes, component out, component in]`.
2368   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2369 
2370   @return An error code: 0 - success, otherwise - failure
2371 
2372   @ref User
2373 **/
2374 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2375   bool     is_composite;
2376   CeedSize input_size = 0, output_size = 0;
2377 
2378   CeedCall(CeedOperatorCheckReady(op));
2379   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2380 
2381   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2382   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2383 
2384   // Early exit for empty operator
2385   if (!is_composite) {
2386     CeedInt num_elem = 0;
2387 
2388     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2389     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2390   }
2391 
2392   if (op->LinearAssembleAddPointBlockDiagonal) {
2393     // Backend version
2394     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
2395     return CEED_ERROR_SUCCESS;
2396   } else {
2397     // Operator fallback
2398     CeedOperator op_fallback;
2399 
2400     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleAddPointBlockDiagonal\n");
2401     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2402     if (op_fallback) {
2403       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
2404       return CEED_ERROR_SUCCESS;
2405     }
2406   }
2407   // Default interface implementation
2408   if (is_composite) {
2409     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
2410   } else {
2411     CeedCall(CeedSingleOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
2412   }
2413   return CEED_ERROR_SUCCESS;
2414 }
2415 
2416 /**
2417    @brief Fully assemble the nonzero pattern of a linear `CeedOperator`.
2418 
2419    Expected to be used in conjunction with @ref CeedOperatorLinearAssemble().
2420 
2421    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2422    Note that the `(i, j)` pairs are not unique and may repeat.
2423    This function returns the number of entries and their `(i, j)` locations, while @ref CeedOperatorLinearAssemble() provides the values in the same ordering.
2424 
2425    This will generally be slow unless your operator is low-order.
2426 
2427    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2428 
2429    @param[in]  op          `CeedOperator` to assemble
2430    @param[out] num_entries Number of entries in coordinate nonzero pattern
2431    @param[out] rows        Row number for each entry
2432    @param[out] cols        Column number for each entry
2433 
2434    @ref User
2435 **/
2436 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2437   bool          is_composite;
2438   CeedInt       num_suboperators, offset = 0;
2439   CeedSize      single_entries;
2440   CeedOperator *sub_operators;
2441 
2442   CeedCall(CeedOperatorCheckReady(op));
2443   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2444 
2445   if (op->LinearAssembleSymbolic) {
2446     // Backend version
2447     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
2448     return CEED_ERROR_SUCCESS;
2449   } else {
2450     // Operator fallback
2451     CeedOperator op_fallback;
2452 
2453     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleSymbolic\n");
2454     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2455     if (op_fallback) {
2456       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
2457       return CEED_ERROR_SUCCESS;
2458     }
2459   }
2460 
2461   // Default interface implementation
2462 
2463   // Count entries and allocate rows, cols arrays
2464   *num_entries = 0;
2465   if (is_composite) {
2466     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2467     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2468     for (CeedInt k = 0; k < num_suboperators; ++k) {
2469       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2470       *num_entries += single_entries;
2471     }
2472   } else {
2473     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
2474     *num_entries += single_entries;
2475   }
2476   CeedCall(CeedCalloc(*num_entries, rows));
2477   CeedCall(CeedCalloc(*num_entries, cols));
2478 
2479   // Assemble nonzero locations
2480   if (is_composite) {
2481     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2482     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2483     for (CeedInt k = 0; k < num_suboperators; ++k) {
2484       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
2485       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2486       offset += single_entries;
2487     }
2488   } else {
2489     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
2490   }
2491   return CEED_ERROR_SUCCESS;
2492 }
2493 
2494 /**
2495    @brief Fully assemble the nonzero entries of a linear operator.
2496 
2497    Expected to be used in conjunction with @ref CeedOperatorLinearAssembleSymbolic().
2498 
2499    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2500    Note that the `(i, j)` pairs are not unique and may repeat.
2501    This function returns the values of the nonzero entries to be added, their `(i, j)` locations are provided by @ref CeedOperatorLinearAssembleSymbolic().
2502 
2503    This will generally be slow unless your operator is low-order.
2504 
2505    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2506 
2507    @param[in]  op     `CeedOperator` to assemble
2508    @param[out] values Values to assemble into matrix
2509 
2510    @ref User
2511 **/
2512 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
2513   bool          is_composite;
2514   CeedInt       num_suboperators, offset = 0;
2515   CeedSize      single_entries = 0;
2516   CeedOperator *sub_operators;
2517 
2518   CeedCall(CeedOperatorCheckReady(op));
2519   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2520 
2521   // Early exit for empty operator
2522   if (!is_composite) {
2523     CeedInt num_elem = 0;
2524 
2525     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2526     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2527   }
2528 
2529   if (op->LinearAssemble) {
2530     // Backend version
2531     CeedCall(op->LinearAssemble(op, values));
2532     return CEED_ERROR_SUCCESS;
2533   } else if (is_composite) {
2534     // Default to summing contributions of suboperators
2535     CeedCall(CeedVectorSetValue(values, 0.0));
2536     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2537     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2538     for (CeedInt k = 0; k < num_suboperators; k++) {
2539       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2540       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2541       offset += single_entries;
2542     }
2543     return CEED_ERROR_SUCCESS;
2544   } else if (op->LinearAssembleSingle) {
2545     CeedCall(CeedVectorSetValue(values, 0.0));
2546     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2547     return CEED_ERROR_SUCCESS;
2548   } else {
2549     // Operator fallback
2550     CeedOperator op_fallback;
2551 
2552     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssemble\n");
2553     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2554     if (op_fallback) {
2555       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2556       return CEED_ERROR_SUCCESS;
2557     }
2558   }
2559 
2560   // Default to interface version if non-composite and no fallback
2561   CeedCall(CeedVectorSetValue(values, 0.0));
2562   CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2563   return CEED_ERROR_SUCCESS;
2564 }
2565 
2566 /**
2567   @brief Get the multiplicity of nodes across sub-operators in a composite `CeedOperator`.
2568 
2569   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2570 
2571   @param[in]  op               Composite `CeedOperator`
2572   @param[in]  num_skip_indices Number of sub-operators to skip
2573   @param[in]  skip_indices     Array of indices of sub-operators to skip
2574   @param[out] mult             Vector to store multiplicity (of size `l_size` )
2575 
2576   @return An error code: 0 - success, otherwise - failure
2577 
2578   @ref User
2579 **/
2580 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2581   Ceed                ceed;
2582   CeedInt             num_suboperators;
2583   CeedSize            l_vec_len;
2584   CeedScalar         *mult_array;
2585   CeedVector          ones_l_vec;
2586   CeedElemRestriction elem_rstr, mult_elem_rstr;
2587   CeedOperator       *sub_operators;
2588 
2589   CeedCall(CeedOperatorCheckReady(op));
2590 
2591   // Zero mult vector
2592   CeedCall(CeedVectorSetValue(mult, 0.0));
2593 
2594   // Get suboperators
2595   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2596   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2597   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2598 
2599   // Work vector
2600   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2601   CeedCall(CeedOperatorGetCeed(op, &ceed));
2602   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2603   CeedCall(CeedDestroy(&ceed));
2604   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2605   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2606 
2607   // Compute multiplicity across suboperators
2608   for (CeedInt i = 0; i < num_suboperators; i++) {
2609     const CeedScalar *sub_mult_array;
2610     CeedVector        sub_mult_l_vec, ones_e_vec;
2611 
2612     // -- Check for suboperator to skip
2613     for (CeedInt j = 0; j < num_skip_indices; j++) {
2614       if (skip_indices[j] == i) continue;
2615     }
2616 
2617     // -- Sub operator multiplicity
2618     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2619     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr));
2620     CeedCall(CeedElemRestrictionDestroy(&elem_rstr));
2621     CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2622     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2623     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2624     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2625     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2626     // ---- Flag every node present in the current suboperator
2627     for (CeedSize j = 0; j < l_vec_len; j++) {
2628       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2629     }
2630     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2631     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2632     CeedCall(CeedVectorDestroy(&ones_e_vec));
2633     CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr));
2634   }
2635   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2636   CeedCall(CeedVectorDestroy(&ones_l_vec));
2637   return CEED_ERROR_SUCCESS;
2638 }
2639 
2640 /**
2641   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator`, creating the prolongation basis from the fine and coarse grid interpolation.
2642 
2643   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2644 
2645   @param[in]  op_fine      Fine grid `CeedOperator`
2646   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2647   @param[in]  rstr_coarse  Coarse grid `CeedElemRestriction`
2648   @param[in]  basis_coarse Coarse grid active vector `CeedBasis`
2649   @param[out] op_coarse    Coarse grid `CeedOperator`
2650   @param[out] op_prolong   Coarse to fine `CeedOperator`, or `NULL`
2651   @param[out] op_restrict  Fine to coarse `CeedOperator`, or `NULL`
2652 
2653   @return An error code: 0 - success, otherwise - failure
2654 
2655   @ref User
2656 **/
2657 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2658                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2659   CeedBasis basis_c_to_f = NULL;
2660 
2661   CeedCall(CeedOperatorCheckReady(op_fine));
2662 
2663   // Build prolongation matrix, if required
2664   if (op_prolong || op_restrict) {
2665     CeedBasis basis_fine;
2666 
2667     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2668     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2669     CeedCall(CeedBasisDestroy(&basis_fine));
2670   }
2671 
2672   // Core code
2673   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2674   return CEED_ERROR_SUCCESS;
2675 }
2676 
2677 /**
2678   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator` with a tensor basis for the active basis.
2679 
2680   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2681 
2682   @param[in]  op_fine       Fine grid `CeedOperator`
2683   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2684   @param[in]  rstr_coarse   Coarse grid `CeedElemRestriction`
2685   @param[in]  basis_coarse  Coarse grid active vector `CeedBasis`
2686   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction `CeedOperator`
2687   @param[out] op_coarse     Coarse grid `CeedOperator`
2688   @param[out] op_prolong    Coarse to fine `CeedOperator`, or `NULL`
2689   @param[out] op_restrict   Fine to coarse `CeedOperator`, or `NULL`
2690 
2691   @return An error code: 0 - success, otherwise - failure
2692 
2693   @ref User
2694 **/
2695 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2696                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2697                                              CeedOperator *op_restrict) {
2698   Ceed      ceed;
2699   CeedInt   Q_f, Q_c;
2700   CeedBasis basis_fine, basis_c_to_f = NULL;
2701 
2702   CeedCall(CeedOperatorCheckReady(op_fine));
2703   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2704 
2705   // Check for compatible quadrature spaces
2706   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2707   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2708   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2709   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION,
2710             "Bases must have compatible quadrature spaces."
2711             " Fine grid: %" CeedInt_FMT " points, Coarse grid: %" CeedInt_FMT " points",
2712             Q_f, Q_c);
2713 
2714   // Create coarse to fine basis, if required
2715   if (op_prolong || op_restrict) {
2716     CeedInt     dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2717     CeedScalar *q_ref, *q_weight, *grad;
2718 
2719     // Check if interpolation matrix is provided
2720     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2721               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2722     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2723     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2724     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2725     CeedCall(CeedBasisDestroy(&basis_fine));
2726     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2727     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2728     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2729     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2730     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2731     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2732     CeedCall(CeedFree(&q_ref));
2733     CeedCall(CeedFree(&q_weight));
2734     CeedCall(CeedFree(&grad));
2735   }
2736 
2737   // Core code
2738   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2739   CeedCall(CeedDestroy(&ceed));
2740   return CEED_ERROR_SUCCESS;
2741 }
2742 
2743 /**
2744   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator` with a non-tensor basis for the active vector
2745 
2746   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2747 
2748   @param[in]  op_fine       Fine grid `CeedOperator`
2749   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2750   @param[in]  rstr_coarse   Coarse grid `CeedElemRestriction`
2751   @param[in]  basis_coarse  Coarse grid active vector `CeedBasis`
2752   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction `CeedOperator`
2753   @param[out] op_coarse     Coarse grid `CeedOperator`
2754   @param[out] op_prolong    Coarse to fine `CeedOperator`, or `NULL`
2755   @param[out] op_restrict   Fine to coarse `CeedOperator`, or `NULL`
2756 
2757   @return An error code: 0 - success, otherwise - failure
2758 
2759   @ref User
2760 **/
2761 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2762                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2763                                        CeedOperator *op_restrict) {
2764   Ceed      ceed;
2765   CeedInt   Q_f, Q_c;
2766   CeedBasis basis_fine, basis_c_to_f = NULL;
2767 
2768   CeedCall(CeedOperatorCheckReady(op_fine));
2769   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2770 
2771   // Check for compatible quadrature spaces
2772   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2773   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2774   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2775   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2776 
2777   // Coarse to fine basis
2778   if (op_prolong || op_restrict) {
2779     CeedInt          dim, num_comp, num_nodes_c, num_nodes_f;
2780     CeedScalar      *q_ref, *q_weight, *grad;
2781     CeedElemTopology topo;
2782 
2783     // Check if interpolation matrix is provided
2784     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2785               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2786     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2787     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2788     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2789     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2790     CeedCall(CeedBasisDestroy(&basis_fine));
2791     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2792     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2793     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2794     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2795     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2796     CeedCall(CeedFree(&q_ref));
2797     CeedCall(CeedFree(&q_weight));
2798     CeedCall(CeedFree(&grad));
2799   }
2800 
2801   // Core code
2802   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2803   CeedCall(CeedDestroy(&ceed));
2804   return CEED_ERROR_SUCCESS;
2805 }
2806 
2807 /**
2808   @brief Build a FDM based approximate inverse for each element for a `CeedOperator`.
2809 
2810   This returns a `CeedOperator` and `CeedVector` to apply a Fast Diagonalization Method based approximate inverse.
2811   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2812   The assembled `CeedQFunction` is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T \hat S V\f$.
2813   The `CeedOperator` must be linear and non-composite.
2814   The associated `CeedQFunction` must therefore also be linear.
2815 
2816   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2817 
2818   @param[in]  op      `CeedOperator` to create element inverses
2819   @param[out] fdm_inv `CeedOperator` to apply the action of a FDM based inverse for each element
2820   @param[in]  request Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2821 
2822   @return An error code: 0 - success, otherwise - failure
2823 
2824   @ref User
2825 **/
2826 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2827   Ceed                 ceed, ceed_parent;
2828   bool                 interp = false, grad = false, is_tensor_basis = true;
2829   CeedInt              num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2830   CeedScalar          *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg;
2831   const CeedScalar    *interp_1d, *grad_1d, *q_weight_1d;
2832   CeedVector           q_data;
2833   CeedElemRestriction  rstr  = NULL, rstr_qd_i;
2834   CeedBasis            basis = NULL, fdm_basis;
2835   CeedQFunctionContext ctx_fdm;
2836   CeedQFunctionField  *qf_fields;
2837   CeedQFunction        qf, qf_fdm;
2838   CeedOperatorField   *op_fields;
2839 
2840   CeedCall(CeedOperatorCheckReady(op));
2841 
2842   if (op->CreateFDMElementInverse) {
2843     // Backend version
2844     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2845     return CEED_ERROR_SUCCESS;
2846   } else {
2847     // Operator fallback
2848     CeedOperator op_fallback;
2849 
2850     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorCreateFDMElementInverse\n");
2851     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2852     if (op_fallback) {
2853       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2854       return CEED_ERROR_SUCCESS;
2855     }
2856   }
2857 
2858   // Default interface implementation
2859   CeedCall(CeedOperatorGetCeed(op, &ceed));
2860   CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent));
2861   CeedCall(CeedOperatorGetQFunction(op, &qf));
2862 
2863   // Determine active input basis
2864   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2865   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2866   for (CeedInt i = 0; i < num_input_fields; i++) {
2867     CeedVector vec;
2868 
2869     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2870     if (vec == CEED_VECTOR_ACTIVE) {
2871       CeedEvalMode eval_mode;
2872 
2873       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2874       interp = interp || eval_mode == CEED_EVAL_INTERP;
2875       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2876       if (!basis) CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2877       if (!rstr) CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2878     }
2879     CeedCall(CeedVectorDestroy(&vec));
2880   }
2881   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2882   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2883   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2884   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2885   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2886   CeedCall(CeedBasisGetDimension(basis, &dim));
2887   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2888   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2889 
2890   // Build and diagonalize 1D Mass and Laplacian
2891   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2892   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2893   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2894   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2895   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2896   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2897   CeedCall(CeedCalloc(P_1d, &lambda));
2898   // -- Build matrices
2899   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2900   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2901   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2902   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2903 
2904   // -- Diagonalize
2905   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2906   CeedCall(CeedFree(&mass));
2907   CeedCall(CeedFree(&laplace));
2908   for (CeedInt i = 0; i < P_1d; i++) {
2909     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2910   }
2911   CeedCall(CeedFree(&x));
2912 
2913   {
2914     CeedInt             layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2915     CeedScalar          max_norm = 0;
2916     const CeedScalar   *assembled_array, *q_weight_array;
2917     CeedVector          assembled = NULL, q_weight;
2918     CeedElemRestriction rstr_qf   = NULL;
2919 
2920     // Assemble QFunction
2921     CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2922     CeedCall(CeedElemRestrictionGetELayout(rstr_qf, layout));
2923     CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2924     CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2925 
2926     // Calculate element averages
2927     CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2928     CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2929     CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2930     CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2931     CeedCall(CeedCalloc(num_elem, &elem_avg));
2932     const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2933 
2934     for (CeedInt e = 0; e < num_elem; e++) {
2935       CeedInt count = 0;
2936 
2937       for (CeedInt q = 0; q < num_qpts; q++) {
2938         for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2939           if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2940             elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2941             count++;
2942           }
2943         }
2944       }
2945       if (count) {
2946         elem_avg[e] /= count;
2947       } else {
2948         elem_avg[e] = 1.0;
2949       }
2950     }
2951     CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2952     CeedCall(CeedVectorDestroy(&assembled));
2953     CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2954     CeedCall(CeedVectorDestroy(&q_weight));
2955   }
2956 
2957   // Build FDM diagonal
2958   {
2959     CeedScalar *q_data_array, *fdm_diagonal;
2960 
2961     CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2962     const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2963     for (CeedInt c = 0; c < num_comp; c++) {
2964       for (CeedInt n = 0; n < num_nodes; n++) {
2965         if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2966         if (grad) {
2967           for (CeedInt d = 0; d < dim; d++) {
2968             CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2969             fdm_diagonal[c * num_nodes + n] += lambda[i];
2970           }
2971         }
2972         if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2973       }
2974     }
2975     CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2976     CeedCall(CeedVectorSetValue(q_data, 0.0));
2977     CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2978     for (CeedInt e = 0; e < num_elem; e++) {
2979       for (CeedInt c = 0; c < num_comp; c++) {
2980         for (CeedInt n = 0; n < num_nodes; n++) {
2981           q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2982         }
2983       }
2984     }
2985     CeedCall(CeedFree(&elem_avg));
2986     CeedCall(CeedFree(&fdm_diagonal));
2987     CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2988   }
2989 
2990   // Setup FDM operator
2991   // -- Basis
2992   {
2993     CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2994 
2995     CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2996     CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2997     CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2998     CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2999     CeedCall(CeedFree(&fdm_interp));
3000     CeedCall(CeedFree(&grad_dummy));
3001     CeedCall(CeedFree(&q_ref_dummy));
3002     CeedCall(CeedFree(&q_weight_dummy));
3003     CeedCall(CeedFree(&lambda));
3004   }
3005 
3006   // -- Restriction
3007   {
3008     CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp};
3009     CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp,
3010                                               (CeedSize)num_elem * (CeedSize)num_comp * (CeedSize)num_nodes, strides, &rstr_qd_i));
3011   }
3012 
3013   // -- QFunction
3014   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
3015   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
3016   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
3017   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
3018   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
3019 
3020   // -- QFunction context
3021   {
3022     CeedInt *num_comp_data;
3023 
3024     CeedCall(CeedCalloc(1, &num_comp_data));
3025     num_comp_data[0] = num_comp;
3026     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
3027     CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
3028   }
3029   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
3030   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
3031 
3032   // -- Operator
3033   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
3034   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
3035   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data));
3036   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
3037 
3038   // Cleanup
3039   CeedCall(CeedDestroy(&ceed));
3040   CeedCall(CeedDestroy(&ceed_parent));
3041   CeedCall(CeedVectorDestroy(&q_data));
3042   CeedCall(CeedElemRestrictionDestroy(&rstr));
3043   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
3044   CeedCall(CeedBasisDestroy(&basis));
3045   CeedCall(CeedBasisDestroy(&fdm_basis));
3046   CeedCall(CeedQFunctionDestroy(&qf));
3047   CeedCall(CeedQFunctionDestroy(&qf_fdm));
3048   return CEED_ERROR_SUCCESS;
3049 }
3050 
3051 /// @}
3052