xref: /libCEED/interface/ceed-preconditioning.c (revision c8758636b5c68890a733ece6c4c5be0c3dca218d)
1 // Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a `CeedQFunction` with a reference `Ceed` to fallback for advanced `CeedOperator` functionality
28 
29   @param[in]  fallback_ceed `Ceed` on which to create fallback `CeedQFunction`
30   @param[in]  qf            `CeedQFunction` to create fallback for
31   @param[out] qf_fallback   Fallback `CeedQFunction`
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   char               *source_path_with_name = NULL;
39   CeedInt             num_input_fields, num_output_fields;
40   CeedQFunctionField *input_fields, *output_fields;
41 
42   // Check if NULL qf passed in
43   if (!qf) return CEED_ERROR_SUCCESS;
44 
45   CeedDebug256(CeedQFunctionReturnCeed(qf), 1, "---------- CeedOperator Fallback ----------\n");
46   CeedDebug(CeedQFunctionReturnCeed(qf), "Creating fallback CeedQFunction\n");
47 
48   if (qf->source_path) {
49     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
50 
51     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
52     memcpy(source_path_with_name, qf->source_path, path_len);
53     memcpy(&source_path_with_name[path_len], ":", 1);
54     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
55   } else if (qf->user_source) {
56     CeedCall(CeedStringAllocCopy(qf->user_source, &source_path_with_name));
57   } else {
58     CeedCall(CeedCalloc(1, &source_path_with_name));
59   }
60 
61   {
62     CeedInt           vec_length;
63     CeedQFunctionUser f;
64 
65     CeedCall(CeedQFunctionGetVectorLength(qf, &vec_length));
66     CeedCall(CeedQFunctionGetUserFunction(qf, &f));
67     CeedCall(CeedQFunctionCreateInterior(fallback_ceed, vec_length, f, source_path_with_name, qf_fallback));
68   }
69   {
70     CeedQFunctionContext ctx;
71 
72     CeedCall(CeedQFunctionGetContext(qf, &ctx));
73     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
74     CeedCall(CeedQFunctionContextDestroy(&ctx));
75   }
76   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
77   for (CeedInt i = 0; i < num_input_fields; i++) {
78     const char  *field_name;
79     CeedInt      size;
80     CeedEvalMode eval_mode;
81 
82     CeedCall(CeedQFunctionFieldGetData(input_fields[i], &field_name, &size, &eval_mode));
83     CeedCall(CeedQFunctionAddInput(*qf_fallback, field_name, size, eval_mode));
84   }
85   for (CeedInt i = 0; i < num_output_fields; i++) {
86     const char  *field_name;
87     CeedInt      size;
88     CeedEvalMode eval_mode;
89 
90     CeedCall(CeedQFunctionFieldGetData(output_fields[i], &field_name, &size, &eval_mode));
91     CeedCall(CeedQFunctionAddOutput(*qf_fallback, field_name, size, eval_mode));
92   }
93   CeedCall(CeedFree(&source_path_with_name));
94   return CEED_ERROR_SUCCESS;
95 }
96 
97 /**
98   @brief Duplicate a `CeedOperator` with a reference `Ceed` to fallback for advanced `CeedOperator` functionality
99 
100   @param[in,out] op `CeedOperator` to create fallback for
101 
102   @return An error code: 0 - success, otherwise - failure
103 
104   @ref Developer
105 **/
106 static int CeedOperatorCreateFallback(CeedOperator op) {
107   bool         is_composite;
108   Ceed         ceed, ceed_fallback;
109   CeedOperator op_fallback;
110 
111   // Check not already created
112   if (op->op_fallback) return CEED_ERROR_SUCCESS;
113 
114   // Fallback Ceed
115   CeedCall(CeedOperatorGetCeed(op, &ceed));
116   CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
117   CeedCall(CeedDestroy(&ceed));
118   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
119 
120   CeedDebug256(CeedOperatorReturnCeed(op), 1, "---------- CeedOperator Fallback ----------\n");
121   CeedDebug(CeedOperatorReturnCeed(op), "Creating fallback CeedOperator\n");
122 
123   // Clone Op
124   CeedCall(CeedOperatorIsComposite(op, &is_composite));
125   if (is_composite) {
126     CeedInt       num_suboperators;
127     CeedOperator *sub_operators;
128 
129     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
130     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
131     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
132     for (CeedInt i = 0; i < num_suboperators; i++) {
133       CeedOperator op_sub_fallback;
134 
135       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
136       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
137     }
138   } else {
139     bool               is_at_points = false;
140     CeedInt            num_input_fields, num_output_fields;
141     CeedQFunction      qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
142     CeedOperatorField *input_fields, *output_fields;
143 
144     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
145     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
146     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
147     CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
148     if (is_at_points) {
149       CeedVector          points;
150       CeedElemRestriction rstr_points;
151 
152       CeedCall(CeedOperatorCreateAtPoints(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
153       CeedCall(CeedOperatorAtPointsGetPoints(op, &rstr_points, &points));
154       CeedCall(CeedOperatorAtPointsSetPoints(op_fallback, rstr_points, points));
155       CeedCall(CeedVectorDestroy(&points));
156       CeedCall(CeedElemRestrictionDestroy(&rstr_points));
157     } else {
158       CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
159     }
160     CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
161     for (CeedInt i = 0; i < num_input_fields; i++) {
162       const char         *field_name;
163       CeedVector          vec;
164       CeedElemRestriction rstr;
165       CeedBasis           basis;
166 
167       CeedCall(CeedOperatorFieldGetData(input_fields[i], &field_name, &rstr, &basis, &vec));
168       CeedCall(CeedOperatorSetField(op_fallback, field_name, rstr, basis, vec));
169       CeedCall(CeedVectorDestroy(&vec));
170       CeedCall(CeedElemRestrictionDestroy(&rstr));
171       CeedCall(CeedBasisDestroy(&basis));
172     }
173     for (CeedInt i = 0; i < num_output_fields; i++) {
174       const char         *field_name;
175       CeedVector          vec;
176       CeedElemRestriction rstr;
177       CeedBasis           basis;
178 
179       CeedCall(CeedOperatorFieldGetData(output_fields[i], &field_name, &rstr, &basis, &vec));
180       CeedCall(CeedOperatorSetField(op_fallback, field_name, rstr, basis, vec));
181       CeedCall(CeedVectorDestroy(&vec));
182       CeedCall(CeedElemRestrictionDestroy(&rstr));
183       CeedCall(CeedBasisDestroy(&basis));
184     }
185     {
186       CeedQFunctionAssemblyData data;
187 
188       CeedCall(CeedOperatorGetQFunctionAssemblyData(op, &data));
189       CeedCall(CeedQFunctionAssemblyDataReferenceCopy(data, &op_fallback->qf_assembled));
190     }
191     // Cleanup
192     CeedCall(CeedQFunctionDestroy(&qf_fallback));
193     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
194     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
195   }
196   CeedCall(CeedOperatorSetName(op_fallback, op->name));
197   CeedCall(CeedOperatorCheckReady(op_fallback));
198   // Note: No ref-counting here so we don't get caught in a reference loop.
199   //       The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback.
200   op->op_fallback                 = op_fallback;
201   op_fallback->op_fallback_parent = op;
202   CeedCall(CeedDestroy(&ceed_fallback));
203   return CEED_ERROR_SUCCESS;
204 }
205 
206 /**
207   @brief Core logic for assembling operator diagonal or point block diagonal
208 
209   @param[in]  op             `CeedOperator` to assemble diagonal or point block diagonal
210   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
211   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
212   @param[out] assembled      `CeedVector` to store assembled diagonal
213 
214   @return An error code: 0 - success, otherwise - failure
215 
216   @ref Developer
217 **/
218 static inline int CeedSingleOperatorLinearAssembleAddDiagonal_Mesh(CeedOperator op, CeedRequest *request, const bool is_point_block,
219                                                                    CeedVector assembled) {
220   bool is_composite;
221 
222   CeedCall(CeedOperatorIsComposite(op, &is_composite));
223   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
224 
225   // Assemble QFunction
226   CeedInt             layout_qf[3];
227   const CeedScalar   *assembled_qf_array;
228   CeedVector          assembled_qf        = NULL;
229   CeedElemRestriction assembled_elem_rstr = NULL;
230 
231   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
232   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, layout_qf));
233   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
234   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
235 
236   // Get assembly data
237   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
238   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
239   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
240   CeedBasis               *active_bases_in, *active_bases_out;
241   CeedElemRestriction     *active_elem_rstrs_in, *active_elem_rstrs_out;
242   CeedOperatorAssemblyData data;
243 
244   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
245   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
246                                                 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out,
247                                                 &num_output_components));
248   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL));
249   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out));
250 
251   // Loop over all active bases (find matching input/output pairs)
252   for (CeedInt b = 0; b < CeedIntMin(num_active_bases_in, num_active_bases_out); b++) {
253     CeedInt             b_in, b_out, num_elem, num_nodes, num_qpts, num_comp;
254     bool                has_eval_none = false;
255     CeedScalar         *elem_diag_array, *identity = NULL;
256     CeedVector          elem_diag;
257     CeedElemRestriction diag_elem_rstr;
258 
259     if (num_active_bases_in <= num_active_bases_out) {
260       b_in = b;
261       for (b_out = 0; b_out < num_active_bases_out; b_out++) {
262         if (active_bases_in[b_in] == active_bases_out[b_out]) {
263           break;
264         }
265       }
266       if (b_out == num_active_bases_out) {
267         continue;
268       }  // No matching output basis found
269     } else {
270       b_out = b;
271       for (b_in = 0; b_in < num_active_bases_in; b_in++) {
272         if (active_bases_in[b_in] == active_bases_out[b_out]) {
273           break;
274         }
275       }
276       if (b_in == num_active_bases_in) {
277         continue;
278       }  // No matching output basis found
279     }
280     CeedCheck(active_elem_rstrs_in[b_in] == active_elem_rstrs_out[b_out], CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
281               "Cannot assemble operator diagonal with different input and output active element restrictions");
282 
283     // Assemble point block diagonal restriction, if needed
284     if (is_point_block) {
285       CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b_in], &diag_elem_rstr));
286     } else {
287       CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b_in], &diag_elem_rstr));
288     }
289 
290     // Create diagonal vector
291     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
292 
293     // Assemble element operator diagonals
294     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
295     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
296     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
297     CeedCall(CeedBasisGetNumNodes(active_bases_in[b_in], &num_nodes));
298     CeedCall(CeedBasisGetNumComponents(active_bases_in[b_in], &num_comp));
299     if (active_bases_in[b_in] == CEED_BASIS_NONE) num_qpts = num_nodes;
300     else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b_in], &num_qpts));
301 
302     // Construct identity matrix for basis if required
303     for (CeedInt i = 0; i < num_eval_modes_in[b_in]; i++) {
304       has_eval_none = has_eval_none || (eval_modes_in[b_in][i] == CEED_EVAL_NONE);
305     }
306     for (CeedInt i = 0; i < num_eval_modes_out[b_out]; i++) {
307       has_eval_none = has_eval_none || (eval_modes_out[b_out][i] == CEED_EVAL_NONE);
308     }
309     if (has_eval_none) {
310       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
311       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
312     }
313 
314     // Compute the diagonal of B^T D B
315     // Each element
316     for (CeedSize e = 0; e < num_elem; e++) {
317       // Each basis eval mode pair
318       CeedInt      d_out              = 0, q_comp_out;
319       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
320 
321       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b_out]; e_out++) {
322         CeedInt           d_in              = 0, q_comp_in;
323         const CeedScalar *B_t               = NULL;
324         CeedEvalMode      eval_mode_in_prev = CEED_EVAL_NONE;
325 
326         CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b_out], eval_modes_out[b_out][e_out], identity, &B_t));
327         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b_out], eval_modes_out[b_out][e_out], &q_comp_out));
328         if (q_comp_out > 1) {
329           if (e_out == 0 || eval_modes_out[b_out][e_out] != eval_mode_out_prev) d_out = 0;
330           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
331         }
332         eval_mode_out_prev = eval_modes_out[b_out][e_out];
333 
334         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b_in]; e_in++) {
335           const CeedScalar *B = NULL;
336 
337           CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b_in], eval_modes_in[b_in][e_in], identity, &B));
338           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b_in], eval_modes_in[b_in][e_in], &q_comp_in));
339           if (q_comp_in > 1) {
340             if (e_in == 0 || eval_modes_in[b_in][e_in] != eval_mode_in_prev) d_in = 0;
341             else B = &B[(++d_in) * num_qpts * num_nodes];
342           }
343           eval_mode_in_prev = eval_modes_in[b_in][e_in];
344 
345           // Each component
346           for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
347             // Each qpt/node pair
348             for (CeedInt q = 0; q < num_qpts; q++) {
349               if (is_point_block) {
350                 // Point Block Diagonal
351                 for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
352                   const CeedSize c_offset =
353                       (eval_mode_offsets_in[b_in][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
354                   const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
355 
356                   for (CeedInt n = 0; n < num_nodes; n++) {
357                     elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
358                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
359                   }
360                 }
361               } else {
362                 // Diagonal Only
363                 const CeedInt c_offset =
364                     (eval_mode_offsets_in[b_in][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
365                 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
366 
367                 for (CeedInt n = 0; n < num_nodes; n++) {
368                   elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
369                 }
370               }
371             }
372           }
373         }
374       }
375     }
376     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
377 
378     // Assemble local operator diagonal
379     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
380 
381     // Cleanup
382     CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
383     CeedCall(CeedVectorDestroy(&elem_diag));
384     CeedCall(CeedFree(&identity));
385   }
386   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
387   CeedCall(CeedVectorDestroy(&assembled_qf));
388   return CEED_ERROR_SUCCESS;
389 }
390 
391 /**
392   @brief Core logic for assembling operator diagonal or point block diagonal
393 
394   @param[in]  op             `CeedOperator` to assemble diagonal or point block diagonal
395   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
396   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
397   @param[out] assembled      `CeedVector` to store assembled diagonal
398 
399   @return An error code: 0 - success, otherwise - failure
400 
401   @ref Developer
402 **/
403 static inline int CeedSingleOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block,
404                                                               CeedVector assembled) {
405   bool is_at_points;
406 
407   CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
408   CeedCheck(!is_at_points, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "AtPoints operator not supported");
409   CeedCall(CeedSingleOperatorLinearAssembleAddDiagonal_Mesh(op, request, is_point_block, assembled));
410   return CEED_ERROR_SUCCESS;
411 }
412 
413 /**
414   @brief Core logic for assembling composite operator diagonal
415 
416   @param[in]  op             `CeedOperator` to assemble point block diagonal
417   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
418   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
419   @param[out] assembled      `CeedVector` to store assembled diagonal
420 
421   @return An error code: 0 - success, otherwise - failure
422 
423   @ref Developer
424 **/
425 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block,
426                                                                  CeedVector assembled) {
427   CeedInt       num_sub;
428   CeedOperator *suboperators;
429 
430   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
431   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
432   for (CeedInt i = 0; i < num_sub; i++) {
433     if (is_point_block) {
434       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
435     } else {
436       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
437     }
438   }
439   return CEED_ERROR_SUCCESS;
440 }
441 
442 /**
443   @brief Build nonzero pattern for non-composite CeedOperator`.
444 
445   Users should generally use @ref CeedOperatorLinearAssembleSymbolic().
446 
447   @param[in]  op     `CeedOperator` to assemble nonzero pattern
448   @param[in]  offset Offset for number of entries
449   @param[out] rows   Row number for each entry
450   @param[out] cols   Column number for each entry
451 
452   @return An error code: 0 - success, otherwise - failure
453 
454   @ref Developer
455 **/
456 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
457   Ceed                ceed;
458   bool                is_composite;
459   CeedSize            num_nodes_in, num_nodes_out, local_num_entries, count = 0;
460   CeedInt             num_elem_in, elem_size_in, num_comp_in, layout_er_in[3];
461   CeedInt             num_elem_out, elem_size_out, num_comp_out, layout_er_out[3];
462   CeedScalar         *array;
463   const CeedScalar   *elem_dof_a_in, *elem_dof_a_out;
464   CeedVector          index_vec_in, index_vec_out, elem_dof_in, elem_dof_out;
465   CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out;
466 
467   CeedCall(CeedOperatorIsComposite(op, &is_composite));
468   CeedCall(CeedOperatorGetCeed(op, &ceed));
469   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
470 
471   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out));
472   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
473   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
474   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
475   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
476   CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, layout_er_in));
477 
478   // Determine elem_dof relation for input
479   CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in));
480   CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array));
481   for (CeedSize i = 0; i < num_nodes_in; i++) array[i] = i;
482   CeedCall(CeedVectorRestoreArray(index_vec_in, &array));
483   CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in));
484   CeedCall(CeedVectorSetValue(elem_dof_in, 0.0));
485   CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in));
486   CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE));
487   CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in));
488   CeedCall(CeedVectorDestroy(&index_vec_in));
489   CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in));
490 
491   if (elem_rstr_in != elem_rstr_out) {
492     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
493     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
494               "Active input and output operator restrictions must have the same number of elements."
495               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
496               num_elem_in, num_elem_out);
497     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
498     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
499     CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, layout_er_out));
500 
501     // Determine elem_dof relation for output
502     CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out));
503     CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array));
504     for (CeedSize i = 0; i < num_nodes_out; i++) array[i] = i;
505     CeedCall(CeedVectorRestoreArray(index_vec_out, &array));
506     CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out));
507     CeedCall(CeedVectorSetValue(elem_dof_out, 0.0));
508     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out));
509     CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE));
510     CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out));
511     CeedCall(CeedVectorDestroy(&index_vec_out));
512     CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out));
513   } else {
514     num_elem_out     = num_elem_in;
515     elem_size_out    = elem_size_in;
516     num_comp_out     = num_comp_in;
517     layout_er_out[0] = layout_er_in[0];
518     layout_er_out[1] = layout_er_in[1];
519     layout_er_out[2] = layout_er_in[2];
520     elem_dof_a_out   = elem_dof_a_in;
521   }
522   local_num_entries = (CeedSize)elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
523 
524   // Determine i, j locations for element matrices
525   for (CeedInt e = 0; e < num_elem_in; e++) {
526     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
527       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
528         for (CeedInt i = 0; i < elem_size_out; i++) {
529           for (CeedInt j = 0; j < elem_size_in; j++) {
530             const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2];
531             const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2];
532             const CeedInt row                = elem_dof_a_out[elem_dof_index_row];
533             const CeedInt col                = elem_dof_a_in[elem_dof_index_col];
534 
535             rows[offset + count] = row;
536             cols[offset + count] = col;
537             count++;
538           }
539         }
540       }
541     }
542   }
543   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
544   CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in));
545   CeedCall(CeedVectorDestroy(&elem_dof_in));
546   if (elem_rstr_in != elem_rstr_out) {
547     CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out));
548     CeedCall(CeedVectorDestroy(&elem_dof_out));
549   }
550   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
551   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
552   CeedCall(CeedDestroy(&ceed));
553   return CEED_ERROR_SUCCESS;
554 }
555 
556 /**
557   @brief Core logic to assemble `CeedQFunction` and store result internally.
558 
559   Return copied references of stored data to the caller.
560   Caller is responsible for ownership and destruction of the copied references.
561   See also @ref CeedOperatorLinearAssembleQFunction().
562 
563   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
564         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
565 
566   @param[in]  op         `CeedOperator` to assemble `CeedQFunction`
567   @param[in]  use_parent Boolean flag to check for fallback parent implementation
568   @param[out] assembled  `CeedVector` to store assembled `CeedQFunction` at quadrature points
569   @param[out] rstr       `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
570   @param[in]  request    Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
571 
572   @return An error code: 0 - success, otherwise - failure
573 
574   @ref User
575 **/
576 static int CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(CeedOperator op, bool use_parent, CeedVector *assembled, CeedElemRestriction *rstr,
577                                                                  CeedRequest *request) {
578   int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL;
579   CeedOperator op_assemble                                                                           = NULL;
580   CeedOperator op_fallback_parent                                                                    = NULL;
581 
582   CeedCall(CeedOperatorCheckReady(op));
583 
584   // Determine if fallback parent or operator has implementation
585   CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent));
586   if (op_fallback_parent && use_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) {
587     // -- Backend version for op fallback parent is faster, if it exists
588     LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate;
589     op_assemble                   = op_fallback_parent;
590   } else if (op->LinearAssembleQFunctionUpdate) {
591     // -- Backend version for op
592     LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate;
593     op_assemble                   = op;
594   }
595 
596   // Assemble QFunction
597   if (LinearAssembleQFunctionUpdate) {
598     // Backend or fallback parent version
599     CeedQFunctionAssemblyData data;
600     bool                      data_is_setup;
601     CeedVector                assembled_vec  = NULL;
602     CeedElemRestriction       assembled_rstr = NULL;
603 
604     CeedCall(CeedOperatorGetQFunctionAssemblyData(op, &data));
605     CeedCall(CeedQFunctionAssemblyDataIsSetup(data, &data_is_setup));
606     if (data_is_setup) {
607       bool update_needed;
608 
609       CeedCall(CeedQFunctionAssemblyDataGetObjects(data, &assembled_vec, &assembled_rstr));
610       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(data, &update_needed));
611       if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request));
612     } else {
613       CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request));
614       CeedCall(CeedQFunctionAssemblyDataSetObjects(data, assembled_vec, assembled_rstr));
615     }
616     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(data, false));
617 
618     // Copy reference from internally held copy
619     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
620     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
621     CeedCall(CeedVectorDestroy(&assembled_vec));
622     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
623   } else {
624     // Operator fallback
625     CeedOperator op_fallback;
626 
627     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
628     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
629     else return CeedError(CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
630   }
631   return CEED_ERROR_SUCCESS;
632 }
633 
634 /**
635   @brief Assemble `CeedQFunction` and store result internally, but do not use fallback parent.
636 
637   Return copied references of stored data to the caller.
638   Caller is responsible for ownership and destruction of the copied references.
639   See also @ref CeedOperatorLinearAssembleQFunction().
640 
641   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
642         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
643 
644   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
645   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
646   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
647   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
648 
649   @return An error code: 0 - success, otherwise - failure
650 
651   @ref Developer
652 **/
653 int CeedOperatorFallbackLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr,
654                                                              CeedRequest *request) {
655   return CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(op, false, assembled, rstr, request);
656 }
657 
658 /**
659   @brief Assemble nonzero entries for non-composite `CeedOperator`.
660 
661   Users should generally use @ref CeedOperatorLinearAssemble().
662 
663   @param[in]  op     `CeedOperator` to assemble
664   @param[in]  offset Offset for number of entries
665   @param[out] values Values to assemble into matrix
666 
667   @return An error code: 0 - success, otherwise - failure
668 
669   @ref Developer
670 **/
671 int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
672   bool is_composite, is_at_points;
673 
674   CeedCall(CeedOperatorIsComposite(op, &is_composite));
675   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
676 
677   // Early exit for empty operator
678   {
679     CeedInt num_elem = 0;
680 
681     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
682     if (num_elem == 0) return CEED_ERROR_SUCCESS;
683   }
684 
685   if (op->LinearAssembleSingle) {
686     // Backend version
687     CeedCall(op->LinearAssembleSingle(op, offset, values));
688     return CEED_ERROR_SUCCESS;
689   } else {
690     // Operator fallback
691     CeedOperator op_fallback;
692 
693     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
694     if (op_fallback) {
695       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
696       return CEED_ERROR_SUCCESS;
697     }
698   }
699 
700   CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
701   CeedCheck(!is_at_points, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
702             "Backend does not implement CeedOperatorLinearAssemble for AtPoints operator");
703 
704   // Assemble QFunction
705   CeedInt             layout_qf[3];
706   const CeedScalar   *assembled_qf_array;
707   CeedVector          assembled_qf        = NULL;
708   CeedElemRestriction assembled_elem_rstr = NULL;
709 
710   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE));
711   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, layout_qf));
712   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
713   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
714 
715   // Get assembly data
716   CeedInt                  num_elem_in, elem_size_in, num_comp_in, num_qpts_in;
717   CeedInt                  num_elem_out, elem_size_out, num_comp_out, num_qpts_out;
718   CeedSize                 local_num_entries, count = 0;
719   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
720   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
721   CeedBasis               *active_bases_in, *active_bases_out, basis_in, basis_out;
722   const CeedScalar       **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out;
723   CeedElemRestriction      elem_rstr_in, elem_rstr_out;
724   CeedRestrictionType      elem_rstr_type_in, elem_rstr_type_out;
725   const bool              *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL;
726   const CeedInt8          *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL;
727   CeedOperatorAssemblyData data;
728 
729   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
730   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out,
731                                                 &num_eval_modes_out, &eval_modes_out, NULL, NULL));
732 
733   CeedCheck(num_active_bases_in == 1 && num_active_bases_out == 1, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
734             "Cannot assemble operator with multiple active bases");
735   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
736             "Cannot assemble operator without inputs/outputs");
737 
738   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out));
739   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
740   basis_in  = active_bases_in[0];
741   basis_out = active_bases_out[0];
742   B_mat_in  = B_mats_in[0];
743   B_mat_out = B_mats_out[0];
744 
745   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
746   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
747   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
748   if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in;
749   else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in));
750 
751   CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in));
752   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
753     CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in));
754   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
755     CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in));
756   }
757 
758   if (elem_rstr_in != elem_rstr_out) {
759     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
760     CeedCheck(num_elem_in == num_elem_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
761               "Active input and output operator restrictions must have the same number of elements."
762               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
763               num_elem_in, num_elem_out);
764     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
765     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
766     if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out;
767     else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out));
768     CeedCheck(num_qpts_in == num_qpts_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
769               "Active input and output bases must have the same number of quadrature points."
770               " Input has %" CeedInt_FMT " points; output has %" CeedInt_FMT "points.",
771               num_qpts_in, num_qpts_out);
772 
773     CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out));
774     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
775       CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out));
776     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
777       CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out));
778     }
779   } else {
780     num_elem_out  = num_elem_in;
781     elem_size_out = elem_size_in;
782     num_comp_out  = num_comp_in;
783     num_qpts_out  = num_qpts_in;
784 
785     elem_rstr_orients_out      = elem_rstr_orients_in;
786     elem_rstr_curl_orients_out = elem_rstr_curl_orients_in;
787   }
788   local_num_entries = (CeedSize)elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
789 
790   // Loop over elements and put in data structure
791   // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
792   CeedTensorContract contract;
793   CeedScalar        *vals, *BTD_mat = NULL, *elem_mat = NULL, *elem_mat_b = NULL;
794 
795   CeedCall(CeedBasisGetTensorContract(basis_in, &contract));
796   CeedCall(CeedCalloc(elem_size_out * num_qpts_in * num_eval_modes_in[0], &BTD_mat));
797   CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat));
798   if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b));
799 
800   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
801   for (CeedSize e = 0; e < num_elem_in; e++) {
802     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
803       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
804         // Compute B^T*D
805         for (CeedSize n = 0; n < elem_size_out; n++) {
806           for (CeedSize q = 0; q < num_qpts_in; q++) {
807             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
808               const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in;
809               CeedScalar     sum       = 0.0;
810 
811               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
812                 const CeedSize b_out_index     = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n;
813                 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out;
814                 const CeedSize qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
815 
816                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
817               }
818               BTD_mat[btd_index] = sum;
819             }
820           }
821         }
822 
823         // Form element matrix itself (for each block component)
824         if (contract) {
825           CeedCall(CeedTensorContractApply(contract, 1, num_qpts_in * num_eval_modes_in[0], elem_size_in, elem_size_out, BTD_mat, CEED_NOTRANSPOSE,
826                                            false, B_mat_in, elem_mat));
827         } else {
828           Ceed ceed;
829 
830           CeedCall(CeedOperatorGetCeed(op, &ceed));
831           CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size_out, elem_size_in, num_qpts_in * num_eval_modes_in[0]));
832           CeedCall(CeedDestroy(&ceed));
833         }
834 
835         // Transform the element matrix if required
836         if (elem_rstr_orients_out) {
837           const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out];
838 
839           for (CeedInt i = 0; i < elem_size_out; i++) {
840             const double orient = elem_orients[i] ? -1.0 : 1.0;
841 
842             for (CeedInt j = 0; j < elem_size_in; j++) {
843               elem_mat[i * elem_size_in + j] *= orient;
844             }
845           }
846         } else if (elem_rstr_curl_orients_out) {
847           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out];
848 
849           // T^T*(B^T*D*B)
850           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
851           for (CeedInt i = 0; i < elem_size_out; i++) {
852             for (CeedInt j = 0; j < elem_size_in; j++) {
853               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] +
854                                                (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) +
855                                                (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0);
856             }
857           }
858         }
859         if (elem_rstr_orients_in) {
860           const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in];
861 
862           for (CeedInt i = 0; i < elem_size_out; i++) {
863             for (CeedInt j = 0; j < elem_size_in; j++) {
864               elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0;
865             }
866           }
867         } else if (elem_rstr_curl_orients_in) {
868           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in];
869 
870           // (B^T*D*B)*T
871           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
872           for (CeedInt i = 0; i < elem_size_out; i++) {
873             for (CeedInt j = 0; j < elem_size_in; j++) {
874               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] +
875                                                (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) +
876                                                (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0);
877             }
878           }
879         }
880 
881         // Put element matrix in coordinate data structure
882         for (CeedInt i = 0; i < elem_size_out; i++) {
883           for (CeedInt j = 0; j < elem_size_in; j++) {
884             vals[offset + count] = elem_mat[i * elem_size_in + j];
885             count++;
886           }
887         }
888       }
889     }
890   }
891   CeedCheck(count == local_num_entries, CeedOperatorReturnCeed(op), CEED_ERROR_MAJOR, "Error computing entries");
892   CeedCall(CeedVectorRestoreArray(values, &vals));
893 
894   // Cleanup
895   CeedCall(CeedFree(&BTD_mat));
896   CeedCall(CeedFree(&elem_mat));
897   CeedCall(CeedFree(&elem_mat_b));
898   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
899     CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in));
900   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
901     CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in));
902   }
903   if (elem_rstr_in != elem_rstr_out) {
904     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
905       CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out));
906     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
907       CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out));
908     }
909   }
910   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
911   CeedCall(CeedVectorDestroy(&assembled_qf));
912   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
913   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
914   return CEED_ERROR_SUCCESS;
915 }
916 
917 /**
918   @brief Count number of entries for assembled `CeedOperator`
919 
920   @param[in]  op          `CeedOperator` to assemble
921   @param[out] num_entries Number of entries in assembled representation
922 
923   @return An error code: 0 - success, otherwise - failure
924 
925   @ref Utility
926 **/
927 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) {
928   bool                is_composite;
929   CeedInt             num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out;
930   CeedElemRestriction rstr_in, rstr_out;
931 
932   CeedCall(CeedOperatorIsComposite(op, &is_composite));
933   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
934 
935   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out));
936   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in));
937   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in));
938   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in));
939   if (rstr_in != rstr_out) {
940     CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out));
941     CeedCheck(num_elem_in == num_elem_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
942               "Active input and output operator restrictions must have the same number of elements."
943               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
944               num_elem_in, num_elem_out);
945     CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out));
946     CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out));
947   } else {
948     num_elem_out  = num_elem_in;
949     elem_size_out = elem_size_in;
950     num_comp_out  = num_comp_in;
951   }
952   CeedCall(CeedElemRestrictionDestroy(&rstr_in));
953   CeedCall(CeedElemRestrictionDestroy(&rstr_out));
954   *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in;
955   return CEED_ERROR_SUCCESS;
956 }
957 
958 /**
959   @brief Common code for creating a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator`
960 
961   @param[in]  op_fine      Fine grid `CeedOperator`
962   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
963   @param[in]  rstr_coarse  Coarse grid `CeedElemRestriction`
964   @param[in]  basis_coarse Coarse grid active vector `CeedBasis`
965   @param[in]  basis_c_to_f `CeedBasis` for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction operators
966   @param[out] op_coarse    Coarse grid `CeedOperator`
967   @param[out] op_prolong   Coarse to fine `CeedOperator`, or `NULL`
968   @param[out] op_restrict  Fine to coarse `CeedOperator`, or `NULL`
969 
970   @return An error code: 0 - success, otherwise - failure
971 
972   @ref Developer
973 **/
974 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
975                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
976   bool                is_composite;
977   Ceed                ceed;
978   CeedInt             num_comp, num_input_fields, num_output_fields;
979   CeedVector          mult_vec         = NULL;
980   CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL;
981   CeedOperatorField  *input_fields, *output_fields;
982 
983   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
984 
985   // Check for composite operator
986   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
987   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
988 
989   // Coarse Grid
990   {
991     bool is_at_points;
992 
993     CeedCall(CeedOperatorIsAtPoints(op_fine, &is_at_points));
994     if (is_at_points) {
995       CeedVector          point_coords;
996       CeedElemRestriction rstr_points;
997 
998       CeedCall(CeedOperatorCreateAtPoints(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
999       CeedCall(CeedOperatorAtPointsGetPoints(op_fine, &rstr_points, &point_coords));
1000       CeedCall(CeedOperatorAtPointsSetPoints(*op_coarse, rstr_points, point_coords));
1001       CeedCall(CeedVectorDestroy(&point_coords));
1002       CeedCall(CeedElemRestrictionDestroy(&rstr_points));
1003     } else {
1004       CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
1005     }
1006   }
1007   CeedCall(CeedOperatorGetFields(op_fine, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
1008   // -- Clone input fields
1009   for (CeedInt i = 0; i < num_input_fields; i++) {
1010     const char         *field_name;
1011     CeedVector          vec;
1012     CeedElemRestriction rstr  = NULL;
1013     CeedBasis           basis = NULL;
1014 
1015     CeedCall(CeedOperatorFieldGetName(input_fields[i], &field_name));
1016     CeedCall(CeedOperatorFieldGetVector(input_fields[i], &vec));
1017     if (vec == CEED_VECTOR_ACTIVE) {
1018       CeedCall(CeedElemRestrictionReferenceCopy(rstr_coarse, &rstr));
1019       CeedCall(CeedBasisReferenceCopy(basis_coarse, &basis));
1020       if (!rstr_fine) CeedCall(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_fine));
1021     } else {
1022       CeedCall(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr));
1023       CeedCall(CeedOperatorFieldGetBasis(input_fields[i], &basis));
1024     }
1025     CeedCall(CeedOperatorSetField(*op_coarse, field_name, rstr, basis, vec));
1026     CeedCall(CeedVectorDestroy(&vec));
1027     CeedCall(CeedElemRestrictionDestroy(&rstr));
1028     CeedCall(CeedBasisDestroy(&basis));
1029   }
1030   // -- Clone output fields
1031   for (CeedInt i = 0; i < num_output_fields; i++) {
1032     const char         *field_name;
1033     CeedVector          vec;
1034     CeedElemRestriction rstr  = NULL;
1035     CeedBasis           basis = NULL;
1036 
1037     CeedCall(CeedOperatorFieldGetName(output_fields[i], &field_name));
1038     CeedCall(CeedOperatorFieldGetVector(output_fields[i], &vec));
1039     if (vec == CEED_VECTOR_ACTIVE) {
1040       CeedCall(CeedElemRestrictionReferenceCopy(rstr_coarse, &rstr));
1041       CeedCall(CeedBasisReferenceCopy(basis_coarse, &basis));
1042       if (!rstr_fine) CeedCall(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_fine));
1043     } else {
1044       CeedCall(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr));
1045       CeedCall(CeedOperatorFieldGetBasis(output_fields[i], &basis));
1046     }
1047     CeedCall(CeedOperatorSetField(*op_coarse, field_name, rstr, basis, vec));
1048     CeedCall(CeedVectorDestroy(&vec));
1049     CeedCall(CeedElemRestrictionDestroy(&rstr));
1050     CeedCall(CeedBasisDestroy(&basis));
1051   }
1052   // -- Clone QFunctionAssemblyData
1053   {
1054     CeedQFunctionAssemblyData fine_data;
1055 
1056     CeedCall(CeedOperatorGetQFunctionAssemblyData(op_fine, &fine_data));
1057     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(fine_data, &(*op_coarse)->qf_assembled));
1058   }
1059 
1060   // Multiplicity vector
1061   if (op_restrict || op_prolong) {
1062     CeedVector          mult_e_vec;
1063     CeedRestrictionType rstr_type;
1064 
1065     CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type));
1066     CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED,
1067               "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported");
1068     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
1069     CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine));
1070     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
1071     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
1072     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
1073     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
1074     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
1075     CeedCall(CeedVectorDestroy(&mult_e_vec));
1076     CeedCall(CeedVectorReciprocal(mult_vec));
1077   }
1078 
1079   // Clone name
1080   bool   has_name = op_fine->name;
1081   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
1082   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
1083 
1084   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
1085   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
1086             "Prolongation or restriction operator creation requires coarse-to-fine basis");
1087 
1088   // Restriction/Prolongation Operators
1089   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
1090 
1091   // Restriction
1092   if (op_restrict) {
1093     CeedInt             *num_comp_r_data;
1094     CeedQFunctionContext ctx_r;
1095     CeedQFunction        qf_restrict;
1096 
1097     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
1098     CeedCall(CeedCalloc(1, &num_comp_r_data));
1099     num_comp_r_data[0] = num_comp;
1100     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
1101     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
1102     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
1103     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
1104     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
1105     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
1106     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
1107     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
1108 
1109     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
1110     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
1111     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
1112     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
1113 
1114     // Set name
1115     char *restriction_name;
1116 
1117     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
1118     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
1119     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
1120     CeedCall(CeedFree(&restriction_name));
1121 
1122     // Check
1123     CeedCall(CeedOperatorCheckReady(*op_restrict));
1124 
1125     // Cleanup
1126     CeedCall(CeedQFunctionDestroy(&qf_restrict));
1127   }
1128 
1129   // Prolongation
1130   if (op_prolong) {
1131     CeedInt             *num_comp_p_data;
1132     CeedQFunctionContext ctx_p;
1133     CeedQFunction        qf_prolong;
1134 
1135     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
1136     CeedCall(CeedCalloc(1, &num_comp_p_data));
1137     num_comp_p_data[0] = num_comp;
1138     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
1139     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
1140     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
1141     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
1142     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
1143     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
1144     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
1145     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
1146 
1147     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
1148     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
1149     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
1150     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
1151 
1152     // Set name
1153     char *prolongation_name;
1154 
1155     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
1156     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
1157     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
1158     CeedCall(CeedFree(&prolongation_name));
1159 
1160     // Check
1161     CeedCall(CeedOperatorCheckReady(*op_prolong));
1162 
1163     // Cleanup
1164     CeedCall(CeedQFunctionDestroy(&qf_prolong));
1165   }
1166 
1167   // Check
1168   CeedCall(CeedOperatorCheckReady(*op_coarse));
1169 
1170   // Cleanup
1171   CeedCall(CeedDestroy(&ceed));
1172   CeedCall(CeedVectorDestroy(&mult_vec));
1173   CeedCall(CeedElemRestrictionDestroy(&rstr_fine));
1174   CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine));
1175   CeedCall(CeedBasisDestroy(&basis_c_to_f));
1176   return CEED_ERROR_SUCCESS;
1177 }
1178 
1179 /**
1180   @brief Build 1D mass matrix and Laplacian with perturbation
1181 
1182   @param[in]  interp_1d   Interpolation matrix in one dimension
1183   @param[in]  grad_1d     Gradient matrix in one dimension
1184   @param[in]  q_weight_1d Quadrature weights in one dimension
1185   @param[in]  P_1d        Number of basis nodes in one dimension
1186   @param[in]  Q_1d        Number of quadrature points in one dimension
1187   @param[in]  dim         Dimension of basis
1188   @param[out] mass        Assembled mass matrix in one dimension
1189   @param[out] laplace     Assembled perturbed Laplacian in one dimension
1190 
1191   @return An error code: 0 - success, otherwise - failure
1192 
1193   @ref Developer
1194 **/
1195 CeedPragmaOptimizeOff
1196 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
1197                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
1198   for (CeedInt i = 0; i < P_1d; i++) {
1199     for (CeedInt j = 0; j < P_1d; j++) {
1200       CeedScalar sum = 0.0;
1201       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
1202       mass[i + j * P_1d] = sum;
1203     }
1204   }
1205   // -- Laplacian
1206   for (CeedInt i = 0; i < P_1d; i++) {
1207     for (CeedInt j = 0; j < P_1d; j++) {
1208       CeedScalar sum = 0.0;
1209 
1210       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
1211       laplace[i + j * P_1d] = sum;
1212     }
1213   }
1214   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
1215   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
1216   return CEED_ERROR_SUCCESS;
1217 }
1218 CeedPragmaOptimizeOn
1219 
1220 /// @}
1221 
1222 /// ----------------------------------------------------------------------------
1223 /// CeedOperator Backend API
1224 /// ----------------------------------------------------------------------------
1225 /// @addtogroup CeedOperatorBackend
1226 /// @{
1227 
1228 /**
1229   @brief Select correct basis matrix pointer based on @ref CeedEvalMode
1230 
1231   @param[in]  basis     `CeedBasis` from which to get the basis matrix
1232   @param[in]  eval_mode Current basis evaluation mode
1233   @param[in]  identity  Pointer to identity matrix
1234   @param[out] basis_ptr `CeedBasis` pointer to set
1235 
1236   @ref Backend
1237 **/
1238 int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
1239   switch (eval_mode) {
1240     case CEED_EVAL_NONE:
1241       *basis_ptr = identity;
1242       break;
1243     case CEED_EVAL_INTERP:
1244       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
1245       break;
1246     case CEED_EVAL_GRAD:
1247       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
1248       break;
1249     case CEED_EVAL_DIV:
1250       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
1251       break;
1252     case CEED_EVAL_CURL:
1253       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
1254       break;
1255     case CEED_EVAL_WEIGHT:
1256       break;  // Caught by QF Assembly
1257   }
1258   assert(*basis_ptr != NULL);
1259   return CEED_ERROR_SUCCESS;
1260 }
1261 
1262 /**
1263   @brief Create point block restriction for active `CeedOperatorField`
1264 
1265   @param[in]  rstr             Original `CeedElemRestriction` for active field
1266   @param[out] point_block_rstr Address of the variable where the newly created `CeedElemRestriction` will be stored
1267 
1268   @return An error code: 0 - success, otherwise - failure
1269 
1270   @ref Backend
1271 **/
1272 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) {
1273   Ceed           ceed;
1274   CeedInt        num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets;
1275   CeedSize       l_size;
1276   const CeedInt *offsets;
1277 
1278   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
1279   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
1280 
1281   // Expand offsets
1282   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
1283   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
1284   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
1285   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
1286   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
1287   shift = num_comp;
1288   if (comp_stride != 1) shift *= num_comp;
1289   CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets));
1290   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
1291     point_block_offsets[i] = offsets[i] * shift;
1292   }
1293 
1294   // Create new restriction
1295   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
1296                                      point_block_offsets, point_block_rstr));
1297 
1298   // Cleanup
1299   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
1300   CeedCall(CeedDestroy(&ceed));
1301   return CEED_ERROR_SUCCESS;
1302 }
1303 
1304 /**
1305   @brief Get `CeedQFunctionAssemblyData`
1306 
1307   @param[in]  op   `CeedOperator` to assemble
1308   @param[out] data `CeedQFunctionAssemblyData`
1309 
1310   @return An error code: 0 - success, otherwise - failure
1311 
1312   @ref Backend
1313 **/
1314 int CeedOperatorGetQFunctionAssemblyData(CeedOperator op, CeedQFunctionAssemblyData *data) {
1315   if (!op->qf_assembled) {
1316     CeedQFunctionAssemblyData data;
1317 
1318     CeedCall(CeedQFunctionAssemblyDataCreate(op->ceed, &data));
1319     op->qf_assembled = data;
1320   }
1321   *data = op->qf_assembled;
1322   return CEED_ERROR_SUCCESS;
1323 }
1324 
1325 /**
1326   @brief Create object holding `CeedQFunction` assembly data for `CeedOperator`
1327 
1328   @param[in]  ceed `Ceed` object used to create the `CeedQFunctionAssemblyData`
1329   @param[out] data Address of the variable where the newly created `CeedQFunctionAssemblyData` will be stored
1330 
1331   @return An error code: 0 - success, otherwise - failure
1332 
1333   @ref Backend
1334 **/
1335 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
1336   CeedCall(CeedCalloc(1, data));
1337   (*data)->ref_count = 1;
1338   (*data)->ceed      = ceed;
1339   CeedCall(CeedReference(ceed));
1340   return CEED_ERROR_SUCCESS;
1341 }
1342 
1343 /**
1344   @brief Increment the reference counter for a `CeedQFunctionAssemblyData`
1345 
1346   @param[in,out] data `CeedQFunctionAssemblyData` to increment the reference counter
1347 
1348   @return An error code: 0 - success, otherwise - failure
1349 
1350   @ref Backend
1351 **/
1352 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
1353   data->ref_count++;
1354   return CEED_ERROR_SUCCESS;
1355 }
1356 
1357 /**
1358   @brief Set re-use of `CeedQFunctionAssemblyData`
1359 
1360   @param[in,out] data       `CeedQFunctionAssemblyData` to mark for reuse
1361   @param[in]     reuse_data Boolean flag indicating data re-use
1362 
1363   @return An error code: 0 - success, otherwise - failure
1364 
1365   @ref Backend
1366 **/
1367 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
1368   data->reuse_data        = reuse_data;
1369   data->needs_data_update = true;
1370   return CEED_ERROR_SUCCESS;
1371 }
1372 
1373 /**
1374   @brief Mark `CeedQFunctionAssemblyData` as stale
1375 
1376   @param[in,out] data              `CeedQFunctionAssemblyData` to mark as stale
1377   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
1378 
1379   @return An error code: 0 - success, otherwise - failure
1380 
1381   @ref Backend
1382 **/
1383 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
1384   data->needs_data_update = needs_data_update;
1385   return CEED_ERROR_SUCCESS;
1386 }
1387 
1388 /**
1389   @brief Determine if `CeedQFunctionAssemblyData` needs update
1390 
1391   @param[in]  data             `CeedQFunctionAssemblyData` to mark as stale
1392   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1393 
1394   @return An error code: 0 - success, otherwise - failure
1395 
1396   @ref Backend
1397 **/
1398 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
1399   *is_update_needed = !data->reuse_data || data->needs_data_update;
1400   return CEED_ERROR_SUCCESS;
1401 }
1402 
1403 /**
1404   @brief Copy the pointer to a `CeedQFunctionAssemblyData`.
1405 
1406   Both pointers should be destroyed with @ref CeedQFunctionAssemblyDataDestroy().
1407 
1408   Note: If the value of ` *data_copy` passed to this function is non-`NULL` , then it is assumed that ` *data_copy` is a pointer to a `CeedQFunctionAssemblyData`.
1409         This `CeedQFunctionAssemblyData` will be destroyed if ` *data_copy` is the only reference to this `CeedQFunctionAssemblyData`.
1410 
1411   @param[in]     data      `CeedQFunctionAssemblyData` to copy reference to
1412   @param[in,out] data_copy Variable to store copied reference
1413 
1414   @return An error code: 0 - success, otherwise - failure
1415 
1416   @ref Backend
1417 **/
1418 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1419   CeedCall(CeedQFunctionAssemblyDataReference(data));
1420   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1421   *data_copy = data;
1422   return CEED_ERROR_SUCCESS;
1423 }
1424 
1425 /**
1426   @brief Get setup status for internal objects for `CeedQFunctionAssemblyData`
1427 
1428   @param[in]  data     `CeedQFunctionAssemblyData` to retrieve status
1429   @param[out] is_setup Boolean flag for setup status
1430 
1431   @return An error code: 0 - success, otherwise - failure
1432 
1433   @ref Backend
1434 **/
1435 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1436   *is_setup = data->is_setup;
1437   return CEED_ERROR_SUCCESS;
1438 }
1439 
1440 /**
1441   @brief Set internal objects for `CeedQFunctionAssemblyData`
1442 
1443   @param[in,out] data `CeedQFunctionAssemblyData` to set objects
1444   @param[in]     vec  `CeedVector` to store assembled `CeedQFunction` at quadrature points
1445   @param[in]     rstr `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
1446 
1447   @return An error code: 0 - success, otherwise - failure
1448 
1449   @ref Backend
1450 **/
1451 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1452   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1453   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1454 
1455   data->is_setup = true;
1456   return CEED_ERROR_SUCCESS;
1457 }
1458 
1459 /**
1460   @brief Get internal objects for `CeedQFunctionAssemblyData`
1461 
1462   @param[in,out] data `CeedQFunctionAssemblyData` to set objects
1463   @param[out]    vec  `CeedVector` to store assembled `CeedQFunction` at quadrature points
1464   @param[out]    rstr `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
1465 
1466   @return An error code: 0 - success, otherwise - failure
1467 
1468   @ref Backend
1469 **/
1470 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1471   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1472 
1473   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1474   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1475   return CEED_ERROR_SUCCESS;
1476 }
1477 
1478 /**
1479   @brief Destroy `CeedQFunctionAssemblyData`
1480 
1481   @param[in,out] data  `CeedQFunctionAssemblyData` to destroy
1482 
1483   @return An error code: 0 - success, otherwise - failure
1484 
1485   @ref Backend
1486 **/
1487 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1488   if (!*data || --(*data)->ref_count > 0) {
1489     *data = NULL;
1490     return CEED_ERROR_SUCCESS;
1491   }
1492   CeedCall(CeedDestroy(&(*data)->ceed));
1493   CeedCall(CeedVectorDestroy(&(*data)->vec));
1494   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1495 
1496   CeedCall(CeedFree(data));
1497   return CEED_ERROR_SUCCESS;
1498 }
1499 
1500 /**
1501   @brief Get `CeedOperatorAssemblyData`
1502 
1503   @param[in]  op   `CeedOperator` to assemble
1504   @param[out] data `CeedOperatorAssemblyData`
1505 
1506   @return An error code: 0 - success, otherwise - failure
1507 
1508   @ref Backend
1509 **/
1510 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1511   if (!op->op_assembled) {
1512     CeedOperatorAssemblyData data;
1513 
1514     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1515     op->op_assembled = data;
1516   }
1517   *data = op->op_assembled;
1518   return CEED_ERROR_SUCCESS;
1519 }
1520 
1521 /**
1522   @brief Create object holding `CeedOperator` assembly data.
1523 
1524   The `CeedOperatorAssemblyData` holds an array with references to every active `CeedBasis` used in the `CeedOperator`.
1525   An array with references to the corresponding active `CeedElemRestriction` is also stored.
1526   For each active `CeedBasis, the `CeedOperatorAssemblyData` holds an array of all input and output @ref CeedEvalMode for this `CeedBasis`.
1527   The `CeedOperatorAssemblyData` holds an array of offsets for indexing into the assembled `CeedQFunction` arrays to the row representing each @ref CeedEvalMode.
1528   The number of input columns across all active bases for the assembled `CeedQFunction` is also stored.
1529   Lastly, the `CeedOperatorAssembly` data holds assembled matrices representing the full action of the `CeedBasis` for all @ref CeedEvalMode.
1530 
1531   @param[in]  ceed `Ceed` object used to create the `CeedOperatorAssemblyData`
1532   @param[in]  op   `CeedOperator` to be assembled
1533   @param[out] data Address of the variable where the newly created `CeedOperatorAssemblyData` will be stored
1534 
1535   @return An error code: 0 - success, otherwise - failure
1536 
1537   @ref Backend
1538 **/
1539 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1540   CeedInt             num_active_bases_in = 0, num_active_bases_out = 0, offset = 0;
1541   CeedInt             num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL;
1542   CeedSize          **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1543   CeedEvalMode      **eval_modes_in = NULL, **eval_modes_out = NULL;
1544   CeedQFunctionField *qf_fields;
1545   CeedQFunction       qf;
1546   CeedOperatorField  *op_fields;
1547   bool                is_composite;
1548 
1549   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1550   CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators.");
1551 
1552   // Allocate
1553   CeedCall(CeedCalloc(1, data));
1554   (*data)->ceed = ceed;
1555   CeedCall(CeedReference(ceed));
1556 
1557   // Build OperatorAssembly data
1558   CeedCall(CeedOperatorGetQFunction(op, &qf));
1559 
1560   // Determine active input basis
1561   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1562   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1563   for (CeedInt i = 0; i < num_input_fields; i++) {
1564     CeedVector vec;
1565 
1566     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1567     if (vec == CEED_VECTOR_ACTIVE) {
1568       CeedInt      index = -1, num_comp, q_comp;
1569       CeedEvalMode eval_mode;
1570       CeedBasis    basis_in = NULL;
1571 
1572       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1573       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1574       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1575       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1576       for (CeedInt i = 0; i < num_active_bases_in; i++) {
1577         if ((*data)->active_bases_in[i] == basis_in) index = i;
1578       }
1579       if (index == -1) {
1580         CeedElemRestriction elem_rstr_in;
1581 
1582         index = num_active_bases_in;
1583         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in));
1584         (*data)->active_bases_in[num_active_bases_in] = NULL;
1585         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in]));
1586         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in));
1587         (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL;
1588         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1589         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in]));
1590         CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
1591         CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in));
1592         num_eval_modes_in[index] = 0;
1593         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in));
1594         eval_modes_in[index] = NULL;
1595         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in));
1596         eval_mode_offsets_in[index] = NULL;
1597         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in));
1598         (*data)->assembled_bases_in[index] = NULL;
1599         num_active_bases_in++;
1600       }
1601       if (eval_mode != CEED_EVAL_WEIGHT) {
1602         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1603         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1604         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1605         for (CeedInt d = 0; d < q_comp; d++) {
1606           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1607           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1608           offset += num_comp;
1609         }
1610         num_eval_modes_in[index] += q_comp;
1611       }
1612       CeedCall(CeedBasisDestroy(&basis_in));
1613     }
1614     CeedCall(CeedVectorDestroy(&vec));
1615   }
1616 
1617   // Determine active output basis
1618   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1619   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1620   offset = 0;
1621   for (CeedInt i = 0; i < num_output_fields; i++) {
1622     CeedVector vec;
1623 
1624     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1625     if (vec == CEED_VECTOR_ACTIVE) {
1626       CeedInt      index = -1, num_comp, q_comp;
1627       CeedEvalMode eval_mode;
1628       CeedBasis    basis_out = NULL;
1629 
1630       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1631       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1632       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1633       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1634       for (CeedInt i = 0; i < num_active_bases_out; i++) {
1635         if ((*data)->active_bases_out[i] == basis_out) index = i;
1636       }
1637       if (index == -1) {
1638         CeedElemRestriction elem_rstr_out;
1639 
1640         index = num_active_bases_out;
1641         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out));
1642         (*data)->active_bases_out[num_active_bases_out] = NULL;
1643         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out]));
1644         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out));
1645         (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL;
1646         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1647         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out]));
1648         CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
1649         CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out));
1650         num_eval_modes_out[index] = 0;
1651         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out));
1652         eval_modes_out[index] = NULL;
1653         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out));
1654         eval_mode_offsets_out[index] = NULL;
1655         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out));
1656         (*data)->assembled_bases_out[index] = NULL;
1657         num_active_bases_out++;
1658       }
1659       if (eval_mode != CEED_EVAL_WEIGHT) {
1660         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1661         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1662         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1663         for (CeedInt d = 0; d < q_comp; d++) {
1664           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1665           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1666           offset += num_comp;
1667         }
1668         num_eval_modes_out[index] += q_comp;
1669       }
1670       CeedCall(CeedBasisDestroy(&basis_out));
1671     }
1672     CeedCall(CeedVectorDestroy(&vec));
1673   }
1674   CeedCall(CeedQFunctionDestroy(&qf));
1675   (*data)->num_active_bases_in   = num_active_bases_in;
1676   (*data)->num_eval_modes_in     = num_eval_modes_in;
1677   (*data)->eval_modes_in         = eval_modes_in;
1678   (*data)->eval_mode_offsets_in  = eval_mode_offsets_in;
1679   (*data)->num_active_bases_out  = num_active_bases_out;
1680   (*data)->num_eval_modes_out    = num_eval_modes_out;
1681   (*data)->eval_modes_out        = eval_modes_out;
1682   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1683   (*data)->num_output_components = offset;
1684   return CEED_ERROR_SUCCESS;
1685 }
1686 
1687 /**
1688   @brief Get `CeedOperator` @ref CeedEvalMode for assembly.
1689 
1690   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1691 
1692   @param[in]  data                  `CeedOperatorAssemblyData`
1693   @param[out] num_active_bases_in   Total number of active bases for input
1694   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input @ref CeedEvalMode, or `NULL`.
1695                                       `eval_modes_in[0]` holds an array of eval modes for the first active `CeedBasis`.
1696   @param[out] eval_modes_in         Pointer to hold arrays of input @ref CeedEvalMode, or `NULL`
1697   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point
1698   @param[out] num_active_bases_out  Total number of active bases for output
1699   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output @ref CeedEvalMode, or `NULL`
1700   @param[out] eval_modes_out        Pointer to hold arrays of output @ref CeedEvalMode, or `NULL`
1701   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1702   @param[out] num_output_components The number of columns in the assembled `CeedQFunction` matrix for each quadrature point, including contributions of all active bases
1703 
1704   @return An error code: 0 - success, otherwise - failure
1705 
1706   @ref Backend
1707 **/
1708 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in,
1709                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out,
1710                                          CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out,
1711                                          CeedSize *num_output_components) {
1712   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1713   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1714   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1715   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1716   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1717   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1718   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1719   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1720   if (num_output_components) *num_output_components = data->num_output_components;
1721   return CEED_ERROR_SUCCESS;
1722 }
1723 
1724 /**
1725   @brief Get `CeedOperator` `CeedBasis` data for assembly.
1726 
1727   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1728 
1729   @param[in]  data                 `CeedOperatorAssemblyData`
1730   @param[out] num_active_bases_in  Number of active input bases, or `NULL`
1731   @param[out] active_bases_in      Pointer to hold active input `CeedBasis`, or `NULL`
1732   @param[out] assembled_bases_in   Pointer to hold assembled active input `B` , or `NULL`
1733   @param[out] num_active_bases_out Number of active output bases, or `NULL`
1734   @param[out] active_bases_out     Pointer to hold active output `CeedBasis`, or `NULL`
1735   @param[out] assembled_bases_out  Pointer to hold assembled active output `B` , or `NULL`
1736 
1737   @return An error code: 0 - success, otherwise - failure
1738 
1739   @ref Backend
1740 **/
1741 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in,
1742                                      const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out,
1743                                      const CeedScalar ***assembled_bases_out) {
1744   // Assemble B_in, B_out if needed
1745   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1746     CeedInt num_qpts;
1747 
1748     if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts));
1749     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts));
1750     for (CeedInt b = 0; b < data->num_active_bases_in; b++) {
1751       bool        has_eval_none = false;
1752       CeedInt     num_nodes;
1753       CeedScalar *B_in = NULL, *identity = NULL;
1754 
1755       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes));
1756       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1757 
1758       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1759         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1760       }
1761       if (has_eval_none) {
1762         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1763         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1764           identity[i * num_nodes + i] = 1.0;
1765         }
1766       }
1767 
1768       for (CeedInt q = 0; q < num_qpts; q++) {
1769         for (CeedInt n = 0; n < num_nodes; n++) {
1770           CeedInt      d_in              = 0, q_comp_in;
1771           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1772 
1773           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1774             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1775             const CeedScalar *B  = NULL;
1776 
1777             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B));
1778             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in));
1779             if (q_comp_in > 1) {
1780               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1781               else B = &B[(++d_in) * num_qpts * num_nodes];
1782             }
1783             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1784             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1785           }
1786         }
1787       }
1788       if (identity) CeedCall(CeedFree(&identity));
1789       data->assembled_bases_in[b] = B_in;
1790     }
1791   }
1792 
1793   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1794     CeedInt num_qpts;
1795 
1796     if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts));
1797     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts));
1798     for (CeedInt b = 0; b < data->num_active_bases_out; b++) {
1799       bool        has_eval_none = false;
1800       CeedInt     num_nodes;
1801       CeedScalar *B_out = NULL, *identity = NULL;
1802 
1803       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes));
1804       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1805 
1806       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1807         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1808       }
1809       if (has_eval_none) {
1810         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1811         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1812           identity[i * num_nodes + i] = 1.0;
1813         }
1814       }
1815 
1816       for (CeedInt q = 0; q < num_qpts; q++) {
1817         for (CeedInt n = 0; n < num_nodes; n++) {
1818           CeedInt      d_out              = 0, q_comp_out;
1819           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1820 
1821           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1822             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1823             const CeedScalar *B  = NULL;
1824 
1825             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B));
1826             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out));
1827             if (q_comp_out > 1) {
1828               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1829               else B = &B[(++d_out) * num_qpts * num_nodes];
1830             }
1831             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1832             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1833           }
1834         }
1835       }
1836       if (identity) CeedCall(CeedFree(&identity));
1837       data->assembled_bases_out[b] = B_out;
1838     }
1839   }
1840 
1841   // Pass out assembled data
1842   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1843   if (active_bases_in) *active_bases_in = data->active_bases_in;
1844   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1845   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1846   if (active_bases_out) *active_bases_out = data->active_bases_out;
1847   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1848   return CEED_ERROR_SUCCESS;
1849 }
1850 
1851 /**
1852   @brief Get `CeedOperator` `CeedBasis` data for assembly.
1853 
1854   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1855 
1856   @param[in]  data                      `CeedOperatorAssemblyData`
1857   @param[out] num_active_elem_rstrs_in  Number of active input element restrictions, or `NULL`
1858   @param[out] active_elem_rstrs_in      Pointer to hold active input `CeedElemRestriction`, or `NULL`
1859   @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or `NULL`
1860   @param[out] active_elem_rstrs_out     Pointer to hold active output `CeedElemRestriction`, or `NULL`
1861 
1862   @return An error code: 0 - success, otherwise - failure
1863 
1864   @ref Backend
1865 **/
1866 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in,
1867                                                 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out,
1868                                                 CeedElemRestriction **active_elem_rstrs_out) {
1869   if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in;
1870   if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in;
1871   if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out;
1872   if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out;
1873   return CEED_ERROR_SUCCESS;
1874 }
1875 
1876 /**
1877   @brief Destroy `CeedOperatorAssemblyData`
1878 
1879   @param[in,out] data `CeedOperatorAssemblyData` to destroy
1880 
1881   @return An error code: 0 - success, otherwise - failure
1882 
1883   @ref Backend
1884 **/
1885 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1886   if (!*data) {
1887     *data = NULL;
1888     return CEED_ERROR_SUCCESS;
1889   }
1890   CeedCall(CeedDestroy(&(*data)->ceed));
1891   for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) {
1892     CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b]));
1893     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b]));
1894     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1895     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1896     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1897   }
1898   for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) {
1899     CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b]));
1900     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b]));
1901     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1902     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1903     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1904   }
1905   CeedCall(CeedFree(&(*data)->active_bases_in));
1906   CeedCall(CeedFree(&(*data)->active_bases_out));
1907   CeedCall(CeedFree(&(*data)->active_elem_rstrs_in));
1908   CeedCall(CeedFree(&(*data)->active_elem_rstrs_out));
1909   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1910   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1911   CeedCall(CeedFree(&(*data)->eval_modes_in));
1912   CeedCall(CeedFree(&(*data)->eval_modes_out));
1913   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1914   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1915   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1916   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1917 
1918   CeedCall(CeedFree(data));
1919   return CEED_ERROR_SUCCESS;
1920 }
1921 
1922 /**
1923   @brief Retrieve fallback `CeedOperator` with a reference `Ceed` for advanced `CeedOperator` functionality
1924 
1925   @param[in]  op          `CeedOperator` to retrieve fallback for
1926   @param[out] op_fallback Fallback `CeedOperator`
1927 
1928   @return An error code: 0 - success, otherwise - failure
1929 
1930   @ref Backend
1931 **/
1932 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
1933   // Create if needed
1934   if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op));
1935   if (op->op_fallback) {
1936     bool is_debug;
1937     Ceed ceed;
1938 
1939     CeedCall(CeedOperatorGetCeed(op, &ceed));
1940     CeedCall(CeedIsDebug(ceed, &is_debug));
1941     if (is_debug) {
1942       Ceed        ceed_fallback;
1943       const char *resource, *resource_fallback, *op_name;
1944 
1945       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
1946       CeedCall(CeedGetResource(ceed, &resource));
1947       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
1948       CeedCall(CeedOperatorGetName(op, &op_name));
1949 
1950       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n");
1951       CeedDebug(ceed, "CeedOperator \"%s\": falling back from operator at address %p with backend %s to operator at address %p with backend %s\n",
1952                 op_name, op, resource, op->op_fallback, resource_fallback);
1953       CeedCall(CeedDestroy(&ceed_fallback));
1954     }
1955     CeedCall(CeedDestroy(&ceed));
1956   }
1957   *op_fallback = op->op_fallback;
1958   return CEED_ERROR_SUCCESS;
1959 }
1960 
1961 /**
1962   @brief Get the parent `CeedOperator` for a fallback `CeedOperator`
1963 
1964   @param[in]  op     `CeedOperator` context
1965   @param[out] parent Variable to store parent `CeedOperator` context
1966 
1967   @return An error code: 0 - success, otherwise - failure
1968 
1969   @ref Backend
1970 **/
1971 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) {
1972   *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL;
1973   return CEED_ERROR_SUCCESS;
1974 }
1975 
1976 /**
1977   @brief Get the `Ceed` context of the parent `CeedOperator` for a fallback `CeedOperator`
1978 
1979   @param[in]  op     `CeedOperator` context
1980   @param[out] parent Variable to store parent `Ceed` context
1981 
1982   @return An error code: 0 - success, otherwise - failure
1983 
1984   @ref Backend
1985 **/
1986 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) {
1987   *parent = NULL;
1988   if (op->op_fallback_parent) CeedCall(CeedReferenceCopy(op->op_fallback_parent->ceed, parent));
1989   else CeedCall(CeedReferenceCopy(CeedOperatorReturnCeed(op), parent));
1990   return CEED_ERROR_SUCCESS;
1991 }
1992 
1993 /// @}
1994 
1995 /// ----------------------------------------------------------------------------
1996 /// CeedOperator Public API
1997 /// ----------------------------------------------------------------------------
1998 /// @addtogroup CeedOperatorUser
1999 /// @{
2000 
2001 /**
2002   @brief Assemble a linear `CeedQFunction` associated with a `CeedOperator`.
2003 
2004   This returns a `CeedVector` containing a matrix at each quadrature point providing the action of the `CeedQFunction` associated with the `CeedOperator`.
2005   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices representing the action of the `CeedQFunction` for a corresponding quadrature point on an element.
2006 
2007   Inputs and outputs are in the order provided by the user when adding `CeedOperator` fields.
2008   For example, a `CeedQFunction` with inputs `u` and `gradu` and outputs `gradv` and `v` , provided in that order, would result in an assembled `CeedQFunction` that consists of `(1 + dim) x (dim + 1)` matrices at each quadrature point acting on the input ` [u, du_0, du_1]` and producing the output `[dv_0, dv_1, v]`.
2009 
2010   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2011 
2012   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2013   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
2014   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
2015   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2016 
2017   @return An error code: 0 - success, otherwise - failure
2018 
2019   @ref User
2020 **/
2021 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
2022   CeedCall(CeedOperatorCheckReady(op));
2023 
2024   if (op->LinearAssembleQFunction) {
2025     // Backend version
2026     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
2027   } else {
2028     // Operator fallback
2029     CeedOperator op_fallback;
2030 
2031     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2032     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
2033     else return CeedError(CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
2034   }
2035   return CEED_ERROR_SUCCESS;
2036 }
2037 
2038 /**
2039   @brief Assemble `CeedQFunction` and store result internally.
2040 
2041   Return copied references of stored data to the caller.
2042   Caller is responsible for ownership and destruction of the copied references.
2043   See also @ref CeedOperatorLinearAssembleQFunction().
2044 
2045   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
2046         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
2047 
2048   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2049   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
2050   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
2051   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2052 
2053   @return An error code: 0 - success, otherwise - failure
2054 
2055   @ref User
2056 **/
2057 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
2058   return CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(op, true, assembled, rstr, request);
2059 }
2060 
2061 /**
2062   @brief Assemble the diagonal of a square linear `CeedOperator`
2063 
2064   This overwrites a `CeedVector` with the diagonal of a linear `CeedOperator`.
2065 
2066   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2067 
2068   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2069 
2070   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2071   @param[out] assembled `CeedVector` to store assembled `CeedOperator` diagonal
2072   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2073 
2074   @return An error code: 0 - success, otherwise - failure
2075 
2076   @ref User
2077 **/
2078 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2079   bool     is_composite;
2080   CeedSize input_size = 0, output_size = 0;
2081 
2082   CeedCall(CeedOperatorCheckReady(op));
2083   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2084 
2085   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2086   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2087 
2088   // Early exit for empty operator
2089   if (!is_composite) {
2090     CeedInt num_elem = 0;
2091 
2092     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2093     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2094   }
2095 
2096   if (op->LinearAssembleDiagonal) {
2097     // Backend version
2098     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
2099     return CEED_ERROR_SUCCESS;
2100   } else if (op->LinearAssembleAddDiagonal) {
2101     // Backend version with zeroing first
2102     CeedCall(CeedVectorSetValue(assembled, 0.0));
2103     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
2104     return CEED_ERROR_SUCCESS;
2105   } else if (is_composite) {
2106     // Default to summing contributions of suboperators
2107     CeedCall(CeedVectorSetValue(assembled, 0.0));
2108     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
2109     return CEED_ERROR_SUCCESS;
2110   } else {
2111     // Operator fallback
2112     CeedOperator op_fallback;
2113 
2114     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2115     if (op_fallback) {
2116       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
2117       return CEED_ERROR_SUCCESS;
2118     }
2119   }
2120   // Default interface implementation
2121   CeedCall(CeedVectorSetValue(assembled, 0.0));
2122   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
2123   return CEED_ERROR_SUCCESS;
2124 }
2125 
2126 /**
2127   @brief Assemble the diagonal of a square linear `CeedOperator`.
2128 
2129   This sums into a `CeedVector` the diagonal of a linear `CeedOperator`.
2130 
2131   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2132 
2133   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2134 
2135   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2136   @param[out] assembled `CeedVector` to store assembled `CeedOperator` diagonal
2137   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2138 
2139   @return An error code: 0 - success, otherwise - failure
2140 
2141   @ref User
2142 **/
2143 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2144   bool     is_composite;
2145   CeedSize input_size = 0, output_size = 0;
2146 
2147   CeedCall(CeedOperatorCheckReady(op));
2148   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2149 
2150   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2151   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2152 
2153   // Early exit for empty operator
2154   if (!is_composite) {
2155     CeedInt num_elem = 0;
2156 
2157     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2158     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2159   }
2160 
2161   if (op->LinearAssembleAddDiagonal) {
2162     // Backend version
2163     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
2164     return CEED_ERROR_SUCCESS;
2165   } else if (is_composite) {
2166     // Default to summing contributions of suboperators
2167     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
2168     return CEED_ERROR_SUCCESS;
2169   } else {
2170     // Operator fallback
2171     CeedOperator op_fallback;
2172 
2173     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2174     if (op_fallback) {
2175       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
2176       return CEED_ERROR_SUCCESS;
2177     }
2178   }
2179   // Default interface implementation
2180   CeedCall(CeedSingleOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
2181   return CEED_ERROR_SUCCESS;
2182 }
2183 
2184 /**
2185    @brief Fully assemble the point-block diagonal pattern of a linear `CeedOperator`.
2186 
2187    Expected to be used in conjunction with @ref CeedOperatorLinearAssemblePointBlockDiagonal().
2188 
2189    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2190    Note that the `(i, j)` pairs are unique.
2191    This function returns the number of entries and their `(i, j)` locations, while @ref CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in the same ordering.
2192 
2193    This will generally be slow unless your operator is low-order.
2194 
2195    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2196 
2197    @param[in]  op          `CeedOperator` to assemble
2198    @param[out] num_entries Number of entries in coordinate nonzero pattern
2199    @param[out] rows        Row number for each entry
2200    @param[out] cols        Column number for each entry
2201 
2202    @ref User
2203 **/
2204 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2205   bool          is_composite;
2206   CeedInt       num_active_components, num_sub_operators;
2207   CeedOperator *sub_operators;
2208 
2209   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2210 
2211   CeedSize input_size = 0, output_size = 0;
2212   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2213   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2214 
2215   if (is_composite) {
2216     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_operators));
2217     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2218   } else {
2219     sub_operators     = &op;
2220     num_sub_operators = 1;
2221   }
2222 
2223   // Verify operator can be assembled correctly
2224   {
2225     CeedOperatorAssemblyData data;
2226     CeedInt                  num_active_elem_rstrs, comp_stride;
2227     CeedElemRestriction     *active_elem_rstrs;
2228 
2229     // Get initial values to check against
2230     CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data));
2231     CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
2232     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride));
2233     CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components));
2234 
2235     // Verify that all active element restrictions have same component stride and number of components
2236     for (CeedInt k = 0; k < num_sub_operators; k++) {
2237       CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data));
2238       CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
2239       for (CeedInt i = 0; i < num_active_elem_rstrs; i++) {
2240         CeedInt comp_stride_sub, num_active_components_sub;
2241 
2242         CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub));
2243         CeedCheck(comp_stride == comp_stride_sub, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION,
2244                   "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub);
2245         CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub));
2246         CeedCheck(num_active_components == num_active_components_sub, CeedOperatorReturnCeed(op), CEED_ERROR_INCOMPATIBLE,
2247                   "All suboperators must have the same number of output components."
2248                   " Previous: %" CeedInt_FMT " Current: %" CeedInt_FMT,
2249                   num_active_components, num_active_components_sub);
2250       }
2251     }
2252   }
2253   *num_entries = input_size * num_active_components;
2254   CeedCall(CeedCalloc(*num_entries, rows));
2255   CeedCall(CeedCalloc(*num_entries, cols));
2256 
2257   for (CeedInt o = 0; o < num_sub_operators; o++) {
2258     CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr;
2259     CeedInt             comp_stride, num_elem, elem_size;
2260     const CeedInt      *offsets, *point_block_offsets;
2261 
2262     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr));
2263     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride));
2264     CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem));
2265     CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size));
2266     CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets));
2267 
2268     CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr));
2269     CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets));
2270 
2271     for (CeedSize i = 0; i < num_elem * elem_size; i++) {
2272       for (CeedInt c_out = 0; c_out < num_active_components; c_out++) {
2273         for (CeedInt c_in = 0; c_in < num_active_components; c_in++) {
2274           (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride;
2275           (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride;
2276         }
2277       }
2278     }
2279 
2280     CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets));
2281     CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets));
2282     CeedCall(CeedElemRestrictionDestroy(&active_elem_rstr));
2283     CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr));
2284   }
2285   return CEED_ERROR_SUCCESS;
2286 }
2287 
2288 /**
2289   @brief Assemble the point block diagonal of a square linear `CeedOperator`.
2290 
2291   This overwrites a `CeedVector` with the point block diagonal of a linear `CeedOperator`.
2292 
2293   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2294 
2295   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2296 
2297   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2298   @param[out] assembled `CeedVector` to store assembled `CeedOperator` point block diagonal, provided in row-major form with an `num_comp * num_comp` block at each node.
2299                           The dimensions of this vector are derived from the active vector for the `CeedOperator`.
2300                           The array has shape `[nodes, component out, component in]`.
2301   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2302 
2303   @return An error code: 0 - success, otherwise - failure
2304 
2305   @ref User
2306 **/
2307 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2308   bool     is_composite;
2309   CeedSize input_size = 0, output_size = 0;
2310 
2311   CeedCall(CeedOperatorCheckReady(op));
2312   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2313 
2314   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2315   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2316 
2317   // Early exit for empty operator
2318   if (!is_composite) {
2319     CeedInt num_elem = 0;
2320 
2321     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2322     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2323   }
2324 
2325   if (op->LinearAssemblePointBlockDiagonal) {
2326     // Backend version
2327     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
2328     return CEED_ERROR_SUCCESS;
2329   } else if (op->LinearAssembleAddPointBlockDiagonal) {
2330     // Backend version with zeroing first
2331     CeedCall(CeedVectorSetValue(assembled, 0.0));
2332     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2333     return CEED_ERROR_SUCCESS;
2334   } else {
2335     // Operator fallback
2336     CeedOperator op_fallback;
2337 
2338     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2339     if (op_fallback) {
2340       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
2341       return CEED_ERROR_SUCCESS;
2342     }
2343   }
2344   // Default interface implementation
2345   CeedCall(CeedVectorSetValue(assembled, 0.0));
2346   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2347   return CEED_ERROR_SUCCESS;
2348 }
2349 
2350 /**
2351   @brief Assemble the point block diagonal of a square linear `CeedOperator`.
2352 
2353   This sums into a `CeedVector` with the point block diagonal of a linear `CeedOperator`.
2354 
2355   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2356 
2357   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2358 
2359   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2360   @param[out] assembled `CeedVector` to store assembled CeedOperator point block diagonal, provided in row-major form with an `num_comp * num_comp` block at each node.
2361                           The dimensions of this vector are derived from the active vector for the `CeedOperator`.
2362                           The array has shape `[nodes, component out, component in]`.
2363   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2364 
2365   @return An error code: 0 - success, otherwise - failure
2366 
2367   @ref User
2368 **/
2369 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2370   bool     is_composite;
2371   CeedSize input_size = 0, output_size = 0;
2372 
2373   CeedCall(CeedOperatorCheckReady(op));
2374   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2375 
2376   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2377   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2378 
2379   // Early exit for empty operator
2380   if (!is_composite) {
2381     CeedInt num_elem = 0;
2382 
2383     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2384     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2385   }
2386 
2387   if (op->LinearAssembleAddPointBlockDiagonal) {
2388     // Backend version
2389     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
2390     return CEED_ERROR_SUCCESS;
2391   } else {
2392     // Operator fallback
2393     CeedOperator op_fallback;
2394 
2395     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2396     if (op_fallback) {
2397       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
2398       return CEED_ERROR_SUCCESS;
2399     }
2400   }
2401   // Default interface implementation
2402   if (is_composite) {
2403     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
2404   } else {
2405     CeedCall(CeedSingleOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
2406   }
2407   return CEED_ERROR_SUCCESS;
2408 }
2409 
2410 /**
2411    @brief Fully assemble the nonzero pattern of a linear `CeedOperator`.
2412 
2413    Expected to be used in conjunction with @ref CeedOperatorLinearAssemble().
2414 
2415    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2416    Note that the `(i, j)` pairs are not unique and may repeat.
2417    This function returns the number of entries and their `(i, j)` locations, while @ref CeedOperatorLinearAssemble() provides the values in the same ordering.
2418 
2419    This will generally be slow unless your operator is low-order.
2420 
2421    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2422 
2423    @param[in]  op          `CeedOperator` to assemble
2424    @param[out] num_entries Number of entries in coordinate nonzero pattern
2425    @param[out] rows        Row number for each entry
2426    @param[out] cols        Column number for each entry
2427 
2428    @ref User
2429 **/
2430 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2431   bool          is_composite;
2432   CeedInt       num_suboperators, offset = 0;
2433   CeedSize      single_entries;
2434   CeedOperator *sub_operators;
2435 
2436   CeedCall(CeedOperatorCheckReady(op));
2437   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2438 
2439   if (op->LinearAssembleSymbolic) {
2440     // Backend version
2441     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
2442     return CEED_ERROR_SUCCESS;
2443   } else {
2444     // Operator fallback
2445     CeedOperator op_fallback;
2446 
2447     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2448     if (op_fallback) {
2449       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
2450       return CEED_ERROR_SUCCESS;
2451     }
2452   }
2453 
2454   // Default interface implementation
2455 
2456   // Count entries and allocate rows, cols arrays
2457   *num_entries = 0;
2458   if (is_composite) {
2459     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2460     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2461     for (CeedInt k = 0; k < num_suboperators; ++k) {
2462       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2463       *num_entries += single_entries;
2464     }
2465   } else {
2466     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
2467     *num_entries += single_entries;
2468   }
2469   CeedCall(CeedCalloc(*num_entries, rows));
2470   CeedCall(CeedCalloc(*num_entries, cols));
2471 
2472   // Assemble nonzero locations
2473   if (is_composite) {
2474     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2475     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2476     for (CeedInt k = 0; k < num_suboperators; ++k) {
2477       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
2478       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2479       offset += single_entries;
2480     }
2481   } else {
2482     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
2483   }
2484   return CEED_ERROR_SUCCESS;
2485 }
2486 
2487 /**
2488    @brief Fully assemble the nonzero entries of a linear operator.
2489 
2490    Expected to be used in conjunction with @ref CeedOperatorLinearAssembleSymbolic().
2491 
2492    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2493    Note that the `(i, j)` pairs are not unique and may repeat.
2494    This function returns the values of the nonzero entries to be added, their `(i, j)` locations are provided by @ref CeedOperatorLinearAssembleSymbolic().
2495 
2496    This will generally be slow unless your operator is low-order.
2497 
2498    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2499 
2500    @param[in]  op     `CeedOperator` to assemble
2501    @param[out] values Values to assemble into matrix
2502 
2503    @ref User
2504 **/
2505 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
2506   bool          is_composite;
2507   CeedInt       num_suboperators, offset = 0;
2508   CeedSize      single_entries = 0;
2509   CeedOperator *sub_operators;
2510 
2511   CeedCall(CeedOperatorCheckReady(op));
2512   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2513 
2514   // Early exit for empty operator
2515   if (!is_composite) {
2516     CeedInt num_elem = 0;
2517 
2518     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2519     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2520   }
2521 
2522   if (op->LinearAssemble) {
2523     // Backend version
2524     CeedCall(op->LinearAssemble(op, values));
2525     return CEED_ERROR_SUCCESS;
2526   } else if (is_composite) {
2527     // Default to summing contributions of suboperators
2528     CeedCall(CeedVectorSetValue(values, 0.0));
2529     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2530     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2531     for (CeedInt k = 0; k < num_suboperators; k++) {
2532       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2533       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2534       offset += single_entries;
2535     }
2536     return CEED_ERROR_SUCCESS;
2537   } else if (op->LinearAssembleSingle) {
2538     CeedCall(CeedVectorSetValue(values, 0.0));
2539     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2540     return CEED_ERROR_SUCCESS;
2541   } else {
2542     // Operator fallback
2543     CeedOperator op_fallback;
2544 
2545     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2546     if (op_fallback) {
2547       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2548       return CEED_ERROR_SUCCESS;
2549     }
2550   }
2551 
2552   // Default to interface version if non-composite and no fallback
2553   CeedCall(CeedVectorSetValue(values, 0.0));
2554   CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2555   return CEED_ERROR_SUCCESS;
2556 }
2557 
2558 /**
2559   @brief Get the multiplicity of nodes across sub-operators in a composite `CeedOperator`.
2560 
2561   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2562 
2563   @param[in]  op               Composite `CeedOperator`
2564   @param[in]  num_skip_indices Number of sub-operators to skip
2565   @param[in]  skip_indices     Array of indices of sub-operators to skip
2566   @param[out] mult             Vector to store multiplicity (of size `l_size` )
2567 
2568   @return An error code: 0 - success, otherwise - failure
2569 
2570   @ref User
2571 **/
2572 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2573   Ceed                ceed;
2574   CeedInt             num_suboperators;
2575   CeedSize            l_vec_len;
2576   CeedScalar         *mult_array;
2577   CeedVector          ones_l_vec;
2578   CeedElemRestriction elem_rstr, mult_elem_rstr;
2579   CeedOperator       *sub_operators;
2580 
2581   CeedCall(CeedOperatorCheckReady(op));
2582 
2583   // Zero mult vector
2584   CeedCall(CeedVectorSetValue(mult, 0.0));
2585 
2586   // Get suboperators
2587   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2588   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2589   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2590 
2591   // Work vector
2592   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2593   CeedCall(CeedOperatorGetCeed(op, &ceed));
2594   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2595   CeedCall(CeedDestroy(&ceed));
2596   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2597   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2598 
2599   // Compute multiplicity across suboperators
2600   for (CeedInt i = 0; i < num_suboperators; i++) {
2601     const CeedScalar *sub_mult_array;
2602     CeedVector        sub_mult_l_vec, ones_e_vec;
2603 
2604     // -- Check for suboperator to skip
2605     for (CeedInt j = 0; j < num_skip_indices; j++) {
2606       if (skip_indices[j] == i) continue;
2607     }
2608 
2609     // -- Sub operator multiplicity
2610     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2611     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr));
2612     CeedCall(CeedElemRestrictionDestroy(&elem_rstr));
2613     CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2614     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2615     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2616     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2617     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2618     // ---- Flag every node present in the current suboperator
2619     for (CeedSize j = 0; j < l_vec_len; j++) {
2620       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2621     }
2622     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2623     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2624     CeedCall(CeedVectorDestroy(&ones_e_vec));
2625     CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr));
2626   }
2627   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2628   CeedCall(CeedVectorDestroy(&ones_l_vec));
2629   return CEED_ERROR_SUCCESS;
2630 }
2631 
2632 /**
2633   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator`, creating the prolongation basis from the fine and coarse grid interpolation.
2634 
2635   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2636 
2637   @param[in]  op_fine      Fine grid `CeedOperator`
2638   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2639   @param[in]  rstr_coarse  Coarse grid `CeedElemRestriction`
2640   @param[in]  basis_coarse Coarse grid active vector `CeedBasis`
2641   @param[out] op_coarse    Coarse grid `CeedOperator`
2642   @param[out] op_prolong   Coarse to fine `CeedOperator`, or `NULL`
2643   @param[out] op_restrict  Fine to coarse `CeedOperator`, or `NULL`
2644 
2645   @return An error code: 0 - success, otherwise - failure
2646 
2647   @ref User
2648 **/
2649 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2650                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2651   CeedBasis basis_c_to_f = NULL;
2652 
2653   CeedCall(CeedOperatorCheckReady(op_fine));
2654 
2655   // Build prolongation matrix, if required
2656   if (op_prolong || op_restrict) {
2657     CeedBasis basis_fine;
2658 
2659     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2660     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2661     CeedCall(CeedBasisDestroy(&basis_fine));
2662   }
2663 
2664   // Core code
2665   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2666   return CEED_ERROR_SUCCESS;
2667 }
2668 
2669 /**
2670   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator` with a tensor basis for the active basis.
2671 
2672   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2673 
2674   @param[in]  op_fine       Fine grid `CeedOperator`
2675   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2676   @param[in]  rstr_coarse   Coarse grid `CeedElemRestriction`
2677   @param[in]  basis_coarse  Coarse grid active vector `CeedBasis`
2678   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction `CeedOperator`
2679   @param[out] op_coarse     Coarse grid `CeedOperator`
2680   @param[out] op_prolong    Coarse to fine `CeedOperator`, or `NULL`
2681   @param[out] op_restrict   Fine to coarse `CeedOperator`, or `NULL`
2682 
2683   @return An error code: 0 - success, otherwise - failure
2684 
2685   @ref User
2686 **/
2687 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2688                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2689                                              CeedOperator *op_restrict) {
2690   Ceed      ceed;
2691   CeedInt   Q_f, Q_c;
2692   CeedBasis basis_fine, basis_c_to_f = NULL;
2693 
2694   CeedCall(CeedOperatorCheckReady(op_fine));
2695   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2696 
2697   // Check for compatible quadrature spaces
2698   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2699   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2700   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2701   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION,
2702             "Bases must have compatible quadrature spaces."
2703             " Fine grid: %" CeedInt_FMT " points, Coarse grid: %" CeedInt_FMT " points",
2704             Q_f, Q_c);
2705 
2706   // Create coarse to fine basis, if required
2707   if (op_prolong || op_restrict) {
2708     CeedInt     dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2709     CeedScalar *q_ref, *q_weight, *grad;
2710 
2711     // Check if interpolation matrix is provided
2712     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2713               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2714     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2715     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2716     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2717     CeedCall(CeedBasisDestroy(&basis_fine));
2718     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2719     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2720     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2721     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2722     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2723     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2724     CeedCall(CeedFree(&q_ref));
2725     CeedCall(CeedFree(&q_weight));
2726     CeedCall(CeedFree(&grad));
2727   }
2728 
2729   // Core code
2730   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2731   CeedCall(CeedDestroy(&ceed));
2732   return CEED_ERROR_SUCCESS;
2733 }
2734 
2735 /**
2736   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator` with a non-tensor basis for the active vector
2737 
2738   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2739 
2740   @param[in]  op_fine       Fine grid `CeedOperator`
2741   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2742   @param[in]  rstr_coarse   Coarse grid `CeedElemRestriction`
2743   @param[in]  basis_coarse  Coarse grid active vector `CeedBasis`
2744   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction `CeedOperator`
2745   @param[out] op_coarse     Coarse grid `CeedOperator`
2746   @param[out] op_prolong    Coarse to fine `CeedOperator`, or `NULL`
2747   @param[out] op_restrict   Fine to coarse `CeedOperator`, or `NULL`
2748 
2749   @return An error code: 0 - success, otherwise - failure
2750 
2751   @ref User
2752 **/
2753 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2754                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2755                                        CeedOperator *op_restrict) {
2756   Ceed      ceed;
2757   CeedInt   Q_f, Q_c;
2758   CeedBasis basis_fine, basis_c_to_f = NULL;
2759 
2760   CeedCall(CeedOperatorCheckReady(op_fine));
2761   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2762 
2763   // Check for compatible quadrature spaces
2764   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2765   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2766   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2767   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2768 
2769   // Coarse to fine basis
2770   if (op_prolong || op_restrict) {
2771     CeedInt          dim, num_comp, num_nodes_c, num_nodes_f;
2772     CeedScalar      *q_ref, *q_weight, *grad;
2773     CeedElemTopology topo;
2774 
2775     // Check if interpolation matrix is provided
2776     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2777               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2778     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2779     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2780     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2781     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2782     CeedCall(CeedBasisDestroy(&basis_fine));
2783     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2784     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2785     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2786     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2787     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2788     CeedCall(CeedFree(&q_ref));
2789     CeedCall(CeedFree(&q_weight));
2790     CeedCall(CeedFree(&grad));
2791   }
2792 
2793   // Core code
2794   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2795   CeedCall(CeedDestroy(&ceed));
2796   return CEED_ERROR_SUCCESS;
2797 }
2798 
2799 /**
2800   @brief Build a FDM based approximate inverse for each element for a `CeedOperator`.
2801 
2802   This returns a `CeedOperator` and `CeedVector` to apply a Fast Diagonalization Method based approximate inverse.
2803   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2804   The assembled `CeedQFunction` is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T \hat S V\f$.
2805   The `CeedOperator` must be linear and non-composite.
2806   The associated `CeedQFunction` must therefore also be linear.
2807 
2808   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2809 
2810   @param[in]  op      `CeedOperator` to create element inverses
2811   @param[out] fdm_inv `CeedOperator` to apply the action of a FDM based inverse for each element
2812   @param[in]  request Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2813 
2814   @return An error code: 0 - success, otherwise - failure
2815 
2816   @ref User
2817 **/
2818 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2819   Ceed                 ceed, ceed_parent;
2820   bool                 interp = false, grad = false, is_tensor_basis = true;
2821   CeedInt              num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2822   CeedScalar          *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg;
2823   const CeedScalar    *interp_1d, *grad_1d, *q_weight_1d;
2824   CeedVector           q_data;
2825   CeedElemRestriction  rstr  = NULL, rstr_qd_i;
2826   CeedBasis            basis = NULL, fdm_basis;
2827   CeedQFunctionContext ctx_fdm;
2828   CeedQFunctionField  *qf_fields;
2829   CeedQFunction        qf, qf_fdm;
2830   CeedOperatorField   *op_fields;
2831 
2832   CeedCall(CeedOperatorCheckReady(op));
2833 
2834   if (op->CreateFDMElementInverse) {
2835     // Backend version
2836     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2837     return CEED_ERROR_SUCCESS;
2838   } else {
2839     // Operator fallback
2840     CeedOperator op_fallback;
2841 
2842     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2843     if (op_fallback) {
2844       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2845       return CEED_ERROR_SUCCESS;
2846     }
2847   }
2848 
2849   // Default interface implementation
2850   CeedCall(CeedOperatorGetCeed(op, &ceed));
2851   CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent));
2852   CeedCall(CeedOperatorGetQFunction(op, &qf));
2853 
2854   // Determine active input basis
2855   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2856   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2857   for (CeedInt i = 0; i < num_input_fields; i++) {
2858     CeedVector vec;
2859 
2860     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2861     if (vec == CEED_VECTOR_ACTIVE) {
2862       CeedEvalMode eval_mode;
2863 
2864       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2865       interp = interp || eval_mode == CEED_EVAL_INTERP;
2866       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2867       if (!basis) CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2868       if (!rstr) CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2869     }
2870     CeedCall(CeedVectorDestroy(&vec));
2871   }
2872   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2873   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2874   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2875   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2876   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2877   CeedCall(CeedBasisGetDimension(basis, &dim));
2878   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2879   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2880 
2881   // Build and diagonalize 1D Mass and Laplacian
2882   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2883   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2884   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2885   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2886   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2887   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2888   CeedCall(CeedCalloc(P_1d, &lambda));
2889   // -- Build matrices
2890   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2891   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2892   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2893   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2894 
2895   // -- Diagonalize
2896   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2897   CeedCall(CeedFree(&mass));
2898   CeedCall(CeedFree(&laplace));
2899   for (CeedInt i = 0; i < P_1d; i++) {
2900     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2901   }
2902   CeedCall(CeedFree(&x));
2903 
2904   {
2905     CeedInt             layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2906     CeedScalar          max_norm = 0;
2907     const CeedScalar   *assembled_array, *q_weight_array;
2908     CeedVector          assembled = NULL, q_weight;
2909     CeedElemRestriction rstr_qf   = NULL;
2910 
2911     // Assemble QFunction
2912     CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2913     CeedCall(CeedElemRestrictionGetELayout(rstr_qf, layout));
2914     CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2915     CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2916 
2917     // Calculate element averages
2918     CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2919     CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2920     CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2921     CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2922     CeedCall(CeedCalloc(num_elem, &elem_avg));
2923     const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2924 
2925     for (CeedInt e = 0; e < num_elem; e++) {
2926       CeedInt count = 0;
2927 
2928       for (CeedInt q = 0; q < num_qpts; q++) {
2929         for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2930           if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2931             elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2932             count++;
2933           }
2934         }
2935       }
2936       if (count) {
2937         elem_avg[e] /= count;
2938       } else {
2939         elem_avg[e] = 1.0;
2940       }
2941     }
2942     CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2943     CeedCall(CeedVectorDestroy(&assembled));
2944     CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2945     CeedCall(CeedVectorDestroy(&q_weight));
2946   }
2947 
2948   // Build FDM diagonal
2949   {
2950     CeedScalar *q_data_array, *fdm_diagonal;
2951 
2952     CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2953     const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2954     for (CeedInt c = 0; c < num_comp; c++) {
2955       for (CeedInt n = 0; n < num_nodes; n++) {
2956         if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2957         if (grad) {
2958           for (CeedInt d = 0; d < dim; d++) {
2959             CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2960             fdm_diagonal[c * num_nodes + n] += lambda[i];
2961           }
2962         }
2963         if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2964       }
2965     }
2966     CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2967     CeedCall(CeedVectorSetValue(q_data, 0.0));
2968     CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2969     for (CeedInt e = 0; e < num_elem; e++) {
2970       for (CeedInt c = 0; c < num_comp; c++) {
2971         for (CeedInt n = 0; n < num_nodes; n++) {
2972           q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2973         }
2974       }
2975     }
2976     CeedCall(CeedFree(&elem_avg));
2977     CeedCall(CeedFree(&fdm_diagonal));
2978     CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2979   }
2980 
2981   // Setup FDM operator
2982   // -- Basis
2983   {
2984     CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2985 
2986     CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2987     CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2988     CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2989     CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2990     CeedCall(CeedFree(&fdm_interp));
2991     CeedCall(CeedFree(&grad_dummy));
2992     CeedCall(CeedFree(&q_ref_dummy));
2993     CeedCall(CeedFree(&q_weight_dummy));
2994     CeedCall(CeedFree(&lambda));
2995   }
2996 
2997   // -- Restriction
2998   {
2999     CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp};
3000     CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp,
3001                                               (CeedSize)num_elem * (CeedSize)num_comp * (CeedSize)num_nodes, strides, &rstr_qd_i));
3002   }
3003 
3004   // -- QFunction
3005   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
3006   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
3007   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
3008   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
3009   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
3010 
3011   // -- QFunction context
3012   {
3013     CeedInt *num_comp_data;
3014 
3015     CeedCall(CeedCalloc(1, &num_comp_data));
3016     num_comp_data[0] = num_comp;
3017     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
3018     CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
3019   }
3020   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
3021   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
3022 
3023   // -- Operator
3024   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
3025   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
3026   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data));
3027   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
3028 
3029   // Cleanup
3030   CeedCall(CeedDestroy(&ceed));
3031   CeedCall(CeedDestroy(&ceed_parent));
3032   CeedCall(CeedVectorDestroy(&q_data));
3033   CeedCall(CeedElemRestrictionDestroy(&rstr));
3034   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
3035   CeedCall(CeedBasisDestroy(&basis));
3036   CeedCall(CeedBasisDestroy(&fdm_basis));
3037   CeedCall(CeedQFunctionDestroy(&qf));
3038   CeedCall(CeedQFunctionDestroy(&qf_fdm));
3039   return CEED_ERROR_SUCCESS;
3040 }
3041 
3042 /// @}
3043