xref: /libCEED/interface/ceed-preconditioning.c (revision 9b5f41c81b637db3e5453a22df59a4f47deed499)
1 // Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a `CeedQFunction` with a reference `Ceed` to fallback for advanced `CeedOperator` functionality
28 
29   @param[in]  fallback_ceed `Ceed` on which to create fallback `CeedQFunction`
30   @param[in]  qf            `CeedQFunction` to create fallback for
31   @param[out] qf_fallback   Fallback `CeedQFunction`
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   char               *source_path_with_name = NULL;
39   CeedInt             num_input_fields, num_output_fields;
40   CeedQFunctionField *input_fields, *output_fields;
41 
42   // Check if NULL qf passed in
43   if (!qf) return CEED_ERROR_SUCCESS;
44 
45   CeedDebug(CeedQFunctionReturnCeed(qf), "Creating fallback CeedQFunction\n");
46 
47   if (qf->source_path) {
48     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
49 
50     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
51     memcpy(source_path_with_name, qf->source_path, path_len);
52     memcpy(&source_path_with_name[path_len], ":", 1);
53     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
54   } else if (qf->user_source) {
55     CeedCall(CeedStringAllocCopy(qf->user_source, &source_path_with_name));
56   } else {
57     CeedCall(CeedCalloc(1, &source_path_with_name));
58   }
59 
60   {
61     CeedInt           vec_length;
62     CeedQFunctionUser f;
63 
64     CeedCall(CeedQFunctionGetVectorLength(qf, &vec_length));
65     CeedCall(CeedQFunctionGetUserFunction(qf, &f));
66     CeedCall(CeedQFunctionCreateInterior(fallback_ceed, vec_length, f, source_path_with_name, qf_fallback));
67   }
68   {
69     CeedQFunctionContext ctx;
70 
71     CeedCall(CeedQFunctionGetContext(qf, &ctx));
72     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
73     CeedCall(CeedQFunctionContextDestroy(&ctx));
74   }
75   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
76   for (CeedInt i = 0; i < num_input_fields; i++) {
77     const char  *field_name;
78     CeedInt      size;
79     CeedEvalMode eval_mode;
80 
81     CeedCall(CeedQFunctionFieldGetData(input_fields[i], &field_name, &size, &eval_mode));
82     CeedCall(CeedQFunctionAddInput(*qf_fallback, field_name, size, eval_mode));
83   }
84   for (CeedInt i = 0; i < num_output_fields; i++) {
85     const char  *field_name;
86     CeedInt      size;
87     CeedEvalMode eval_mode;
88 
89     CeedCall(CeedQFunctionFieldGetData(output_fields[i], &field_name, &size, &eval_mode));
90     CeedCall(CeedQFunctionAddOutput(*qf_fallback, field_name, size, eval_mode));
91   }
92   CeedCall(CeedFree(&source_path_with_name));
93   return CEED_ERROR_SUCCESS;
94 }
95 
96 /**
97   @brief Duplicate a `CeedOperator` with a reference `Ceed` to fallback for advanced `CeedOperator` functionality
98 
99   @param[in,out] op `CeedOperator` to create fallback for
100 
101   @return An error code: 0 - success, otherwise - failure
102 
103   @ref Developer
104 **/
105 static int CeedOperatorCreateFallback(CeedOperator op) {
106   bool         is_composite;
107   Ceed         ceed, ceed_fallback;
108   CeedOperator op_fallback;
109 
110   // Check not already created
111   if (op->op_fallback) return CEED_ERROR_SUCCESS;
112 
113   // Fallback Ceed
114   CeedCall(CeedOperatorGetCeed(op, &ceed));
115   CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
116   CeedCall(CeedDestroy(&ceed));
117   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
118 
119   CeedDebug(CeedOperatorReturnCeed(op), "Creating fallback CeedOperator\n");
120 
121   // Clone Op
122   CeedCall(CeedOperatorIsComposite(op, &is_composite));
123   if (is_composite) {
124     CeedInt       num_suboperators;
125     CeedOperator *sub_operators;
126 
127     CeedCall(CeedOperatorCreateComposite(ceed_fallback, &op_fallback));
128     CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
129     CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
130     for (CeedInt i = 0; i < num_suboperators; i++) {
131       CeedOperator op_sub_fallback;
132 
133       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
134       CeedCall(CeedOperatorCompositeAddSub(op_fallback, op_sub_fallback));
135     }
136   } else {
137     bool               is_at_points = false;
138     CeedInt            num_input_fields, num_output_fields;
139     CeedQFunction      qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
140     CeedOperatorField *input_fields, *output_fields;
141 
142     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
143     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
144     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
145     CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
146     if (is_at_points) {
147       CeedVector          points;
148       CeedElemRestriction rstr_points;
149 
150       CeedCall(CeedOperatorCreateAtPoints(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
151       CeedCall(CeedOperatorAtPointsGetPoints(op, &rstr_points, &points));
152       CeedCall(CeedOperatorAtPointsSetPoints(op_fallback, rstr_points, points));
153       CeedCall(CeedVectorDestroy(&points));
154       CeedCall(CeedElemRestrictionDestroy(&rstr_points));
155     } else {
156       CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
157     }
158     CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
159     for (CeedInt i = 0; i < num_input_fields; i++) {
160       const char         *field_name;
161       CeedVector          vec;
162       CeedElemRestriction rstr;
163       CeedBasis           basis;
164 
165       CeedCall(CeedOperatorFieldGetData(input_fields[i], &field_name, &rstr, &basis, &vec));
166       CeedCall(CeedOperatorSetField(op_fallback, field_name, rstr, basis, vec));
167       CeedCall(CeedVectorDestroy(&vec));
168       CeedCall(CeedElemRestrictionDestroy(&rstr));
169       CeedCall(CeedBasisDestroy(&basis));
170     }
171     for (CeedInt i = 0; i < num_output_fields; i++) {
172       const char         *field_name;
173       CeedVector          vec;
174       CeedElemRestriction rstr;
175       CeedBasis           basis;
176 
177       CeedCall(CeedOperatorFieldGetData(output_fields[i], &field_name, &rstr, &basis, &vec));
178       CeedCall(CeedOperatorSetField(op_fallback, field_name, rstr, basis, vec));
179       CeedCall(CeedVectorDestroy(&vec));
180       CeedCall(CeedElemRestrictionDestroy(&rstr));
181       CeedCall(CeedBasisDestroy(&basis));
182     }
183     {
184       CeedQFunctionAssemblyData data;
185 
186       CeedCall(CeedOperatorGetQFunctionAssemblyData(op, &data));
187       CeedCall(CeedQFunctionAssemblyDataReferenceCopy(data, &op_fallback->qf_assembled));
188     }
189     // Cleanup
190     CeedCall(CeedQFunctionDestroy(&qf_fallback));
191     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
192     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
193   }
194   CeedCall(CeedOperatorSetName(op_fallback, op->name));
195   CeedCall(CeedOperatorCheckReady(op_fallback));
196   // Note: No ref-counting here so we don't get caught in a reference loop.
197   //       The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback.
198   op->op_fallback                 = op_fallback;
199   op_fallback->op_fallback_parent = op;
200   CeedCall(CeedDestroy(&ceed_fallback));
201   return CEED_ERROR_SUCCESS;
202 }
203 
204 /**
205   @brief Core logic for assembling operator diagonal or point block diagonal
206 
207   @param[in]  op             `CeedOperator` to assemble diagonal or point block diagonal
208   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
209   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
210   @param[out] assembled      `CeedVector` to store assembled diagonal
211 
212   @return An error code: 0 - success, otherwise - failure
213 
214   @ref Developer
215 **/
216 static inline int CeedOperatorLinearAssembleAddDiagonalSingle_Mesh(CeedOperator op, CeedRequest *request, const bool is_point_block,
217                                                                    CeedVector assembled) {
218   bool is_composite;
219 
220   CeedCall(CeedOperatorIsComposite(op, &is_composite));
221   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
222 
223   // Assemble QFunction
224   CeedInt             layout_qf[3];
225   const CeedScalar   *assembled_qf_array;
226   CeedVector          assembled_qf        = NULL;
227   CeedElemRestriction assembled_elem_rstr = NULL;
228 
229   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
230   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, layout_qf));
231   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
232   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
233 
234   // Get assembly data
235   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
236   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
237   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
238   CeedBasis               *active_bases_in, *active_bases_out;
239   CeedElemRestriction     *active_elem_rstrs_in, *active_elem_rstrs_out;
240   CeedOperatorAssemblyData data;
241 
242   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
243   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
244                                                 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out,
245                                                 &num_output_components));
246   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL));
247   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out));
248 
249   // Loop over all active bases (find matching input/output pairs)
250   for (CeedInt b = 0; b < CeedIntMin(num_active_bases_in, num_active_bases_out); b++) {
251     CeedInt             b_in, b_out, num_elem, num_nodes, num_qpts, num_comp;
252     bool                has_eval_none = false;
253     CeedScalar         *elem_diag_array, *identity = NULL;
254     CeedVector          elem_diag;
255     CeedElemRestriction diag_elem_rstr;
256 
257     if (num_active_bases_in <= num_active_bases_out) {
258       b_in = b;
259       for (b_out = 0; b_out < num_active_bases_out; b_out++) {
260         if (active_bases_in[b_in] == active_bases_out[b_out]) {
261           break;
262         }
263       }
264       if (b_out == num_active_bases_out) {
265         continue;
266       }  // No matching output basis found
267     } else {
268       b_out = b;
269       for (b_in = 0; b_in < num_active_bases_in; b_in++) {
270         if (active_bases_in[b_in] == active_bases_out[b_out]) {
271           break;
272         }
273       }
274       if (b_in == num_active_bases_in) {
275         continue;
276       }  // No matching output basis found
277     }
278     CeedCheck(active_elem_rstrs_in[b_in] == active_elem_rstrs_out[b_out], CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
279               "Cannot assemble operator diagonal with different input and output active element restrictions");
280 
281     // Assemble point block diagonal restriction, if needed
282     if (is_point_block) {
283       CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b_in], &diag_elem_rstr));
284     } else {
285       CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b_in], &diag_elem_rstr));
286     }
287 
288     // Create diagonal vector
289     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
290 
291     // Assemble element operator diagonals
292     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
293     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
294     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
295     CeedCall(CeedBasisGetNumNodes(active_bases_in[b_in], &num_nodes));
296     CeedCall(CeedBasisGetNumComponents(active_bases_in[b_in], &num_comp));
297     if (active_bases_in[b_in] == CEED_BASIS_NONE) num_qpts = num_nodes;
298     else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b_in], &num_qpts));
299 
300     // Construct identity matrix for basis if required
301     for (CeedInt i = 0; i < num_eval_modes_in[b_in]; i++) {
302       has_eval_none = has_eval_none || (eval_modes_in[b_in][i] == CEED_EVAL_NONE);
303     }
304     for (CeedInt i = 0; i < num_eval_modes_out[b_out]; i++) {
305       has_eval_none = has_eval_none || (eval_modes_out[b_out][i] == CEED_EVAL_NONE);
306     }
307     if (has_eval_none) {
308       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
309       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
310     }
311 
312     // Compute the diagonal of B^T D B
313     // Each element
314     for (CeedSize e = 0; e < num_elem; e++) {
315       // Each basis eval mode pair
316       CeedInt      d_out              = 0, q_comp_out;
317       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
318 
319       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b_out]; e_out++) {
320         CeedInt           d_in              = 0, q_comp_in;
321         const CeedScalar *B_t               = NULL;
322         CeedEvalMode      eval_mode_in_prev = CEED_EVAL_NONE;
323 
324         CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b_out], eval_modes_out[b_out][e_out], identity, &B_t));
325         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b_out], eval_modes_out[b_out][e_out], &q_comp_out));
326         if (q_comp_out > 1) {
327           if (e_out == 0 || eval_modes_out[b_out][e_out] != eval_mode_out_prev) d_out = 0;
328           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
329         }
330         eval_mode_out_prev = eval_modes_out[b_out][e_out];
331 
332         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b_in]; e_in++) {
333           const CeedScalar *B = NULL;
334 
335           CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b_in], eval_modes_in[b_in][e_in], identity, &B));
336           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b_in], eval_modes_in[b_in][e_in], &q_comp_in));
337           if (q_comp_in > 1) {
338             if (e_in == 0 || eval_modes_in[b_in][e_in] != eval_mode_in_prev) d_in = 0;
339             else B = &B[(++d_in) * num_qpts * num_nodes];
340           }
341           eval_mode_in_prev = eval_modes_in[b_in][e_in];
342 
343           // Each component
344           for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
345             // Each qpt/node pair
346             for (CeedInt q = 0; q < num_qpts; q++) {
347               if (is_point_block) {
348                 // Point Block Diagonal
349                 for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
350                   const CeedSize c_offset =
351                       (eval_mode_offsets_in[b_in][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
352                   const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
353 
354                   for (CeedInt n = 0; n < num_nodes; n++) {
355                     elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
356                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
357                   }
358                 }
359               } else {
360                 // Diagonal Only
361                 const CeedInt c_offset =
362                     (eval_mode_offsets_in[b_in][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
363                 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
364 
365                 for (CeedInt n = 0; n < num_nodes; n++) {
366                   elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
367                 }
368               }
369             }
370           }
371         }
372       }
373     }
374     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
375 
376     // Assemble local operator diagonal
377     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
378 
379     // Cleanup
380     CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
381     CeedCall(CeedVectorDestroy(&elem_diag));
382     CeedCall(CeedFree(&identity));
383   }
384   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
385   CeedCall(CeedVectorDestroy(&assembled_qf));
386   return CEED_ERROR_SUCCESS;
387 }
388 
389 /**
390   @brief Core logic for assembling operator diagonal or point block diagonal
391 
392   @param[in]  op             `CeedOperator` to assemble diagonal or point block diagonal
393   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
394   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
395   @param[out] assembled      `CeedVector` to store assembled diagonal
396 
397   @return An error code: 0 - success, otherwise - failure
398 
399   @ref Developer
400 **/
401 static inline int CeedOperatorLinearAssembleAddDiagonalSingle(CeedOperator op, CeedRequest *request, const bool is_point_block,
402                                                               CeedVector assembled) {
403   bool is_at_points;
404 
405   CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
406   CeedCheck(!is_at_points, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "AtPoints operator not supported");
407   CeedCall(CeedOperatorLinearAssembleAddDiagonalSingle_Mesh(op, request, is_point_block, assembled));
408   return CEED_ERROR_SUCCESS;
409 }
410 
411 /**
412   @brief Core logic for assembling composite operator diagonal
413 
414   @param[in]  op             `CeedOperator` to assemble point block diagonal
415   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
416   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
417   @param[out] assembled      `CeedVector` to store assembled diagonal
418 
419   @return An error code: 0 - success, otherwise - failure
420 
421   @ref Developer
422 **/
423 static inline int CeedOperatorLinearAssembleAddDiagonalComposite(CeedOperator op, CeedRequest *request, const bool is_point_block,
424                                                                  CeedVector assembled) {
425   CeedInt       num_sub;
426   CeedOperator *suboperators;
427 
428   CeedCall(CeedOperatorCompositeGetNumSub(op, &num_sub));
429   CeedCall(CeedOperatorCompositeGetSubList(op, &suboperators));
430   for (CeedInt i = 0; i < num_sub; i++) {
431     if (is_point_block) {
432       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
433     } else {
434       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
435     }
436   }
437   return CEED_ERROR_SUCCESS;
438 }
439 
440 /**
441   @brief Build nonzero pattern for non-composite CeedOperator`.
442 
443   Users should generally use @ref CeedOperatorLinearAssembleSymbolic().
444 
445   @param[in]  op     `CeedOperator` to assemble nonzero pattern
446   @param[in]  offset Offset for number of entries
447   @param[out] rows   Row number for each entry
448   @param[out] cols   Column number for each entry
449 
450   @return An error code: 0 - success, otherwise - failure
451 
452   @ref Developer
453 **/
454 static int CeedOperatorAssembleSymbolicSingle(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
455   Ceed                ceed;
456   bool                is_composite;
457   CeedSize            num_nodes_in, num_nodes_out, local_num_entries, count = 0;
458   CeedInt             num_elem_in, elem_size_in, num_comp_in, layout_er_in[3];
459   CeedInt             num_elem_out, elem_size_out, num_comp_out, layout_er_out[3];
460   CeedScalar         *array;
461   const CeedScalar   *elem_dof_a_in, *elem_dof_a_out;
462   CeedVector          index_vec_in, index_vec_out, elem_dof_in, elem_dof_out;
463   CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out;
464 
465   CeedCall(CeedOperatorIsComposite(op, &is_composite));
466   CeedCall(CeedOperatorGetCeed(op, &ceed));
467   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
468 
469   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out));
470   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
471   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
472   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
473   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
474   CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, layout_er_in));
475 
476   // Determine elem_dof relation for input
477   CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in));
478   CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array));
479   for (CeedSize i = 0; i < num_nodes_in; i++) array[i] = i;
480   CeedCall(CeedVectorRestoreArray(index_vec_in, &array));
481   CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in));
482   CeedCall(CeedVectorSetValue(elem_dof_in, 0.0));
483   CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in));
484   CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE));
485   CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in));
486   CeedCall(CeedVectorDestroy(&index_vec_in));
487   CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in));
488 
489   if (elem_rstr_in != elem_rstr_out) {
490     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
491     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
492               "Active input and output operator restrictions must have the same number of elements."
493               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
494               num_elem_in, num_elem_out);
495     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
496     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
497     CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, layout_er_out));
498 
499     // Determine elem_dof relation for output
500     CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out));
501     CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array));
502     for (CeedSize i = 0; i < num_nodes_out; i++) array[i] = i;
503     CeedCall(CeedVectorRestoreArray(index_vec_out, &array));
504     CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out));
505     CeedCall(CeedVectorSetValue(elem_dof_out, 0.0));
506     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out));
507     CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE));
508     CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out));
509     CeedCall(CeedVectorDestroy(&index_vec_out));
510     CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out));
511   } else {
512     num_elem_out     = num_elem_in;
513     elem_size_out    = elem_size_in;
514     num_comp_out     = num_comp_in;
515     layout_er_out[0] = layout_er_in[0];
516     layout_er_out[1] = layout_er_in[1];
517     layout_er_out[2] = layout_er_in[2];
518     elem_dof_a_out   = elem_dof_a_in;
519   }
520   local_num_entries = (CeedSize)elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
521 
522   // Determine i, j locations for element matrices
523   for (CeedInt e = 0; e < num_elem_in; e++) {
524     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
525       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
526         for (CeedInt i = 0; i < elem_size_out; i++) {
527           for (CeedInt j = 0; j < elem_size_in; j++) {
528             const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2];
529             const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2];
530             const CeedInt row                = elem_dof_a_out[elem_dof_index_row];
531             const CeedInt col                = elem_dof_a_in[elem_dof_index_col];
532 
533             rows[offset + count] = row;
534             cols[offset + count] = col;
535             count++;
536           }
537         }
538       }
539     }
540   }
541   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
542   CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in));
543   CeedCall(CeedVectorDestroy(&elem_dof_in));
544   if (elem_rstr_in != elem_rstr_out) {
545     CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out));
546     CeedCall(CeedVectorDestroy(&elem_dof_out));
547   }
548   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
549   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
550   CeedCall(CeedDestroy(&ceed));
551   return CEED_ERROR_SUCCESS;
552 }
553 
554 /**
555   @brief Core logic to assemble `CeedQFunction` and store result internally.
556 
557   Return copied references of stored data to the caller.
558   Caller is responsible for ownership and destruction of the copied references.
559   See also @ref CeedOperatorLinearAssembleQFunction().
560 
561   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
562         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
563 
564   @param[in]  op         `CeedOperator` to assemble `CeedQFunction`
565   @param[in]  use_parent Boolean flag to check for fallback parent implementation
566   @param[out] assembled  `CeedVector` to store assembled `CeedQFunction` at quadrature points
567   @param[out] rstr       `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
568   @param[in]  request    Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
569 
570   @return An error code: 0 - success, otherwise - failure
571 
572   @ref User
573 **/
574 static int CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(CeedOperator op, bool use_parent, CeedVector *assembled, CeedElemRestriction *rstr,
575                                                                  CeedRequest *request) {
576   int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL;
577   CeedOperator op_assemble                                                                           = NULL;
578   CeedOperator op_fallback_parent                                                                    = NULL;
579 
580   CeedCall(CeedOperatorCheckReady(op));
581 
582   // Determine if fallback parent or operator has implementation
583   CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent));
584   if (op_fallback_parent && use_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) {
585     // -- Backend version for op fallback parent is faster, if it exists
586     CeedDebug(CeedOperatorReturnCeed(op), "Using fallback parent for CeedOperatorLinearAssembleQFunctionBuildOrUpdate\n");
587     LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate;
588     op_assemble                   = op_fallback_parent;
589   } else if (op->LinearAssembleQFunctionUpdate) {
590     // -- Backend version for op
591     LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate;
592     op_assemble                   = op;
593   }
594 
595   // Assemble QFunction
596   if (LinearAssembleQFunctionUpdate) {
597     // Backend or fallback parent version
598     CeedQFunctionAssemblyData data;
599     bool                      data_is_setup;
600     CeedVector                assembled_vec  = NULL;
601     CeedElemRestriction       assembled_rstr = NULL;
602 
603     CeedCall(CeedOperatorGetQFunctionAssemblyData(op, &data));
604     CeedCall(CeedQFunctionAssemblyDataIsSetup(data, &data_is_setup));
605     if (data_is_setup) {
606       bool update_needed;
607 
608       CeedCall(CeedQFunctionAssemblyDataGetObjects(data, &assembled_vec, &assembled_rstr));
609       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(data, &update_needed));
610       if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request));
611     } else {
612       CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request));
613       CeedCall(CeedQFunctionAssemblyDataSetObjects(data, assembled_vec, assembled_rstr));
614     }
615     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(data, false));
616 
617     // Copy reference from internally held copy
618     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
619     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
620     CeedCall(CeedVectorDestroy(&assembled_vec));
621     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
622   } else {
623     // Operator fallback
624     CeedOperator op_fallback;
625 
626     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleQFunctionBuildOrUpdate\n");
627     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
628     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
629     else return CeedError(CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
630   }
631   return CEED_ERROR_SUCCESS;
632 }
633 
634 /**
635   @brief Assemble `CeedQFunction` and store result internally, but do not use fallback parent.
636 
637   Return copied references of stored data to the caller.
638   Caller is responsible for ownership and destruction of the copied references.
639   See also @ref CeedOperatorLinearAssembleQFunction().
640 
641   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
642         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
643 
644   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
645   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
646   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
647   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
648 
649   @return An error code: 0 - success, otherwise - failure
650 
651   @ref Developer
652 **/
653 int CeedOperatorLinearAssembleQFunctionBuildOrUpdateFallback(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr,
654                                                              CeedRequest *request) {
655   return CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(op, false, assembled, rstr, request);
656 }
657 
658 /**
659   @brief Assemble nonzero entries for non-composite `CeedOperator`.
660 
661   Users should generally use @ref CeedOperatorLinearAssemble().
662 
663   @param[in]  op     `CeedOperator` to assemble
664   @param[in]  offset Offset for number of entries
665   @param[out] values Values to assemble into matrix
666 
667   @return An error code: 0 - success, otherwise - failure
668 
669   @ref Developer
670 **/
671 int CeedOperatorAssembleSingle(CeedOperator op, CeedInt offset, CeedVector values) {
672   bool is_composite, is_at_points;
673 
674   CeedCall(CeedOperatorIsComposite(op, &is_composite));
675   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
676 
677   // Early exit for empty operator
678   {
679     CeedInt num_elem = 0;
680 
681     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
682     if (num_elem == 0) return CEED_ERROR_SUCCESS;
683   }
684 
685   if (op->LinearAssembleSingle) {
686     // Backend version
687     CeedCall(op->LinearAssembleSingle(op, offset, values));
688     return CEED_ERROR_SUCCESS;
689   } else {
690     // Operator fallback
691     CeedOperator op_fallback;
692 
693     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorAssembleSingle\n");
694     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
695     if (op_fallback) {
696       CeedCall(CeedOperatorAssembleSingle(op_fallback, offset, values));
697       return CEED_ERROR_SUCCESS;
698     }
699   }
700 
701   CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
702   CeedCheck(!is_at_points, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
703             "Backend does not implement CeedOperatorLinearAssemble for AtPoints operator");
704 
705   // Assemble QFunction
706   CeedInt             layout_qf[3];
707   const CeedScalar   *assembled_qf_array;
708   CeedVector          assembled_qf        = NULL;
709   CeedElemRestriction assembled_elem_rstr = NULL;
710 
711   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE));
712   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, layout_qf));
713   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
714   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
715 
716   // Get assembly data
717   CeedInt                  num_elem_in, elem_size_in, num_comp_in, num_qpts_in;
718   CeedInt                  num_elem_out, elem_size_out, num_comp_out, num_qpts_out;
719   CeedSize                 local_num_entries, count = 0;
720   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
721   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
722   CeedBasis               *active_bases_in, *active_bases_out, basis_in, basis_out;
723   const CeedScalar       **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out;
724   CeedElemRestriction      elem_rstr_in, elem_rstr_out;
725   CeedRestrictionType      elem_rstr_type_in, elem_rstr_type_out;
726   const bool              *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL;
727   const CeedInt8          *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL;
728   CeedOperatorAssemblyData data;
729 
730   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
731   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out,
732                                                 &num_eval_modes_out, &eval_modes_out, NULL, NULL));
733 
734   CeedCheck(num_active_bases_in == 1 && num_active_bases_out == 1, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
735             "Cannot assemble operator with multiple active bases");
736   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
737             "Cannot assemble operator without inputs/outputs");
738 
739   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out));
740   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
741   basis_in  = active_bases_in[0];
742   basis_out = active_bases_out[0];
743   B_mat_in  = B_mats_in[0];
744   B_mat_out = B_mats_out[0];
745 
746   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
747   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
748   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
749   if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in;
750   else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in));
751 
752   CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in));
753   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
754     CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in));
755   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
756     CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in));
757   }
758 
759   if (elem_rstr_in != elem_rstr_out) {
760     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
761     CeedCheck(num_elem_in == num_elem_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
762               "Active input and output operator restrictions must have the same number of elements."
763               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
764               num_elem_in, num_elem_out);
765     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
766     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
767     if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out;
768     else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out));
769     CeedCheck(num_qpts_in == num_qpts_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
770               "Active input and output bases must have the same number of quadrature points."
771               " Input has %" CeedInt_FMT " points; output has %" CeedInt_FMT "points.",
772               num_qpts_in, num_qpts_out);
773 
774     CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out));
775     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
776       CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out));
777     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
778       CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out));
779     }
780   } else {
781     num_elem_out  = num_elem_in;
782     elem_size_out = elem_size_in;
783     num_comp_out  = num_comp_in;
784     num_qpts_out  = num_qpts_in;
785 
786     elem_rstr_orients_out      = elem_rstr_orients_in;
787     elem_rstr_curl_orients_out = elem_rstr_curl_orients_in;
788   }
789   local_num_entries = (CeedSize)elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
790 
791   // Loop over elements and put in data structure
792   // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
793   CeedTensorContract contract;
794   CeedScalar        *vals, *BTD_mat = NULL, *elem_mat = NULL, *elem_mat_b = NULL;
795 
796   CeedCall(CeedBasisGetTensorContract(basis_in, &contract));
797   CeedCall(CeedCalloc(elem_size_out * num_qpts_in * num_eval_modes_in[0], &BTD_mat));
798   CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat));
799   if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b));
800 
801   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
802   for (CeedSize e = 0; e < num_elem_in; e++) {
803     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
804       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
805         // Compute B^T*D
806         for (CeedSize n = 0; n < elem_size_out; n++) {
807           for (CeedSize q = 0; q < num_qpts_in; q++) {
808             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
809               const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in;
810               CeedScalar     sum       = 0.0;
811 
812               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
813                 const CeedSize b_out_index     = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n;
814                 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out;
815                 const CeedSize qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
816 
817                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
818               }
819               BTD_mat[btd_index] = sum;
820             }
821           }
822         }
823 
824         // Form element matrix itself (for each block component)
825         if (contract) {
826           CeedCall(CeedTensorContractApply(contract, 1, num_qpts_in * num_eval_modes_in[0], elem_size_in, elem_size_out, BTD_mat, CEED_NOTRANSPOSE,
827                                            false, B_mat_in, elem_mat));
828         } else {
829           Ceed ceed;
830 
831           CeedCall(CeedOperatorGetCeed(op, &ceed));
832           CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size_out, elem_size_in, num_qpts_in * num_eval_modes_in[0]));
833           CeedCall(CeedDestroy(&ceed));
834         }
835 
836         // Transform the element matrix if required
837         if (elem_rstr_orients_out) {
838           const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out];
839 
840           for (CeedInt i = 0; i < elem_size_out; i++) {
841             const double orient = elem_orients[i] ? -1.0 : 1.0;
842 
843             for (CeedInt j = 0; j < elem_size_in; j++) {
844               elem_mat[i * elem_size_in + j] *= orient;
845             }
846           }
847         } else if (elem_rstr_curl_orients_out) {
848           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out];
849 
850           // T^T*(B^T*D*B)
851           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
852           for (CeedInt i = 0; i < elem_size_out; i++) {
853             for (CeedInt j = 0; j < elem_size_in; j++) {
854               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] +
855                                                (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) +
856                                                (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0);
857             }
858           }
859         }
860         if (elem_rstr_orients_in) {
861           const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in];
862 
863           for (CeedInt i = 0; i < elem_size_out; i++) {
864             for (CeedInt j = 0; j < elem_size_in; j++) {
865               elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0;
866             }
867           }
868         } else if (elem_rstr_curl_orients_in) {
869           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in];
870 
871           // (B^T*D*B)*T
872           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
873           for (CeedInt i = 0; i < elem_size_out; i++) {
874             for (CeedInt j = 0; j < elem_size_in; j++) {
875               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] +
876                                                (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) +
877                                                (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0);
878             }
879           }
880         }
881 
882         // Put element matrix in coordinate data structure
883         for (CeedInt i = 0; i < elem_size_out; i++) {
884           for (CeedInt j = 0; j < elem_size_in; j++) {
885             vals[offset + count] = elem_mat[i * elem_size_in + j];
886             count++;
887           }
888         }
889       }
890     }
891   }
892   CeedCheck(count == local_num_entries, CeedOperatorReturnCeed(op), CEED_ERROR_MAJOR, "Error computing entries");
893   CeedCall(CeedVectorRestoreArray(values, &vals));
894 
895   // Cleanup
896   CeedCall(CeedFree(&BTD_mat));
897   CeedCall(CeedFree(&elem_mat));
898   CeedCall(CeedFree(&elem_mat_b));
899   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
900     CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in));
901   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
902     CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in));
903   }
904   if (elem_rstr_in != elem_rstr_out) {
905     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
906       CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out));
907     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
908       CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out));
909     }
910   }
911   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
912   CeedCall(CeedVectorDestroy(&assembled_qf));
913   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
914   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
915   return CEED_ERROR_SUCCESS;
916 }
917 
918 /**
919   @brief Count number of entries for assembled `CeedOperator`
920 
921   @param[in]  op          `CeedOperator` to assemble
922   @param[out] num_entries Number of entries in assembled representation
923 
924   @return An error code: 0 - success, otherwise - failure
925 
926   @ref Utility
927 **/
928 static int CeedOperatorAssemblyCountEntriesSingle(CeedOperator op, CeedSize *num_entries) {
929   bool                is_composite;
930   CeedInt             num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out;
931   CeedElemRestriction rstr_in, rstr_out;
932 
933   CeedCall(CeedOperatorIsComposite(op, &is_composite));
934   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
935 
936   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out));
937   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in));
938   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in));
939   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in));
940   if (rstr_in != rstr_out) {
941     CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out));
942     CeedCheck(num_elem_in == num_elem_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
943               "Active input and output operator restrictions must have the same number of elements."
944               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
945               num_elem_in, num_elem_out);
946     CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out));
947     CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out));
948   } else {
949     num_elem_out  = num_elem_in;
950     elem_size_out = elem_size_in;
951     num_comp_out  = num_comp_in;
952   }
953   CeedCall(CeedElemRestrictionDestroy(&rstr_in));
954   CeedCall(CeedElemRestrictionDestroy(&rstr_out));
955   *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in;
956   return CEED_ERROR_SUCCESS;
957 }
958 
959 /**
960   @brief Common code for creating a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator`
961 
962   @param[in]  op_fine      Fine grid `CeedOperator`
963   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
964   @param[in]  rstr_coarse  Coarse grid `CeedElemRestriction`
965   @param[in]  basis_coarse Coarse grid active vector `CeedBasis`
966   @param[in]  basis_c_to_f `CeedBasis` for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction operators
967   @param[out] op_coarse    Coarse grid `CeedOperator`
968   @param[out] op_prolong   Coarse to fine `CeedOperator`, or `NULL`
969   @param[out] op_restrict  Fine to coarse `CeedOperator`, or `NULL`
970 
971   @return An error code: 0 - success, otherwise - failure
972 
973   @ref Developer
974 **/
975 static int CeedOperatorMultigridLevelCreateSingle_Core(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse,
976                                                        CeedBasis basis_coarse, CeedBasis basis_c_to_f, CeedOperator *op_coarse,
977                                                        CeedOperator *op_prolong, CeedOperator *op_restrict) {
978   bool                is_composite;
979   Ceed                ceed;
980   CeedInt             num_comp, num_input_fields, num_output_fields;
981   CeedVector          mult_vec         = NULL;
982   CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL;
983   CeedOperatorField  *input_fields, *output_fields;
984 
985   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
986 
987   // Check for composite operator
988   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
989   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
990 
991   // Coarse Grid
992   {
993     bool is_at_points;
994 
995     CeedCall(CeedOperatorIsAtPoints(op_fine, &is_at_points));
996     if (is_at_points) {
997       CeedVector          point_coords;
998       CeedElemRestriction rstr_points;
999 
1000       CeedCall(CeedOperatorCreateAtPoints(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
1001       CeedCall(CeedOperatorAtPointsGetPoints(op_fine, &rstr_points, &point_coords));
1002       CeedCall(CeedOperatorAtPointsSetPoints(*op_coarse, rstr_points, point_coords));
1003       CeedCall(CeedVectorDestroy(&point_coords));
1004       CeedCall(CeedElemRestrictionDestroy(&rstr_points));
1005     } else {
1006       CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
1007     }
1008   }
1009   CeedCall(CeedOperatorGetFields(op_fine, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
1010   // -- Clone input fields
1011   for (CeedInt i = 0; i < num_input_fields; i++) {
1012     const char         *field_name;
1013     CeedVector          vec;
1014     CeedElemRestriction rstr  = NULL;
1015     CeedBasis           basis = NULL;
1016 
1017     CeedCall(CeedOperatorFieldGetName(input_fields[i], &field_name));
1018     CeedCall(CeedOperatorFieldGetVector(input_fields[i], &vec));
1019     if (vec == CEED_VECTOR_ACTIVE) {
1020       CeedCall(CeedElemRestrictionReferenceCopy(rstr_coarse, &rstr));
1021       CeedCall(CeedBasisReferenceCopy(basis_coarse, &basis));
1022       if (!rstr_fine) CeedCall(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_fine));
1023     } else {
1024       CeedCall(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr));
1025       CeedCall(CeedOperatorFieldGetBasis(input_fields[i], &basis));
1026     }
1027     CeedCall(CeedOperatorSetField(*op_coarse, field_name, rstr, basis, vec));
1028     CeedCall(CeedVectorDestroy(&vec));
1029     CeedCall(CeedElemRestrictionDestroy(&rstr));
1030     CeedCall(CeedBasisDestroy(&basis));
1031   }
1032   // -- Clone output fields
1033   for (CeedInt i = 0; i < num_output_fields; i++) {
1034     const char         *field_name;
1035     CeedVector          vec;
1036     CeedElemRestriction rstr  = NULL;
1037     CeedBasis           basis = NULL;
1038 
1039     CeedCall(CeedOperatorFieldGetName(output_fields[i], &field_name));
1040     CeedCall(CeedOperatorFieldGetVector(output_fields[i], &vec));
1041     if (vec == CEED_VECTOR_ACTIVE) {
1042       CeedCall(CeedElemRestrictionReferenceCopy(rstr_coarse, &rstr));
1043       CeedCall(CeedBasisReferenceCopy(basis_coarse, &basis));
1044       if (!rstr_fine) CeedCall(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_fine));
1045     } else {
1046       CeedCall(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr));
1047       CeedCall(CeedOperatorFieldGetBasis(output_fields[i], &basis));
1048     }
1049     CeedCall(CeedOperatorSetField(*op_coarse, field_name, rstr, basis, vec));
1050     CeedCall(CeedVectorDestroy(&vec));
1051     CeedCall(CeedElemRestrictionDestroy(&rstr));
1052     CeedCall(CeedBasisDestroy(&basis));
1053   }
1054   // -- Clone QFunctionAssemblyData
1055   {
1056     CeedQFunctionAssemblyData fine_data;
1057 
1058     CeedCall(CeedOperatorGetQFunctionAssemblyData(op_fine, &fine_data));
1059     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(fine_data, &(*op_coarse)->qf_assembled));
1060   }
1061 
1062   // Multiplicity vector
1063   if (op_restrict || op_prolong) {
1064     CeedVector          mult_e_vec;
1065     CeedRestrictionType rstr_type;
1066 
1067     CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type));
1068     CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED,
1069               "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported");
1070     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
1071     CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine));
1072     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
1073     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
1074     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
1075     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
1076     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
1077     CeedCall(CeedVectorDestroy(&mult_e_vec));
1078     CeedCall(CeedVectorReciprocal(mult_vec));
1079   }
1080 
1081   // Clone name
1082   bool   has_name = op_fine->name;
1083   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
1084   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
1085 
1086   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
1087   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
1088             "Prolongation or restriction operator creation requires coarse-to-fine basis");
1089 
1090   // Restriction/Prolongation Operators
1091   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
1092 
1093   // Restriction
1094   if (op_restrict) {
1095     CeedInt             *num_comp_r_data;
1096     CeedQFunctionContext ctx_r;
1097     CeedQFunction        qf_restrict;
1098 
1099     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
1100     CeedCall(CeedCalloc(1, &num_comp_r_data));
1101     num_comp_r_data[0] = num_comp;
1102     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
1103     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
1104     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
1105     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
1106     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
1107     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
1108     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
1109     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
1110 
1111     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
1112     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
1113     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
1114     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
1115 
1116     // Set name
1117     char *restriction_name;
1118 
1119     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
1120     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
1121     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
1122     CeedCall(CeedFree(&restriction_name));
1123 
1124     // Check
1125     CeedCall(CeedOperatorCheckReady(*op_restrict));
1126 
1127     // Cleanup
1128     CeedCall(CeedQFunctionDestroy(&qf_restrict));
1129   }
1130 
1131   // Prolongation
1132   if (op_prolong) {
1133     CeedInt             *num_comp_p_data;
1134     CeedQFunctionContext ctx_p;
1135     CeedQFunction        qf_prolong;
1136 
1137     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
1138     CeedCall(CeedCalloc(1, &num_comp_p_data));
1139     num_comp_p_data[0] = num_comp;
1140     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
1141     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
1142     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
1143     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
1144     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
1145     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
1146     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
1147     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
1148 
1149     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
1150     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
1151     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
1152     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
1153 
1154     // Set name
1155     char *prolongation_name;
1156 
1157     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
1158     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
1159     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
1160     CeedCall(CeedFree(&prolongation_name));
1161 
1162     // Check
1163     CeedCall(CeedOperatorCheckReady(*op_prolong));
1164 
1165     // Cleanup
1166     CeedCall(CeedQFunctionDestroy(&qf_prolong));
1167   }
1168 
1169   // Check
1170   CeedCall(CeedOperatorCheckReady(*op_coarse));
1171 
1172   // Cleanup
1173   CeedCall(CeedDestroy(&ceed));
1174   CeedCall(CeedVectorDestroy(&mult_vec));
1175   CeedCall(CeedElemRestrictionDestroy(&rstr_fine));
1176   CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine));
1177   CeedCall(CeedBasisDestroy(&basis_c_to_f));
1178   return CEED_ERROR_SUCCESS;
1179 }
1180 
1181 /**
1182   @brief Build 1D mass matrix and Laplacian with perturbation
1183 
1184   @param[in]  interp_1d   Interpolation matrix in one dimension
1185   @param[in]  grad_1d     Gradient matrix in one dimension
1186   @param[in]  q_weight_1d Quadrature weights in one dimension
1187   @param[in]  P_1d        Number of basis nodes in one dimension
1188   @param[in]  Q_1d        Number of quadrature points in one dimension
1189   @param[in]  dim         Dimension of basis
1190   @param[out] mass        Assembled mass matrix in one dimension
1191   @param[out] laplace     Assembled perturbed Laplacian in one dimension
1192 
1193   @return An error code: 0 - success, otherwise - failure
1194 
1195   @ref Developer
1196 **/
1197 CeedPragmaOptimizeOff
1198 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
1199                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
1200   for (CeedInt i = 0; i < P_1d; i++) {
1201     for (CeedInt j = 0; j < P_1d; j++) {
1202       CeedScalar sum = 0.0;
1203       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
1204       mass[i + j * P_1d] = sum;
1205     }
1206   }
1207   // -- Laplacian
1208   for (CeedInt i = 0; i < P_1d; i++) {
1209     for (CeedInt j = 0; j < P_1d; j++) {
1210       CeedScalar sum = 0.0;
1211 
1212       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
1213       laplace[i + j * P_1d] = sum;
1214     }
1215   }
1216   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
1217   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
1218   return CEED_ERROR_SUCCESS;
1219 }
1220 CeedPragmaOptimizeOn
1221 
1222 /// @}
1223 
1224 /// ----------------------------------------------------------------------------
1225 /// CeedOperator Backend API
1226 /// ----------------------------------------------------------------------------
1227 /// @addtogroup CeedOperatorBackend
1228 /// @{
1229 
1230 /**
1231   @brief Select correct basis matrix pointer based on @ref CeedEvalMode
1232 
1233   @param[in]  basis     `CeedBasis` from which to get the basis matrix
1234   @param[in]  eval_mode Current basis evaluation mode
1235   @param[in]  identity  Pointer to identity matrix
1236   @param[out] basis_ptr `CeedBasis` pointer to set
1237 
1238   @ref Backend
1239 **/
1240 int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
1241   switch (eval_mode) {
1242     case CEED_EVAL_NONE:
1243       *basis_ptr = identity;
1244       break;
1245     case CEED_EVAL_INTERP:
1246       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
1247       break;
1248     case CEED_EVAL_GRAD:
1249       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
1250       break;
1251     case CEED_EVAL_DIV:
1252       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
1253       break;
1254     case CEED_EVAL_CURL:
1255       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
1256       break;
1257     case CEED_EVAL_WEIGHT:
1258       break;  // Caught by QF Assembly
1259   }
1260   assert(*basis_ptr != NULL);
1261   return CEED_ERROR_SUCCESS;
1262 }
1263 
1264 /**
1265   @brief Create point block restriction for active `CeedOperatorField`
1266 
1267   @param[in]  rstr             Original `CeedElemRestriction` for active field
1268   @param[out] point_block_rstr Address of the variable where the newly created `CeedElemRestriction` will be stored
1269 
1270   @return An error code: 0 - success, otherwise - failure
1271 
1272   @ref Backend
1273 **/
1274 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) {
1275   Ceed           ceed;
1276   CeedInt        num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets;
1277   CeedSize       l_size;
1278   const CeedInt *offsets;
1279 
1280   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
1281   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
1282 
1283   // Expand offsets
1284   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
1285   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
1286   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
1287   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
1288   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
1289   shift = num_comp;
1290   if (comp_stride != 1) shift *= num_comp;
1291   CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets));
1292   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
1293     point_block_offsets[i] = offsets[i] * shift;
1294   }
1295 
1296   // Create new restriction
1297   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
1298                                      point_block_offsets, point_block_rstr));
1299 
1300   // Cleanup
1301   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
1302   CeedCall(CeedDestroy(&ceed));
1303   return CEED_ERROR_SUCCESS;
1304 }
1305 
1306 /**
1307   @brief Get `CeedQFunctionAssemblyData`
1308 
1309   @param[in]  op   `CeedOperator` to assemble
1310   @param[out] data `CeedQFunctionAssemblyData`
1311 
1312   @return An error code: 0 - success, otherwise - failure
1313 
1314   @ref Backend
1315 **/
1316 int CeedOperatorGetQFunctionAssemblyData(CeedOperator op, CeedQFunctionAssemblyData *data) {
1317   if (!op->qf_assembled) {
1318     CeedQFunctionAssemblyData data;
1319 
1320     CeedCall(CeedQFunctionAssemblyDataCreate(op->ceed, &data));
1321     op->qf_assembled = data;
1322   }
1323   *data = op->qf_assembled;
1324   return CEED_ERROR_SUCCESS;
1325 }
1326 
1327 /**
1328   @brief Create object holding `CeedQFunction` assembly data for `CeedOperator`
1329 
1330   @param[in]  ceed `Ceed` object used to create the `CeedQFunctionAssemblyData`
1331   @param[out] data Address of the variable where the newly created `CeedQFunctionAssemblyData` will be stored
1332 
1333   @return An error code: 0 - success, otherwise - failure
1334 
1335   @ref Backend
1336 **/
1337 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
1338   CeedCall(CeedCalloc(1, data));
1339   (*data)->ref_count = 1;
1340   (*data)->ceed      = ceed;
1341   CeedCall(CeedReference(ceed));
1342   return CEED_ERROR_SUCCESS;
1343 }
1344 
1345 /**
1346   @brief Increment the reference counter for a `CeedQFunctionAssemblyData`
1347 
1348   @param[in,out] data `CeedQFunctionAssemblyData` to increment the reference counter
1349 
1350   @return An error code: 0 - success, otherwise - failure
1351 
1352   @ref Backend
1353 **/
1354 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
1355   data->ref_count++;
1356   return CEED_ERROR_SUCCESS;
1357 }
1358 
1359 /**
1360   @brief Set re-use of `CeedQFunctionAssemblyData`
1361 
1362   @param[in,out] data       `CeedQFunctionAssemblyData` to mark for reuse
1363   @param[in]     reuse_data Boolean flag indicating data re-use
1364 
1365   @return An error code: 0 - success, otherwise - failure
1366 
1367   @ref Backend
1368 **/
1369 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
1370   data->reuse_data        = reuse_data;
1371   data->needs_data_update = true;
1372   return CEED_ERROR_SUCCESS;
1373 }
1374 
1375 /**
1376   @brief Mark `CeedQFunctionAssemblyData` as stale
1377 
1378   @param[in,out] data              `CeedQFunctionAssemblyData` to mark as stale
1379   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
1380 
1381   @return An error code: 0 - success, otherwise - failure
1382 
1383   @ref Backend
1384 **/
1385 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
1386   data->needs_data_update = needs_data_update;
1387   return CEED_ERROR_SUCCESS;
1388 }
1389 
1390 /**
1391   @brief Determine if `CeedQFunctionAssemblyData` needs update
1392 
1393   @param[in]  data             `CeedQFunctionAssemblyData` to mark as stale
1394   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1395 
1396   @return An error code: 0 - success, otherwise - failure
1397 
1398   @ref Backend
1399 **/
1400 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
1401   *is_update_needed = !data->reuse_data || data->needs_data_update;
1402   return CEED_ERROR_SUCCESS;
1403 }
1404 
1405 /**
1406   @brief Copy the pointer to a `CeedQFunctionAssemblyData`.
1407 
1408   Both pointers should be destroyed with @ref CeedQFunctionAssemblyDataDestroy().
1409 
1410   Note: If the value of ` *data_copy` passed to this function is non-`NULL` , then it is assumed that ` *data_copy` is a pointer to a `CeedQFunctionAssemblyData`.
1411         This `CeedQFunctionAssemblyData` will be destroyed if ` *data_copy` is the only reference to this `CeedQFunctionAssemblyData`.
1412 
1413   @param[in]     data      `CeedQFunctionAssemblyData` to copy reference to
1414   @param[in,out] data_copy Variable to store copied reference
1415 
1416   @return An error code: 0 - success, otherwise - failure
1417 
1418   @ref Backend
1419 **/
1420 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1421   CeedCall(CeedQFunctionAssemblyDataReference(data));
1422   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1423   *data_copy = data;
1424   return CEED_ERROR_SUCCESS;
1425 }
1426 
1427 /**
1428   @brief Get setup status for internal objects for `CeedQFunctionAssemblyData`
1429 
1430   @param[in]  data     `CeedQFunctionAssemblyData` to retrieve status
1431   @param[out] is_setup Boolean flag for setup status
1432 
1433   @return An error code: 0 - success, otherwise - failure
1434 
1435   @ref Backend
1436 **/
1437 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1438   *is_setup = data->is_setup;
1439   return CEED_ERROR_SUCCESS;
1440 }
1441 
1442 /**
1443   @brief Set internal objects for `CeedQFunctionAssemblyData`
1444 
1445   @param[in,out] data `CeedQFunctionAssemblyData` to set objects
1446   @param[in]     vec  `CeedVector` to store assembled `CeedQFunction` at quadrature points
1447   @param[in]     rstr `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
1448 
1449   @return An error code: 0 - success, otherwise - failure
1450 
1451   @ref Backend
1452 **/
1453 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1454   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1455   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1456 
1457   data->is_setup = true;
1458   return CEED_ERROR_SUCCESS;
1459 }
1460 
1461 /**
1462   @brief Get internal objects for `CeedQFunctionAssemblyData`
1463 
1464   @param[in,out] data `CeedQFunctionAssemblyData` to set objects
1465   @param[out]    vec  `CeedVector` to store assembled `CeedQFunction` at quadrature points
1466   @param[out]    rstr `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
1467 
1468   @return An error code: 0 - success, otherwise - failure
1469 
1470   @ref Backend
1471 **/
1472 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1473   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1474 
1475   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1476   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1477   return CEED_ERROR_SUCCESS;
1478 }
1479 
1480 /**
1481   @brief Destroy `CeedQFunctionAssemblyData`
1482 
1483   @param[in,out] data  `CeedQFunctionAssemblyData` to destroy
1484 
1485   @return An error code: 0 - success, otherwise - failure
1486 
1487   @ref Backend
1488 **/
1489 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1490   if (!*data || --(*data)->ref_count > 0) {
1491     *data = NULL;
1492     return CEED_ERROR_SUCCESS;
1493   }
1494   CeedCall(CeedDestroy(&(*data)->ceed));
1495   CeedCall(CeedVectorDestroy(&(*data)->vec));
1496   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1497 
1498   CeedCall(CeedFree(data));
1499   return CEED_ERROR_SUCCESS;
1500 }
1501 
1502 /**
1503   @brief Get `CeedOperatorAssemblyData`
1504 
1505   @param[in]  op   `CeedOperator` to assemble
1506   @param[out] data `CeedOperatorAssemblyData`
1507 
1508   @return An error code: 0 - success, otherwise - failure
1509 
1510   @ref Backend
1511 **/
1512 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1513   if (!op->op_assembled) {
1514     CeedOperatorAssemblyData data;
1515 
1516     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1517     op->op_assembled = data;
1518   }
1519   *data = op->op_assembled;
1520   return CEED_ERROR_SUCCESS;
1521 }
1522 
1523 /**
1524   @brief Create object holding `CeedOperator` assembly data.
1525 
1526   The `CeedOperatorAssemblyData` holds an array with references to every active `CeedBasis` used in the `CeedOperator`.
1527   An array with references to the corresponding active `CeedElemRestriction` is also stored.
1528   For each active `CeedBasis, the `CeedOperatorAssemblyData` holds an array of all input and output @ref CeedEvalMode for this `CeedBasis`.
1529   The `CeedOperatorAssemblyData` holds an array of offsets for indexing into the assembled `CeedQFunction` arrays to the row representing each @ref CeedEvalMode.
1530   The number of input columns across all active bases for the assembled `CeedQFunction` is also stored.
1531   Lastly, the `CeedOperatorAssembly` data holds assembled matrices representing the full action of the `CeedBasis` for all @ref CeedEvalMode.
1532 
1533   @param[in]  ceed `Ceed` object used to create the `CeedOperatorAssemblyData`
1534   @param[in]  op   `CeedOperator` to be assembled
1535   @param[out] data Address of the variable where the newly created `CeedOperatorAssemblyData` will be stored
1536 
1537   @return An error code: 0 - success, otherwise - failure
1538 
1539   @ref Backend
1540 **/
1541 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1542   CeedInt             num_active_bases_in = 0, num_active_bases_out = 0, offset = 0;
1543   CeedInt             num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL;
1544   CeedSize          **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1545   CeedEvalMode      **eval_modes_in = NULL, **eval_modes_out = NULL;
1546   CeedQFunctionField *qf_fields;
1547   CeedQFunction       qf;
1548   CeedOperatorField  *op_fields;
1549   bool                is_composite;
1550 
1551   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1552   CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators.");
1553 
1554   // Allocate
1555   CeedCall(CeedCalloc(1, data));
1556   (*data)->ceed = ceed;
1557   CeedCall(CeedReference(ceed));
1558 
1559   // Build OperatorAssembly data
1560   CeedCall(CeedOperatorGetQFunction(op, &qf));
1561 
1562   // Determine active input basis
1563   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1564   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1565   for (CeedInt i = 0; i < num_input_fields; i++) {
1566     CeedVector vec;
1567 
1568     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1569     if (vec == CEED_VECTOR_ACTIVE) {
1570       CeedInt      index = -1, num_comp, q_comp;
1571       CeedEvalMode eval_mode;
1572       CeedBasis    basis_in = NULL;
1573 
1574       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1575       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1576       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1577       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1578       for (CeedInt i = 0; i < num_active_bases_in; i++) {
1579         if ((*data)->active_bases_in[i] == basis_in) index = i;
1580       }
1581       if (index == -1) {
1582         CeedElemRestriction elem_rstr_in;
1583 
1584         index = num_active_bases_in;
1585         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in));
1586         (*data)->active_bases_in[num_active_bases_in] = NULL;
1587         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in]));
1588         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in));
1589         (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL;
1590         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1591         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in]));
1592         CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
1593         CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in));
1594         num_eval_modes_in[index] = 0;
1595         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in));
1596         eval_modes_in[index] = NULL;
1597         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in));
1598         eval_mode_offsets_in[index] = NULL;
1599         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in));
1600         (*data)->assembled_bases_in[index] = NULL;
1601         num_active_bases_in++;
1602       }
1603       if (eval_mode != CEED_EVAL_WEIGHT) {
1604         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1605         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1606         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1607         for (CeedInt d = 0; d < q_comp; d++) {
1608           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1609           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1610           offset += num_comp;
1611         }
1612         num_eval_modes_in[index] += q_comp;
1613       }
1614       CeedCall(CeedBasisDestroy(&basis_in));
1615     }
1616     CeedCall(CeedVectorDestroy(&vec));
1617   }
1618 
1619   // Determine active output basis
1620   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1621   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1622   offset = 0;
1623   for (CeedInt i = 0; i < num_output_fields; i++) {
1624     CeedVector vec;
1625 
1626     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1627     if (vec == CEED_VECTOR_ACTIVE) {
1628       CeedInt      index = -1, num_comp, q_comp;
1629       CeedEvalMode eval_mode;
1630       CeedBasis    basis_out = NULL;
1631 
1632       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1633       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1634       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1635       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1636       for (CeedInt i = 0; i < num_active_bases_out; i++) {
1637         if ((*data)->active_bases_out[i] == basis_out) index = i;
1638       }
1639       if (index == -1) {
1640         CeedElemRestriction elem_rstr_out;
1641 
1642         index = num_active_bases_out;
1643         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out));
1644         (*data)->active_bases_out[num_active_bases_out] = NULL;
1645         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out]));
1646         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out));
1647         (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL;
1648         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1649         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out]));
1650         CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
1651         CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out));
1652         num_eval_modes_out[index] = 0;
1653         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out));
1654         eval_modes_out[index] = NULL;
1655         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out));
1656         eval_mode_offsets_out[index] = NULL;
1657         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out));
1658         (*data)->assembled_bases_out[index] = NULL;
1659         num_active_bases_out++;
1660       }
1661       if (eval_mode != CEED_EVAL_WEIGHT) {
1662         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1663         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1664         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1665         for (CeedInt d = 0; d < q_comp; d++) {
1666           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1667           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1668           offset += num_comp;
1669         }
1670         num_eval_modes_out[index] += q_comp;
1671       }
1672       CeedCall(CeedBasisDestroy(&basis_out));
1673     }
1674     CeedCall(CeedVectorDestroy(&vec));
1675   }
1676   CeedCall(CeedQFunctionDestroy(&qf));
1677   (*data)->num_active_bases_in   = num_active_bases_in;
1678   (*data)->num_eval_modes_in     = num_eval_modes_in;
1679   (*data)->eval_modes_in         = eval_modes_in;
1680   (*data)->eval_mode_offsets_in  = eval_mode_offsets_in;
1681   (*data)->num_active_bases_out  = num_active_bases_out;
1682   (*data)->num_eval_modes_out    = num_eval_modes_out;
1683   (*data)->eval_modes_out        = eval_modes_out;
1684   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1685   (*data)->num_output_components = offset;
1686   return CEED_ERROR_SUCCESS;
1687 }
1688 
1689 /**
1690   @brief Get `CeedOperator` @ref CeedEvalMode for assembly.
1691 
1692   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1693 
1694   @param[in]  data                  `CeedOperatorAssemblyData`
1695   @param[out] num_active_bases_in   Total number of active bases for input
1696   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input @ref CeedEvalMode, or `NULL`.
1697                                       `eval_modes_in[0]` holds an array of eval modes for the first active `CeedBasis`.
1698   @param[out] eval_modes_in         Pointer to hold arrays of input @ref CeedEvalMode, or `NULL`
1699   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point
1700   @param[out] num_active_bases_out  Total number of active bases for output
1701   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output @ref CeedEvalMode, or `NULL`
1702   @param[out] eval_modes_out        Pointer to hold arrays of output @ref CeedEvalMode, or `NULL`
1703   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1704   @param[out] num_output_components The number of columns in the assembled `CeedQFunction` matrix for each quadrature point, including contributions of all active bases
1705 
1706   @return An error code: 0 - success, otherwise - failure
1707 
1708   @ref Backend
1709 **/
1710 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in,
1711                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out,
1712                                          CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out,
1713                                          CeedSize *num_output_components) {
1714   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1715   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1716   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1717   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1718   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1719   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1720   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1721   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1722   if (num_output_components) *num_output_components = data->num_output_components;
1723   return CEED_ERROR_SUCCESS;
1724 }
1725 
1726 /**
1727   @brief Get `CeedOperator` `CeedBasis` data for assembly.
1728 
1729   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1730 
1731   @param[in]  data                 `CeedOperatorAssemblyData`
1732   @param[out] num_active_bases_in  Number of active input bases, or `NULL`
1733   @param[out] active_bases_in      Pointer to hold active input `CeedBasis`, or `NULL`
1734   @param[out] assembled_bases_in   Pointer to hold assembled active input `B` , or `NULL`
1735   @param[out] num_active_bases_out Number of active output bases, or `NULL`
1736   @param[out] active_bases_out     Pointer to hold active output `CeedBasis`, or `NULL`
1737   @param[out] assembled_bases_out  Pointer to hold assembled active output `B` , or `NULL`
1738 
1739   @return An error code: 0 - success, otherwise - failure
1740 
1741   @ref Backend
1742 **/
1743 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in,
1744                                      const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out,
1745                                      const CeedScalar ***assembled_bases_out) {
1746   // Assemble B_in, B_out if needed
1747   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1748     CeedInt num_qpts;
1749 
1750     if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts));
1751     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts));
1752     for (CeedInt b = 0; b < data->num_active_bases_in; b++) {
1753       bool        has_eval_none = false;
1754       CeedInt     num_nodes;
1755       CeedScalar *B_in = NULL, *identity = NULL;
1756 
1757       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes));
1758       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1759 
1760       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1761         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1762       }
1763       if (has_eval_none) {
1764         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1765         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1766           identity[i * num_nodes + i] = 1.0;
1767         }
1768       }
1769 
1770       for (CeedInt q = 0; q < num_qpts; q++) {
1771         for (CeedInt n = 0; n < num_nodes; n++) {
1772           CeedInt      d_in              = 0, q_comp_in;
1773           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1774 
1775           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1776             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1777             const CeedScalar *B  = NULL;
1778 
1779             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B));
1780             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in));
1781             if (q_comp_in > 1) {
1782               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1783               else B = &B[(++d_in) * num_qpts * num_nodes];
1784             }
1785             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1786             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1787           }
1788         }
1789       }
1790       if (identity) CeedCall(CeedFree(&identity));
1791       data->assembled_bases_in[b] = B_in;
1792     }
1793   }
1794 
1795   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1796     CeedInt num_qpts;
1797 
1798     if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts));
1799     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts));
1800     for (CeedInt b = 0; b < data->num_active_bases_out; b++) {
1801       bool        has_eval_none = false;
1802       CeedInt     num_nodes;
1803       CeedScalar *B_out = NULL, *identity = NULL;
1804 
1805       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes));
1806       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1807 
1808       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1809         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1810       }
1811       if (has_eval_none) {
1812         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1813         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1814           identity[i * num_nodes + i] = 1.0;
1815         }
1816       }
1817 
1818       for (CeedInt q = 0; q < num_qpts; q++) {
1819         for (CeedInt n = 0; n < num_nodes; n++) {
1820           CeedInt      d_out              = 0, q_comp_out;
1821           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1822 
1823           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1824             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1825             const CeedScalar *B  = NULL;
1826 
1827             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B));
1828             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out));
1829             if (q_comp_out > 1) {
1830               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1831               else B = &B[(++d_out) * num_qpts * num_nodes];
1832             }
1833             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1834             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1835           }
1836         }
1837       }
1838       if (identity) CeedCall(CeedFree(&identity));
1839       data->assembled_bases_out[b] = B_out;
1840     }
1841   }
1842 
1843   // Pass out assembled data
1844   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1845   if (active_bases_in) *active_bases_in = data->active_bases_in;
1846   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1847   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1848   if (active_bases_out) *active_bases_out = data->active_bases_out;
1849   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1850   return CEED_ERROR_SUCCESS;
1851 }
1852 
1853 /**
1854   @brief Get `CeedOperator` `CeedBasis` data for assembly.
1855 
1856   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1857 
1858   @param[in]  data                      `CeedOperatorAssemblyData`
1859   @param[out] num_active_elem_rstrs_in  Number of active input element restrictions, or `NULL`
1860   @param[out] active_elem_rstrs_in      Pointer to hold active input `CeedElemRestriction`, or `NULL`
1861   @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or `NULL`
1862   @param[out] active_elem_rstrs_out     Pointer to hold active output `CeedElemRestriction`, or `NULL`
1863 
1864   @return An error code: 0 - success, otherwise - failure
1865 
1866   @ref Backend
1867 **/
1868 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in,
1869                                                 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out,
1870                                                 CeedElemRestriction **active_elem_rstrs_out) {
1871   if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in;
1872   if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in;
1873   if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out;
1874   if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out;
1875   return CEED_ERROR_SUCCESS;
1876 }
1877 
1878 /**
1879   @brief Destroy `CeedOperatorAssemblyData`
1880 
1881   @param[in,out] data `CeedOperatorAssemblyData` to destroy
1882 
1883   @return An error code: 0 - success, otherwise - failure
1884 
1885   @ref Backend
1886 **/
1887 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1888   if (!*data) {
1889     *data = NULL;
1890     return CEED_ERROR_SUCCESS;
1891   }
1892   CeedCall(CeedDestroy(&(*data)->ceed));
1893   for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) {
1894     CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b]));
1895     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b]));
1896     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1897     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1898     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1899   }
1900   for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) {
1901     CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b]));
1902     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b]));
1903     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1904     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1905     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1906   }
1907   CeedCall(CeedFree(&(*data)->active_bases_in));
1908   CeedCall(CeedFree(&(*data)->active_bases_out));
1909   CeedCall(CeedFree(&(*data)->active_elem_rstrs_in));
1910   CeedCall(CeedFree(&(*data)->active_elem_rstrs_out));
1911   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1912   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1913   CeedCall(CeedFree(&(*data)->eval_modes_in));
1914   CeedCall(CeedFree(&(*data)->eval_modes_out));
1915   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1916   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1917   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1918   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1919 
1920   CeedCall(CeedFree(data));
1921   return CEED_ERROR_SUCCESS;
1922 }
1923 
1924 /**
1925   @brief Retrieve fallback `CeedOperator` with a reference `Ceed` for advanced `CeedOperator` functionality
1926 
1927   @param[in]  op          `CeedOperator` to retrieve fallback for
1928   @param[out] op_fallback Fallback `CeedOperator`
1929 
1930   @return An error code: 0 - success, otherwise - failure
1931 
1932   @ref Backend
1933 **/
1934 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
1935   // Create if needed
1936   if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op));
1937   if (op->op_fallback) {
1938     bool is_debug;
1939     Ceed ceed;
1940 
1941     CeedCall(CeedOperatorGetCeed(op, &ceed));
1942     CeedCall(CeedIsDebug(ceed, &is_debug));
1943     if (is_debug) {
1944       Ceed        ceed_fallback;
1945       const char *resource, *resource_fallback, *op_name;
1946 
1947       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
1948       CeedCall(CeedGetResource(ceed, &resource));
1949       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
1950       CeedCall(CeedOperatorGetName(op, &op_name));
1951 
1952       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n");
1953       CeedDebug(ceed, "Falling back from Operator with backend %s at address %p to Operator with backend %s at address %p for CeedOperator \"%s\"\n",
1954                 resource, op, resource_fallback, op->op_fallback, op_name);
1955       CeedCall(CeedDestroy(&ceed_fallback));
1956     }
1957     CeedCall(CeedDestroy(&ceed));
1958   }
1959   *op_fallback = op->op_fallback;
1960   return CEED_ERROR_SUCCESS;
1961 }
1962 
1963 /**
1964   @brief Get the parent `CeedOperator` for a fallback `CeedOperator`
1965 
1966   @param[in]  op     `CeedOperator` context
1967   @param[out] parent Variable to store parent `CeedOperator` context
1968 
1969   @return An error code: 0 - success, otherwise - failure
1970 
1971   @ref Backend
1972 **/
1973 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) {
1974   *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL;
1975   return CEED_ERROR_SUCCESS;
1976 }
1977 
1978 /**
1979   @brief Get the `Ceed` context of the parent `CeedOperator` for a fallback `CeedOperator`
1980 
1981   @param[in]  op     `CeedOperator` context
1982   @param[out] parent Variable to store parent `Ceed` context
1983 
1984   @return An error code: 0 - success, otherwise - failure
1985 
1986   @ref Backend
1987 **/
1988 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) {
1989   *parent = NULL;
1990   if (op->op_fallback_parent) CeedCall(CeedReferenceCopy(op->op_fallback_parent->ceed, parent));
1991   else CeedCall(CeedReferenceCopy(CeedOperatorReturnCeed(op), parent));
1992   return CEED_ERROR_SUCCESS;
1993 }
1994 
1995 /// @}
1996 
1997 /// ----------------------------------------------------------------------------
1998 /// CeedOperator Public API
1999 /// ----------------------------------------------------------------------------
2000 /// @addtogroup CeedOperatorUser
2001 /// @{
2002 
2003 /**
2004   @brief Assemble a linear `CeedQFunction` associated with a `CeedOperator`.
2005 
2006   This returns a `CeedVector` containing a matrix at each quadrature point providing the action of the `CeedQFunction` associated with the `CeedOperator`.
2007   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices representing the action of the `CeedQFunction` for a corresponding quadrature point on an element.
2008 
2009   Inputs and outputs are in the order provided by the user when adding `CeedOperator` fields.
2010   For example, a `CeedQFunction` with inputs `u` and `gradu` and outputs `gradv` and `v` , provided in that order, would result in an assembled `CeedQFunction` that consists of `(1 + dim) x (dim + 1)` matrices at each quadrature point acting on the input ` [u, du_0, du_1]` and producing the output `[dv_0, dv_1, v]`.
2011 
2012   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2013 
2014   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2015   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
2016   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
2017   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2018 
2019   @return An error code: 0 - success, otherwise - failure
2020 
2021   @ref User
2022 **/
2023 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
2024   CeedCall(CeedOperatorCheckReady(op));
2025 
2026   if (op->LinearAssembleQFunction) {
2027     // Backend version
2028     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
2029   } else {
2030     // Operator fallback
2031     CeedOperator op_fallback;
2032 
2033     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleQFunction\n");
2034     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2035     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
2036     else return CeedError(CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
2037   }
2038   return CEED_ERROR_SUCCESS;
2039 }
2040 
2041 /**
2042   @brief Assemble `CeedQFunction` and store result internally.
2043 
2044   Return copied references of stored data to the caller.
2045   Caller is responsible for ownership and destruction of the copied references.
2046   See also @ref CeedOperatorLinearAssembleQFunction().
2047 
2048   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
2049         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
2050 
2051   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2052   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
2053   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
2054   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2055 
2056   @return An error code: 0 - success, otherwise - failure
2057 
2058   @ref User
2059 **/
2060 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
2061   return CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(op, true, assembled, rstr, request);
2062 }
2063 
2064 /**
2065   @brief Assemble the diagonal of a square linear `CeedOperator`
2066 
2067   This overwrites a `CeedVector` with the diagonal of a linear `CeedOperator`.
2068 
2069   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2070 
2071   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2072 
2073   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2074   @param[out] assembled `CeedVector` to store assembled `CeedOperator` diagonal
2075   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2076 
2077   @return An error code: 0 - success, otherwise - failure
2078 
2079   @ref User
2080 **/
2081 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2082   bool     is_composite;
2083   CeedSize input_size = 0, output_size = 0;
2084 
2085   CeedCall(CeedOperatorCheckReady(op));
2086   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2087 
2088   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2089   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2090 
2091   // Early exit for empty operator
2092   if (!is_composite) {
2093     CeedInt num_elem = 0;
2094 
2095     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2096     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2097   }
2098 
2099   if (op->LinearAssembleDiagonal) {
2100     // Backend version
2101     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
2102     return CEED_ERROR_SUCCESS;
2103   } else if (op->LinearAssembleAddDiagonal) {
2104     // Backend version with zeroing first
2105     CeedCall(CeedVectorSetValue(assembled, 0.0));
2106     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
2107     return CEED_ERROR_SUCCESS;
2108   } else if (is_composite) {
2109     // Default to summing contributions of suboperators
2110     CeedCall(CeedVectorSetValue(assembled, 0.0));
2111     CeedCall(CeedOperatorLinearAssembleAddDiagonalComposite(op, request, false, assembled));
2112     return CEED_ERROR_SUCCESS;
2113   } else {
2114     // Operator fallback
2115     CeedOperator op_fallback;
2116 
2117     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleDiagonal\n");
2118     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2119     if (op_fallback) {
2120       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
2121       return CEED_ERROR_SUCCESS;
2122     }
2123   }
2124   // Default interface implementation
2125   CeedCall(CeedVectorSetValue(assembled, 0.0));
2126   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
2127   return CEED_ERROR_SUCCESS;
2128 }
2129 
2130 /**
2131   @brief Assemble the diagonal of a square linear `CeedOperator`.
2132 
2133   This sums into a `CeedVector` the diagonal of a linear `CeedOperator`.
2134 
2135   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2136 
2137   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2138 
2139   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2140   @param[out] assembled `CeedVector` to store assembled `CeedOperator` diagonal
2141   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2142 
2143   @return An error code: 0 - success, otherwise - failure
2144 
2145   @ref User
2146 **/
2147 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2148   bool     is_composite;
2149   CeedSize input_size = 0, output_size = 0;
2150 
2151   CeedCall(CeedOperatorCheckReady(op));
2152   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2153 
2154   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2155   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2156 
2157   // Early exit for empty operator
2158   if (!is_composite) {
2159     CeedInt num_elem = 0;
2160 
2161     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2162     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2163   }
2164 
2165   if (op->LinearAssembleAddDiagonal) {
2166     // Backend version
2167     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
2168     return CEED_ERROR_SUCCESS;
2169   } else if (is_composite) {
2170     // Default to summing contributions of suboperators
2171     CeedCall(CeedOperatorLinearAssembleAddDiagonalComposite(op, request, false, assembled));
2172     return CEED_ERROR_SUCCESS;
2173   } else {
2174     // Operator fallback
2175     CeedOperator op_fallback;
2176 
2177     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleAddDiagonal\n");
2178     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2179     if (op_fallback) {
2180       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
2181       return CEED_ERROR_SUCCESS;
2182     }
2183   }
2184   // Default interface implementation
2185   CeedCall(CeedOperatorLinearAssembleAddDiagonalSingle(op, request, false, assembled));
2186   return CEED_ERROR_SUCCESS;
2187 }
2188 
2189 /**
2190    @brief Fully assemble the point-block diagonal pattern of a linear `CeedOperator`.
2191 
2192    Expected to be used in conjunction with @ref CeedOperatorLinearAssemblePointBlockDiagonal().
2193 
2194    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2195    Note that the `(i, j)` pairs are unique.
2196    This function returns the number of entries and their `(i, j)` locations, while @ref CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in the same ordering.
2197 
2198    This will generally be slow unless your operator is low-order.
2199 
2200    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2201 
2202    @param[in]  op          `CeedOperator` to assemble
2203    @param[out] num_entries Number of entries in coordinate nonzero pattern
2204    @param[out] rows        Row number for each entry
2205    @param[out] cols        Column number for each entry
2206 
2207    @ref User
2208 **/
2209 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2210   bool          is_composite;
2211   CeedInt       num_active_components, num_sub_operators;
2212   CeedOperator *sub_operators;
2213 
2214   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2215 
2216   CeedSize input_size = 0, output_size = 0;
2217   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2218   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2219 
2220   if (is_composite) {
2221     CeedCall(CeedOperatorCompositeGetNumSub(op, &num_sub_operators));
2222     CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
2223   } else {
2224     sub_operators     = &op;
2225     num_sub_operators = 1;
2226   }
2227 
2228   // Verify operator can be assembled correctly
2229   {
2230     CeedOperatorAssemblyData data;
2231     CeedInt                  num_active_elem_rstrs, comp_stride;
2232     CeedElemRestriction     *active_elem_rstrs;
2233 
2234     // Get initial values to check against
2235     CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data));
2236     CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
2237     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride));
2238     CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components));
2239 
2240     // Verify that all active element restrictions have same component stride and number of components
2241     for (CeedInt k = 0; k < num_sub_operators; k++) {
2242       CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data));
2243       CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
2244       for (CeedInt i = 0; i < num_active_elem_rstrs; i++) {
2245         CeedInt comp_stride_sub, num_active_components_sub;
2246 
2247         CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub));
2248         CeedCheck(comp_stride == comp_stride_sub, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION,
2249                   "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub);
2250         CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub));
2251         CeedCheck(num_active_components == num_active_components_sub, CeedOperatorReturnCeed(op), CEED_ERROR_INCOMPATIBLE,
2252                   "All suboperators must have the same number of output components."
2253                   " Previous: %" CeedInt_FMT " Current: %" CeedInt_FMT,
2254                   num_active_components, num_active_components_sub);
2255       }
2256     }
2257   }
2258   *num_entries = input_size * num_active_components;
2259   CeedCall(CeedCalloc(*num_entries, rows));
2260   CeedCall(CeedCalloc(*num_entries, cols));
2261 
2262   for (CeedInt o = 0; o < num_sub_operators; o++) {
2263     CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr;
2264     CeedInt             comp_stride, num_elem, elem_size;
2265     const CeedInt      *offsets, *point_block_offsets;
2266 
2267     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr));
2268     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride));
2269     CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem));
2270     CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size));
2271     CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets));
2272 
2273     CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr));
2274     CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets));
2275 
2276     for (CeedSize i = 0; i < num_elem * elem_size; i++) {
2277       for (CeedInt c_out = 0; c_out < num_active_components; c_out++) {
2278         for (CeedInt c_in = 0; c_in < num_active_components; c_in++) {
2279           (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride;
2280           (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride;
2281         }
2282       }
2283     }
2284 
2285     CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets));
2286     CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets));
2287     CeedCall(CeedElemRestrictionDestroy(&active_elem_rstr));
2288     CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr));
2289   }
2290   return CEED_ERROR_SUCCESS;
2291 }
2292 
2293 /**
2294   @brief Assemble the point block diagonal of a square linear `CeedOperator`.
2295 
2296   This overwrites a `CeedVector` with the point block diagonal of a linear `CeedOperator`.
2297 
2298   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2299 
2300   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2301 
2302   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2303   @param[out] assembled `CeedVector` to store assembled `CeedOperator` point block diagonal, provided in row-major form with an `num_comp * num_comp` block at each node.
2304                           The dimensions of this vector are derived from the active vector for the `CeedOperator`.
2305                           The array has shape `[nodes, component out, component in]`.
2306   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2307 
2308   @return An error code: 0 - success, otherwise - failure
2309 
2310   @ref User
2311 **/
2312 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2313   bool     is_composite;
2314   CeedSize input_size = 0, output_size = 0;
2315 
2316   CeedCall(CeedOperatorCheckReady(op));
2317   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2318 
2319   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2320   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2321 
2322   // Early exit for empty operator
2323   if (!is_composite) {
2324     CeedInt num_elem = 0;
2325 
2326     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2327     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2328   }
2329 
2330   if (op->LinearAssemblePointBlockDiagonal) {
2331     // Backend version
2332     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
2333     return CEED_ERROR_SUCCESS;
2334   } else if (op->LinearAssembleAddPointBlockDiagonal) {
2335     // Backend version with zeroing first
2336     CeedCall(CeedVectorSetValue(assembled, 0.0));
2337     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2338     return CEED_ERROR_SUCCESS;
2339   } else {
2340     // Operator fallback
2341     CeedOperator op_fallback;
2342 
2343     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssemblePointBlockDiagonal\n");
2344     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2345     if (op_fallback) {
2346       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
2347       return CEED_ERROR_SUCCESS;
2348     }
2349   }
2350   // Default interface implementation
2351   CeedCall(CeedVectorSetValue(assembled, 0.0));
2352   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2353   return CEED_ERROR_SUCCESS;
2354 }
2355 
2356 /**
2357   @brief Assemble the point block diagonal of a square linear `CeedOperator`.
2358 
2359   This sums into a `CeedVector` with the point block diagonal of a linear `CeedOperator`.
2360 
2361   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2362 
2363   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2364 
2365   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2366   @param[out] assembled `CeedVector` to store assembled CeedOperator point block diagonal, provided in row-major form with an `num_comp * num_comp` block at each node.
2367                           The dimensions of this vector are derived from the active vector for the `CeedOperator`.
2368                           The array has shape `[nodes, component out, component in]`.
2369   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2370 
2371   @return An error code: 0 - success, otherwise - failure
2372 
2373   @ref User
2374 **/
2375 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2376   bool     is_composite;
2377   CeedSize input_size = 0, output_size = 0;
2378 
2379   CeedCall(CeedOperatorCheckReady(op));
2380   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2381 
2382   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2383   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2384 
2385   // Early exit for empty operator
2386   if (!is_composite) {
2387     CeedInt num_elem = 0;
2388 
2389     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2390     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2391   }
2392 
2393   if (op->LinearAssembleAddPointBlockDiagonal) {
2394     // Backend version
2395     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
2396     return CEED_ERROR_SUCCESS;
2397   } else {
2398     // Operator fallback
2399     CeedOperator op_fallback;
2400 
2401     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleAddPointBlockDiagonal\n");
2402     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2403     if (op_fallback) {
2404       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
2405       return CEED_ERROR_SUCCESS;
2406     }
2407   }
2408   // Default interface implementation
2409   if (is_composite) {
2410     CeedCall(CeedOperatorLinearAssembleAddDiagonalComposite(op, request, true, assembled));
2411   } else {
2412     CeedCall(CeedOperatorLinearAssembleAddDiagonalSingle(op, request, true, assembled));
2413   }
2414   return CEED_ERROR_SUCCESS;
2415 }
2416 
2417 /**
2418    @brief Fully assemble the nonzero pattern of a linear `CeedOperator`.
2419 
2420    Expected to be used in conjunction with @ref CeedOperatorLinearAssemble().
2421 
2422    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2423    Note that the `(i, j)` pairs are not unique and may repeat.
2424    This function returns the number of entries and their `(i, j)` locations, while @ref CeedOperatorLinearAssemble() provides the values in the same ordering.
2425 
2426    This will generally be slow unless your operator is low-order.
2427 
2428    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2429 
2430    @param[in]  op          `CeedOperator` to assemble
2431    @param[out] num_entries Number of entries in coordinate nonzero pattern
2432    @param[out] rows        Row number for each entry
2433    @param[out] cols        Column number for each entry
2434 
2435    @ref User
2436 **/
2437 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2438   bool          is_composite;
2439   CeedInt       num_suboperators, offset = 0;
2440   CeedSize      single_entries;
2441   CeedOperator *sub_operators;
2442 
2443   CeedCall(CeedOperatorCheckReady(op));
2444   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2445 
2446   if (op->LinearAssembleSymbolic) {
2447     // Backend version
2448     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
2449     return CEED_ERROR_SUCCESS;
2450   } else {
2451     // Operator fallback
2452     CeedOperator op_fallback;
2453 
2454     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleSymbolic\n");
2455     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2456     if (op_fallback) {
2457       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
2458       return CEED_ERROR_SUCCESS;
2459     }
2460   }
2461 
2462   // Default interface implementation
2463 
2464   // Count entries and allocate rows, cols arrays
2465   *num_entries = 0;
2466   if (is_composite) {
2467     CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
2468     CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
2469     for (CeedInt k = 0; k < num_suboperators; ++k) {
2470       CeedCall(CeedOperatorAssemblyCountEntriesSingle(sub_operators[k], &single_entries));
2471       *num_entries += single_entries;
2472     }
2473   } else {
2474     CeedCall(CeedOperatorAssemblyCountEntriesSingle(op, &single_entries));
2475     *num_entries += single_entries;
2476   }
2477   CeedCall(CeedCalloc(*num_entries, rows));
2478   CeedCall(CeedCalloc(*num_entries, cols));
2479 
2480   // Assemble nonzero locations
2481   if (is_composite) {
2482     CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
2483     CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
2484     for (CeedInt k = 0; k < num_suboperators; ++k) {
2485       CeedCall(CeedOperatorAssembleSymbolicSingle(sub_operators[k], offset, *rows, *cols));
2486       CeedCall(CeedOperatorAssemblyCountEntriesSingle(sub_operators[k], &single_entries));
2487       offset += single_entries;
2488     }
2489   } else {
2490     CeedCall(CeedOperatorAssembleSymbolicSingle(op, offset, *rows, *cols));
2491   }
2492   return CEED_ERROR_SUCCESS;
2493 }
2494 
2495 /**
2496    @brief Fully assemble the nonzero entries of a linear operator.
2497 
2498    Expected to be used in conjunction with @ref CeedOperatorLinearAssembleSymbolic().
2499 
2500    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2501    Note that the `(i, j)` pairs are not unique and may repeat.
2502    This function returns the values of the nonzero entries to be added, their `(i, j)` locations are provided by @ref CeedOperatorLinearAssembleSymbolic().
2503 
2504    This will generally be slow unless your operator is low-order.
2505 
2506    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2507 
2508    @param[in]  op     `CeedOperator` to assemble
2509    @param[out] values Values to assemble into matrix
2510 
2511    @ref User
2512 **/
2513 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
2514   bool          is_composite;
2515   CeedInt       num_suboperators, offset = 0;
2516   CeedSize      single_entries = 0;
2517   CeedOperator *sub_operators;
2518 
2519   CeedCall(CeedOperatorCheckReady(op));
2520   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2521 
2522   // Early exit for empty operator
2523   if (!is_composite) {
2524     CeedInt num_elem = 0;
2525 
2526     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2527     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2528   }
2529 
2530   if (op->LinearAssemble) {
2531     // Backend version
2532     CeedCall(op->LinearAssemble(op, values));
2533     return CEED_ERROR_SUCCESS;
2534   } else if (is_composite) {
2535     // Default to summing contributions of suboperators
2536     CeedCall(CeedVectorSetValue(values, 0.0));
2537     CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
2538     CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
2539     for (CeedInt k = 0; k < num_suboperators; k++) {
2540       CeedCall(CeedOperatorAssembleSingle(sub_operators[k], offset, values));
2541       CeedCall(CeedOperatorAssemblyCountEntriesSingle(sub_operators[k], &single_entries));
2542       offset += single_entries;
2543     }
2544     return CEED_ERROR_SUCCESS;
2545   } else if (op->LinearAssembleSingle) {
2546     CeedCall(CeedVectorSetValue(values, 0.0));
2547     CeedCall(CeedOperatorAssembleSingle(op, offset, values));
2548     return CEED_ERROR_SUCCESS;
2549   } else {
2550     // Operator fallback
2551     CeedOperator op_fallback;
2552 
2553     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssemble\n");
2554     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2555     if (op_fallback) {
2556       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2557       return CEED_ERROR_SUCCESS;
2558     }
2559   }
2560 
2561   // Default to interface version if non-composite and no fallback
2562   CeedCall(CeedVectorSetValue(values, 0.0));
2563   CeedCall(CeedOperatorAssembleSingle(op, offset, values));
2564   return CEED_ERROR_SUCCESS;
2565 }
2566 
2567 /**
2568   @brief Get the multiplicity of nodes across sub-operators in a composite `CeedOperator`.
2569 
2570   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2571 
2572   @param[in]  op               Composite `CeedOperator`
2573   @param[in]  num_skip_indices Number of sub-operators to skip
2574   @param[in]  skip_indices     Array of indices of sub-operators to skip
2575   @param[out] mult             Vector to store multiplicity (of size `l_size` )
2576 
2577   @return An error code: 0 - success, otherwise - failure
2578 
2579   @ref User
2580 **/
2581 int CeedOperatorCompositeGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2582   Ceed                ceed;
2583   CeedInt             num_suboperators;
2584   CeedSize            l_vec_len;
2585   CeedScalar         *mult_array;
2586   CeedVector          ones_l_vec;
2587   CeedElemRestriction elem_rstr, mult_elem_rstr;
2588   CeedOperator       *sub_operators;
2589 
2590   CeedCall(CeedOperatorCheckReady(op));
2591 
2592   // Zero mult vector
2593   CeedCall(CeedVectorSetValue(mult, 0.0));
2594 
2595   // Get suboperators
2596   CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
2597   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2598   CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
2599 
2600   // Work vector
2601   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2602   CeedCall(CeedOperatorGetCeed(op, &ceed));
2603   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2604   CeedCall(CeedDestroy(&ceed));
2605   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2606   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2607 
2608   // Compute multiplicity across suboperators
2609   for (CeedInt i = 0; i < num_suboperators; i++) {
2610     const CeedScalar *sub_mult_array;
2611     CeedVector        sub_mult_l_vec, ones_e_vec;
2612 
2613     // -- Check for suboperator to skip
2614     for (CeedInt j = 0; j < num_skip_indices; j++) {
2615       if (skip_indices[j] == i) continue;
2616     }
2617 
2618     // -- Sub operator multiplicity
2619     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2620     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr));
2621     CeedCall(CeedElemRestrictionDestroy(&elem_rstr));
2622     CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2623     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2624     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2625     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2626     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2627     // ---- Flag every node present in the current suboperator
2628     for (CeedSize j = 0; j < l_vec_len; j++) {
2629       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2630     }
2631     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2632     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2633     CeedCall(CeedVectorDestroy(&ones_e_vec));
2634     CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr));
2635   }
2636   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2637   CeedCall(CeedVectorDestroy(&ones_l_vec));
2638   return CEED_ERROR_SUCCESS;
2639 }
2640 
2641 /**
2642   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator`, creating the prolongation basis from the fine and coarse grid interpolation.
2643 
2644   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2645 
2646   @param[in]  op_fine      Fine grid `CeedOperator`
2647   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2648   @param[in]  rstr_coarse  Coarse grid `CeedElemRestriction`
2649   @param[in]  basis_coarse Coarse grid active vector `CeedBasis`
2650   @param[out] op_coarse    Coarse grid `CeedOperator`
2651   @param[out] op_prolong   Coarse to fine `CeedOperator`, or `NULL`
2652   @param[out] op_restrict  Fine to coarse `CeedOperator`, or `NULL`
2653 
2654   @return An error code: 0 - success, otherwise - failure
2655 
2656   @ref User
2657 **/
2658 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2659                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2660   CeedBasis basis_c_to_f = NULL;
2661 
2662   CeedCall(CeedOperatorCheckReady(op_fine));
2663 
2664   // Build prolongation matrix, if required
2665   if (op_prolong || op_restrict) {
2666     CeedBasis basis_fine;
2667 
2668     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2669     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2670     CeedCall(CeedBasisDestroy(&basis_fine));
2671   }
2672 
2673   // Core code
2674   CeedCall(
2675       CeedOperatorMultigridLevelCreateSingle_Core(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2676   return CEED_ERROR_SUCCESS;
2677 }
2678 
2679 /**
2680   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator` with a tensor basis for the active basis.
2681 
2682   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2683 
2684   @param[in]  op_fine       Fine grid `CeedOperator`
2685   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2686   @param[in]  rstr_coarse   Coarse grid `CeedElemRestriction`
2687   @param[in]  basis_coarse  Coarse grid active vector `CeedBasis`
2688   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction `CeedOperator`
2689   @param[out] op_coarse     Coarse grid `CeedOperator`
2690   @param[out] op_prolong    Coarse to fine `CeedOperator`, or `NULL`
2691   @param[out] op_restrict   Fine to coarse `CeedOperator`, or `NULL`
2692 
2693   @return An error code: 0 - success, otherwise - failure
2694 
2695   @ref User
2696 **/
2697 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2698                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2699                                              CeedOperator *op_restrict) {
2700   Ceed      ceed;
2701   CeedInt   Q_f, Q_c;
2702   CeedBasis basis_fine, basis_c_to_f = NULL;
2703 
2704   CeedCall(CeedOperatorCheckReady(op_fine));
2705   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2706 
2707   // Check for compatible quadrature spaces
2708   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2709   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2710   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2711   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION,
2712             "Bases must have compatible quadrature spaces."
2713             " Fine grid: %" CeedInt_FMT " points, Coarse grid: %" CeedInt_FMT " points",
2714             Q_f, Q_c);
2715 
2716   // Create coarse to fine basis, if required
2717   if (op_prolong || op_restrict) {
2718     CeedInt     dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2719     CeedScalar *q_ref, *q_weight, *grad;
2720 
2721     // Check if interpolation matrix is provided
2722     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2723               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2724     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2725     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2726     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2727     CeedCall(CeedBasisDestroy(&basis_fine));
2728     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2729     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2730     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2731     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2732     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2733     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2734     CeedCall(CeedFree(&q_ref));
2735     CeedCall(CeedFree(&q_weight));
2736     CeedCall(CeedFree(&grad));
2737   }
2738 
2739   // Core code
2740   CeedCall(
2741       CeedOperatorMultigridLevelCreateSingle_Core(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2742   CeedCall(CeedDestroy(&ceed));
2743   return CEED_ERROR_SUCCESS;
2744 }
2745 
2746 /**
2747   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator` with a non-tensor basis for the active vector
2748 
2749   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2750 
2751   @param[in]  op_fine       Fine grid `CeedOperator`
2752   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2753   @param[in]  rstr_coarse   Coarse grid `CeedElemRestriction`
2754   @param[in]  basis_coarse  Coarse grid active vector `CeedBasis`
2755   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction `CeedOperator`
2756   @param[out] op_coarse     Coarse grid `CeedOperator`
2757   @param[out] op_prolong    Coarse to fine `CeedOperator`, or `NULL`
2758   @param[out] op_restrict   Fine to coarse `CeedOperator`, or `NULL`
2759 
2760   @return An error code: 0 - success, otherwise - failure
2761 
2762   @ref User
2763 **/
2764 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2765                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2766                                        CeedOperator *op_restrict) {
2767   Ceed      ceed;
2768   CeedInt   Q_f, Q_c;
2769   CeedBasis basis_fine, basis_c_to_f = NULL;
2770 
2771   CeedCall(CeedOperatorCheckReady(op_fine));
2772   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2773 
2774   // Check for compatible quadrature spaces
2775   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2776   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2777   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2778   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2779 
2780   // Coarse to fine basis
2781   if (op_prolong || op_restrict) {
2782     CeedInt          dim, num_comp, num_nodes_c, num_nodes_f;
2783     CeedScalar      *q_ref, *q_weight, *grad;
2784     CeedElemTopology topo;
2785 
2786     // Check if interpolation matrix is provided
2787     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2788               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2789     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2790     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2791     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2792     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2793     CeedCall(CeedBasisDestroy(&basis_fine));
2794     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2795     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2796     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2797     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2798     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2799     CeedCall(CeedFree(&q_ref));
2800     CeedCall(CeedFree(&q_weight));
2801     CeedCall(CeedFree(&grad));
2802   }
2803 
2804   // Core code
2805   CeedCall(
2806       CeedOperatorMultigridLevelCreateSingle_Core(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2807   CeedCall(CeedDestroy(&ceed));
2808   return CEED_ERROR_SUCCESS;
2809 }
2810 
2811 /**
2812   @brief Build a FDM based approximate inverse for each element for a `CeedOperator`.
2813 
2814   This returns a `CeedOperator` and `CeedVector` to apply a Fast Diagonalization Method based approximate inverse.
2815   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2816   The assembled `CeedQFunction` is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T \hat S V\f$.
2817   The `CeedOperator` must be linear and non-composite.
2818   The associated `CeedQFunction` must therefore also be linear.
2819 
2820   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2821 
2822   @param[in]  op      `CeedOperator` to create element inverses
2823   @param[out] fdm_inv `CeedOperator` to apply the action of a FDM based inverse for each element
2824   @param[in]  request Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2825 
2826   @return An error code: 0 - success, otherwise - failure
2827 
2828   @ref User
2829 **/
2830 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2831   Ceed                 ceed, ceed_parent;
2832   bool                 interp = false, grad = false, is_tensor_basis = true;
2833   CeedInt              num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2834   CeedScalar          *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg;
2835   const CeedScalar    *interp_1d, *grad_1d, *q_weight_1d;
2836   CeedVector           q_data;
2837   CeedElemRestriction  rstr  = NULL, rstr_qd_i;
2838   CeedBasis            basis = NULL, fdm_basis;
2839   CeedQFunctionContext ctx_fdm;
2840   CeedQFunctionField  *qf_fields;
2841   CeedQFunction        qf, qf_fdm;
2842   CeedOperatorField   *op_fields;
2843 
2844   CeedCall(CeedOperatorCheckReady(op));
2845 
2846   if (op->CreateFDMElementInverse) {
2847     // Backend version
2848     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2849     return CEED_ERROR_SUCCESS;
2850   } else {
2851     // Operator fallback
2852     CeedOperator op_fallback;
2853 
2854     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorCreateFDMElementInverse\n");
2855     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2856     if (op_fallback) {
2857       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2858       return CEED_ERROR_SUCCESS;
2859     }
2860   }
2861 
2862   // Default interface implementation
2863   CeedCall(CeedOperatorGetCeed(op, &ceed));
2864   CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent));
2865   CeedCall(CeedOperatorGetQFunction(op, &qf));
2866 
2867   // Determine active input basis
2868   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2869   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2870   for (CeedInt i = 0; i < num_input_fields; i++) {
2871     CeedVector vec;
2872 
2873     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2874     if (vec == CEED_VECTOR_ACTIVE) {
2875       CeedEvalMode eval_mode;
2876 
2877       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2878       interp = interp || eval_mode == CEED_EVAL_INTERP;
2879       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2880       if (!basis) CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2881       if (!rstr) CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2882     }
2883     CeedCall(CeedVectorDestroy(&vec));
2884   }
2885   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2886   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2887   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2888   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2889   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2890   CeedCall(CeedBasisGetDimension(basis, &dim));
2891   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2892   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2893 
2894   // Build and diagonalize 1D Mass and Laplacian
2895   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2896   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2897   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2898   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2899   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2900   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2901   CeedCall(CeedCalloc(P_1d, &lambda));
2902   // -- Build matrices
2903   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2904   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2905   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2906   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2907 
2908   // -- Diagonalize
2909   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2910   CeedCall(CeedFree(&mass));
2911   CeedCall(CeedFree(&laplace));
2912   for (CeedInt i = 0; i < P_1d; i++) {
2913     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2914   }
2915   CeedCall(CeedFree(&x));
2916 
2917   {
2918     CeedInt             layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2919     CeedScalar          max_norm = 0;
2920     const CeedScalar   *assembled_array, *q_weight_array;
2921     CeedVector          assembled = NULL, q_weight;
2922     CeedElemRestriction rstr_qf   = NULL;
2923 
2924     // Assemble QFunction
2925     CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2926     CeedCall(CeedElemRestrictionGetELayout(rstr_qf, layout));
2927     CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2928     CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2929 
2930     // Calculate element averages
2931     CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2932     CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2933     CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2934     CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2935     CeedCall(CeedCalloc(num_elem, &elem_avg));
2936     const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2937 
2938     for (CeedInt e = 0; e < num_elem; e++) {
2939       CeedInt count = 0;
2940 
2941       for (CeedInt q = 0; q < num_qpts; q++) {
2942         for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2943           if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2944             elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2945             count++;
2946           }
2947         }
2948       }
2949       if (count) {
2950         elem_avg[e] /= count;
2951       } else {
2952         elem_avg[e] = 1.0;
2953       }
2954     }
2955     CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2956     CeedCall(CeedVectorDestroy(&assembled));
2957     CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2958     CeedCall(CeedVectorDestroy(&q_weight));
2959   }
2960 
2961   // Build FDM diagonal
2962   {
2963     CeedScalar *q_data_array, *fdm_diagonal;
2964 
2965     CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2966     const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2967     for (CeedInt c = 0; c < num_comp; c++) {
2968       for (CeedInt n = 0; n < num_nodes; n++) {
2969         if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2970         if (grad) {
2971           for (CeedInt d = 0; d < dim; d++) {
2972             CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2973             fdm_diagonal[c * num_nodes + n] += lambda[i];
2974           }
2975         }
2976         if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2977       }
2978     }
2979     CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2980     CeedCall(CeedVectorSetValue(q_data, 0.0));
2981     CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2982     for (CeedInt e = 0; e < num_elem; e++) {
2983       for (CeedInt c = 0; c < num_comp; c++) {
2984         for (CeedInt n = 0; n < num_nodes; n++) {
2985           q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2986         }
2987       }
2988     }
2989     CeedCall(CeedFree(&elem_avg));
2990     CeedCall(CeedFree(&fdm_diagonal));
2991     CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2992   }
2993 
2994   // Setup FDM operator
2995   // -- Basis
2996   {
2997     CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2998 
2999     CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
3000     CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
3001     CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
3002     CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
3003     CeedCall(CeedFree(&fdm_interp));
3004     CeedCall(CeedFree(&grad_dummy));
3005     CeedCall(CeedFree(&q_ref_dummy));
3006     CeedCall(CeedFree(&q_weight_dummy));
3007     CeedCall(CeedFree(&lambda));
3008   }
3009 
3010   // -- Restriction
3011   {
3012     CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp};
3013     CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp,
3014                                               (CeedSize)num_elem * (CeedSize)num_comp * (CeedSize)num_nodes, strides, &rstr_qd_i));
3015   }
3016 
3017   // -- QFunction
3018   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
3019   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
3020   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
3021   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
3022   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
3023 
3024   // -- QFunction context
3025   {
3026     CeedInt *num_comp_data;
3027 
3028     CeedCall(CeedCalloc(1, &num_comp_data));
3029     num_comp_data[0] = num_comp;
3030     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
3031     CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
3032   }
3033   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
3034   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
3035 
3036   // -- Operator
3037   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
3038   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
3039   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data));
3040   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
3041 
3042   // Cleanup
3043   CeedCall(CeedDestroy(&ceed));
3044   CeedCall(CeedDestroy(&ceed_parent));
3045   CeedCall(CeedVectorDestroy(&q_data));
3046   CeedCall(CeedElemRestrictionDestroy(&rstr));
3047   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
3048   CeedCall(CeedBasisDestroy(&basis));
3049   CeedCall(CeedBasisDestroy(&fdm_basis));
3050   CeedCall(CeedQFunctionDestroy(&qf));
3051   CeedCall(CeedQFunctionDestroy(&qf_fdm));
3052   return CEED_ERROR_SUCCESS;
3053 }
3054 
3055 /// @}
3056