xref: /libCEED/interface/ceed-preconditioning.c (revision 860dc8215fde8015ce701d5081dc7f665abfde5f)
1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a `CeedQFunction` with a reference `Ceed` to fallback for advanced `CeedOperator` functionality
28 
29   @param[in]  fallback_ceed `Ceed` on which to create fallback `CeedQFunction`
30   @param[in]  qf            `CeedQFunction` to create fallback for
31   @param[out] qf_fallback   Fallback `CeedQFunction`
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   char               *source_path_with_name = NULL;
39   CeedInt             num_input_fields, num_output_fields;
40   CeedQFunctionField *input_fields, *output_fields;
41 
42   // Check if NULL qf passed in
43   if (!qf) return CEED_ERROR_SUCCESS;
44 
45   CeedDebug(CeedQFunctionReturnCeed(qf), "Creating fallback CeedQFunction\n");
46 
47   if (qf->source_path) {
48     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
49 
50     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
51     memcpy(source_path_with_name, qf->source_path, path_len);
52     memcpy(&source_path_with_name[path_len], ":", 1);
53     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
54   } else if (qf->user_source) {
55     CeedCall(CeedStringAllocCopy(qf->user_source, &source_path_with_name));
56   } else {
57     CeedCall(CeedCalloc(1, &source_path_with_name));
58   }
59 
60   {
61     CeedInt           vec_length;
62     CeedQFunctionUser f;
63 
64     CeedCall(CeedQFunctionGetVectorLength(qf, &vec_length));
65     CeedCall(CeedQFunctionGetUserFunction(qf, &f));
66     CeedCall(CeedQFunctionCreateInterior(fallback_ceed, vec_length, f, source_path_with_name, qf_fallback));
67   }
68   {
69     CeedQFunctionContext ctx;
70 
71     CeedCall(CeedQFunctionGetContext(qf, &ctx));
72     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
73     CeedCall(CeedQFunctionContextDestroy(&ctx));
74   }
75   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
76   for (CeedInt i = 0; i < num_input_fields; i++) {
77     const char  *field_name;
78     CeedInt      size;
79     CeedEvalMode eval_mode;
80 
81     CeedCall(CeedQFunctionFieldGetData(input_fields[i], &field_name, &size, &eval_mode));
82     CeedCall(CeedQFunctionAddInput(*qf_fallback, field_name, size, eval_mode));
83   }
84   for (CeedInt i = 0; i < num_output_fields; i++) {
85     const char  *field_name;
86     CeedInt      size;
87     CeedEvalMode eval_mode;
88 
89     CeedCall(CeedQFunctionFieldGetData(output_fields[i], &field_name, &size, &eval_mode));
90     CeedCall(CeedQFunctionAddOutput(*qf_fallback, field_name, size, eval_mode));
91   }
92   CeedCall(CeedFree(&source_path_with_name));
93   return CEED_ERROR_SUCCESS;
94 }
95 
96 /**
97   @brief Duplicate a `CeedOperator` with a reference `Ceed` to fallback for advanced `CeedOperator` functionality
98 
99   @param[in,out] op `CeedOperator` to create fallback for
100 
101   @return An error code: 0 - success, otherwise - failure
102 
103   @ref Developer
104 **/
105 static int CeedOperatorCreateFallback(CeedOperator op) {
106   bool         is_composite;
107   Ceed         ceed, ceed_fallback;
108   CeedOperator op_fallback;
109 
110   // Check not already created
111   if (op->op_fallback) return CEED_ERROR_SUCCESS;
112 
113   // Fallback Ceed
114   CeedCall(CeedOperatorGetCeed(op, &ceed));
115   CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
116   CeedCall(CeedDestroy(&ceed));
117   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
118 
119   CeedDebug(CeedOperatorReturnCeed(op), "Creating fallback CeedOperator\n");
120 
121   // Clone Op
122   CeedCall(CeedOperatorIsComposite(op, &is_composite));
123   if (is_composite) {
124     CeedInt       num_suboperators;
125     CeedOperator *sub_operators;
126 
127     CeedCall(CeedOperatorCreateComposite(ceed_fallback, &op_fallback));
128     CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
129     CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
130     for (CeedInt i = 0; i < num_suboperators; i++) {
131       CeedOperator op_sub_fallback;
132 
133       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
134       CeedCall(CeedOperatorCompositeAddSub(op_fallback, op_sub_fallback));
135     }
136   } else {
137     bool               is_at_points = false;
138     CeedInt            num_input_fields, num_output_fields;
139     CeedQFunction      qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
140     CeedOperatorField *input_fields, *output_fields;
141 
142     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
143     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
144     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
145     CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
146     if (is_at_points) {
147       CeedVector          points;
148       CeedElemRestriction rstr_points;
149 
150       CeedCall(CeedOperatorCreateAtPoints(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
151       CeedCall(CeedOperatorAtPointsGetPoints(op, &rstr_points, &points));
152       CeedCall(CeedOperatorAtPointsSetPoints(op_fallback, rstr_points, points));
153       CeedCall(CeedVectorDestroy(&points));
154       CeedCall(CeedElemRestrictionDestroy(&rstr_points));
155     } else {
156       CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
157     }
158     CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
159     for (CeedInt i = 0; i < num_input_fields; i++) {
160       const char         *field_name;
161       CeedVector          vec;
162       CeedElemRestriction rstr;
163       CeedBasis           basis;
164 
165       CeedCall(CeedOperatorFieldGetData(input_fields[i], &field_name, &rstr, &basis, &vec));
166       CeedCall(CeedOperatorSetField(op_fallback, field_name, rstr, basis, vec));
167       CeedCall(CeedVectorDestroy(&vec));
168       CeedCall(CeedElemRestrictionDestroy(&rstr));
169       CeedCall(CeedBasisDestroy(&basis));
170     }
171     for (CeedInt i = 0; i < num_output_fields; i++) {
172       const char         *field_name;
173       CeedVector          vec;
174       CeedElemRestriction rstr;
175       CeedBasis           basis;
176 
177       CeedCall(CeedOperatorFieldGetData(output_fields[i], &field_name, &rstr, &basis, &vec));
178       CeedCall(CeedOperatorSetField(op_fallback, field_name, rstr, basis, vec));
179       CeedCall(CeedVectorDestroy(&vec));
180       CeedCall(CeedElemRestrictionDestroy(&rstr));
181       CeedCall(CeedBasisDestroy(&basis));
182     }
183     {
184       CeedQFunctionAssemblyData data;
185 
186       CeedCall(CeedOperatorGetQFunctionAssemblyData(op, &data));
187       CeedCall(CeedQFunctionAssemblyDataReferenceCopy(data, &op_fallback->qf_assembled));
188     }
189     // Cleanup
190     CeedCall(CeedQFunctionDestroy(&qf_fallback));
191     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
192     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
193   }
194   CeedCall(CeedOperatorSetName(op_fallback, op->name));
195   CeedCall(CeedOperatorCheckReady(op_fallback));
196   // Note: No ref-counting here so we don't get caught in a reference loop.
197   //       The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback.
198   op->op_fallback                 = op_fallback;
199   op_fallback->op_fallback_parent = op;
200   CeedCall(CeedDestroy(&ceed_fallback));
201   return CEED_ERROR_SUCCESS;
202 }
203 
204 /**
205   @brief Core logic for assembling operator diagonal or point block diagonal
206 
207   @param[in]  op             `CeedOperator` to assemble diagonal or point block diagonal
208   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
209   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
210   @param[out] assembled      `CeedVector` to store assembled diagonal
211 
212   @return An error code: 0 - success, otherwise - failure
213 
214   @ref Developer
215 **/
216 static inline int CeedOperatorLinearAssembleAddDiagonalSingle_Mesh(CeedOperator op, CeedRequest *request, const bool is_point_block,
217                                                                    CeedVector assembled) {
218   bool is_composite;
219 
220   CeedCall(CeedOperatorIsComposite(op, &is_composite));
221   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
222 
223   // Assemble QFunction
224   CeedInt             layout_qf[3];
225   const CeedScalar   *assembled_qf_array;
226   CeedVector          assembled_qf        = NULL;
227   CeedElemRestriction assembled_elem_rstr = NULL;
228 
229   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
230   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, layout_qf));
231   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
232   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
233 
234   // Get assembly data
235   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
236   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
237   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
238   CeedBasis               *active_bases_in, *active_bases_out;
239   CeedElemRestriction     *active_elem_rstrs_in, *active_elem_rstrs_out;
240   CeedOperatorAssemblyData data;
241 
242   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
243   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
244                                                 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out,
245                                                 &num_output_components));
246   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL));
247   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out));
248 
249   // Loop over all active bases (find matching input/output pairs)
250   for (CeedInt b = 0; b < CeedIntMin(num_active_bases_in, num_active_bases_out); b++) {
251     CeedInt             b_in, b_out, num_elem, num_nodes, num_qpts, num_comp;
252     bool                has_eval_none = false;
253     CeedScalar         *elem_diag_array, *identity = NULL;
254     CeedVector          elem_diag;
255     CeedElemRestriction diag_elem_rstr;
256 
257     if (num_active_bases_in <= num_active_bases_out) {
258       b_in = b;
259       for (b_out = 0; b_out < num_active_bases_out; b_out++) {
260         if (active_bases_in[b_in] == active_bases_out[b_out]) {
261           break;
262         }
263       }
264       if (b_out == num_active_bases_out) {
265         continue;
266       }  // No matching output basis found
267     } else {
268       b_out = b;
269       for (b_in = 0; b_in < num_active_bases_in; b_in++) {
270         if (active_bases_in[b_in] == active_bases_out[b_out]) {
271           break;
272         }
273       }
274       if (b_in == num_active_bases_in) {
275         continue;
276       }  // No matching output basis found
277     }
278     CeedCheck(active_elem_rstrs_in[b_in] == active_elem_rstrs_out[b_out], CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
279               "Cannot assemble operator diagonal with different input and output active element restrictions");
280 
281     // Assemble point block diagonal restriction, if needed
282     if (is_point_block) {
283       CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b_in], &diag_elem_rstr));
284     } else {
285       CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b_in], &diag_elem_rstr));
286     }
287 
288     // Create diagonal vector
289     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
290 
291     // Assemble element operator diagonals
292     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
293     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
294     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
295     CeedCall(CeedBasisGetNumNodes(active_bases_in[b_in], &num_nodes));
296     CeedCall(CeedBasisGetNumComponents(active_bases_in[b_in], &num_comp));
297     if (active_bases_in[b_in] == CEED_BASIS_NONE) num_qpts = num_nodes;
298     else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b_in], &num_qpts));
299 
300     // Construct identity matrix for basis if required
301     for (CeedInt i = 0; i < num_eval_modes_in[b_in]; i++) {
302       has_eval_none = has_eval_none || (eval_modes_in[b_in][i] == CEED_EVAL_NONE);
303     }
304     for (CeedInt i = 0; i < num_eval_modes_out[b_out]; i++) {
305       has_eval_none = has_eval_none || (eval_modes_out[b_out][i] == CEED_EVAL_NONE);
306     }
307     if (has_eval_none) {
308       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
309       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
310     }
311 
312     // Compute the diagonal of B^T D B
313     // Each element
314     for (CeedSize e = 0; e < num_elem; e++) {
315       // Each basis eval mode pair
316       CeedInt      d_out              = 0, q_comp_out;
317       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
318 
319       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b_out]; e_out++) {
320         CeedInt           d_in              = 0, q_comp_in;
321         const CeedScalar *B_t               = NULL;
322         CeedEvalMode      eval_mode_in_prev = CEED_EVAL_NONE;
323 
324         CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b_out], eval_modes_out[b_out][e_out], identity, &B_t));
325         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b_out], eval_modes_out[b_out][e_out], &q_comp_out));
326         if (q_comp_out > 1) {
327           if (e_out == 0 || eval_modes_out[b_out][e_out] != eval_mode_out_prev) d_out = 0;
328           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
329         }
330         eval_mode_out_prev = eval_modes_out[b_out][e_out];
331 
332         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b_in]; e_in++) {
333           const CeedScalar *B = NULL;
334 
335           CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b_in], eval_modes_in[b_in][e_in], identity, &B));
336           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b_in], eval_modes_in[b_in][e_in], &q_comp_in));
337           if (q_comp_in > 1) {
338             if (e_in == 0 || eval_modes_in[b_in][e_in] != eval_mode_in_prev) d_in = 0;
339             else B = &B[(++d_in) * num_qpts * num_nodes];
340           }
341           eval_mode_in_prev = eval_modes_in[b_in][e_in];
342 
343           // Each component
344           for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
345             // Each qpt/node pair
346             for (CeedInt q = 0; q < num_qpts; q++) {
347               if (is_point_block) {
348                 // Point Block Diagonal
349                 for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
350                   const CeedSize c_offset =
351                       (eval_mode_offsets_in[b_in][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
352                   const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
353 
354                   for (CeedInt n = 0; n < num_nodes; n++) {
355                     elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
356                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
357                   }
358                 }
359               } else {
360                 // Diagonal Only
361                 const CeedInt c_offset =
362                     (eval_mode_offsets_in[b_in][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
363                 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
364 
365                 for (CeedInt n = 0; n < num_nodes; n++) {
366                   elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
367                 }
368               }
369             }
370           }
371         }
372       }
373     }
374     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
375 
376     // Assemble local operator diagonal
377     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
378 
379     // Cleanup
380     CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
381     CeedCall(CeedVectorDestroy(&elem_diag));
382     CeedCall(CeedFree(&identity));
383   }
384   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
385   CeedCall(CeedVectorDestroy(&assembled_qf));
386   return CEED_ERROR_SUCCESS;
387 }
388 
389 /**
390   @brief Core logic for assembling operator diagonal or point block diagonal
391 
392   @param[in]  op             `CeedOperator` to assemble diagonal or point block diagonal
393   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
394   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
395   @param[out] assembled      `CeedVector` to store assembled diagonal
396 
397   @return An error code: 0 - success, otherwise - failure
398 
399   @ref Developer
400 **/
401 static inline int CeedOperatorLinearAssembleAddDiagonalSingle(CeedOperator op, CeedRequest *request, const bool is_point_block,
402                                                               CeedVector assembled) {
403   bool is_at_points;
404 
405   CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
406   CeedCheck(!is_at_points, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "AtPoints operator not supported");
407   CeedCall(CeedOperatorLinearAssembleAddDiagonalSingle_Mesh(op, request, is_point_block, assembled));
408   return CEED_ERROR_SUCCESS;
409 }
410 
411 /**
412   @brief Core logic for assembling composite operator diagonal
413 
414   @param[in]  op             `CeedOperator` to assemble point block diagonal
415   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
416   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
417   @param[out] assembled      `CeedVector` to store assembled diagonal
418 
419   @return An error code: 0 - success, otherwise - failure
420 
421   @ref Developer
422 **/
423 static inline int CeedOperatorLinearAssembleAddDiagonalComposite(CeedOperator op, CeedRequest *request, const bool is_point_block,
424                                                                  CeedVector assembled) {
425   CeedInt       num_sub;
426   CeedOperator *suboperators;
427 
428   CeedCall(CeedOperatorCompositeGetNumSub(op, &num_sub));
429   CeedCall(CeedOperatorCompositeGetSubList(op, &suboperators));
430   for (CeedInt i = 0; i < num_sub; i++) {
431     if (is_point_block) {
432       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
433     } else {
434       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
435     }
436   }
437   return CEED_ERROR_SUCCESS;
438 }
439 
440 /**
441   @brief Build nonzero pattern for non-composite CeedOperator`.
442 
443   Users should generally use @ref CeedOperatorLinearAssembleSymbolic().
444 
445   @param[in]  op     `CeedOperator` to assemble nonzero pattern
446   @param[in]  offset Offset for number of entries
447   @param[out] rows   Row number for each entry
448   @param[out] cols   Column number for each entry
449 
450   @return An error code: 0 - success, otherwise - failure
451 
452   @ref Developer
453 **/
454 static int CeedOperatorAssembleSymbolicSingle(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
455   Ceed                ceed;
456   bool                is_composite;
457   CeedSize            num_nodes_in, num_nodes_out, local_num_entries, count = 0;
458   CeedInt             num_elem_in, elem_size_in, num_comp_in, layout_er_in[3];
459   CeedInt             num_elem_out, elem_size_out, num_comp_out, layout_er_out[3];
460   CeedScalar         *array;
461   const CeedScalar   *elem_dof_a_in, *elem_dof_a_out;
462   CeedVector          index_vec_in, index_vec_out, elem_dof_in, elem_dof_out;
463   CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out;
464 
465   CeedCall(CeedOperatorIsComposite(op, &is_composite));
466   CeedCall(CeedOperatorGetCeed(op, &ceed));
467   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
468 
469   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out));
470   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
471   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
472   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
473   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
474   CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, layout_er_in));
475 
476   // Determine elem_dof relation for input
477   CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in));
478   CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array));
479   for (CeedSize i = 0; i < num_nodes_in; i++) array[i] = i;
480   CeedCall(CeedVectorRestoreArray(index_vec_in, &array));
481   CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in));
482   CeedCall(CeedVectorSetValue(elem_dof_in, 0.0));
483   CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in));
484   CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE));
485   CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in));
486   CeedCall(CeedVectorDestroy(&index_vec_in));
487   CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in));
488 
489   if (elem_rstr_in != elem_rstr_out) {
490     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
491     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
492               "Active input and output operator restrictions must have the same number of elements."
493               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
494               num_elem_in, num_elem_out);
495     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
496     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
497     CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, layout_er_out));
498 
499     // Determine elem_dof relation for output
500     CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out));
501     CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array));
502     for (CeedSize i = 0; i < num_nodes_out; i++) array[i] = i;
503     CeedCall(CeedVectorRestoreArray(index_vec_out, &array));
504     CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out));
505     CeedCall(CeedVectorSetValue(elem_dof_out, 0.0));
506     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out));
507     CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE));
508     CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out));
509     CeedCall(CeedVectorDestroy(&index_vec_out));
510     CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out));
511   } else {
512     num_elem_out     = num_elem_in;
513     elem_size_out    = elem_size_in;
514     num_comp_out     = num_comp_in;
515     layout_er_out[0] = layout_er_in[0];
516     layout_er_out[1] = layout_er_in[1];
517     layout_er_out[2] = layout_er_in[2];
518     elem_dof_a_out   = elem_dof_a_in;
519   }
520   local_num_entries = (CeedSize)elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
521 
522   // Determine i, j locations for element matrices
523   for (CeedInt e = 0; e < num_elem_in; e++) {
524     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
525       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
526         for (CeedInt i = 0; i < elem_size_out; i++) {
527           for (CeedInt j = 0; j < elem_size_in; j++) {
528             const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2];
529             const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2];
530             const CeedInt row                = elem_dof_a_out[elem_dof_index_row];
531             const CeedInt col                = elem_dof_a_in[elem_dof_index_col];
532 
533             rows[offset + count] = row;
534             cols[offset + count] = col;
535             count++;
536           }
537         }
538       }
539     }
540   }
541   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
542   CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in));
543   CeedCall(CeedVectorDestroy(&elem_dof_in));
544   if (elem_rstr_in != elem_rstr_out) {
545     CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out));
546     CeedCall(CeedVectorDestroy(&elem_dof_out));
547   }
548   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
549   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
550   CeedCall(CeedDestroy(&ceed));
551   return CEED_ERROR_SUCCESS;
552 }
553 
554 /**
555   @brief Core logic to assemble `CeedQFunction` and store result internally.
556 
557   Return copied references of stored data to the caller.
558   Caller is responsible for ownership and destruction of the copied references.
559   See also @ref CeedOperatorLinearAssembleQFunction().
560 
561   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
562         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
563 
564   @param[in]  op         `CeedOperator` to assemble `CeedQFunction`
565   @param[in]  use_parent Boolean flag to check for fallback parent implementation
566   @param[out] assembled  `CeedVector` to store assembled `CeedQFunction` at quadrature points
567   @param[out] rstr       `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
568   @param[in]  request    Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
569 
570   @return An error code: 0 - success, otherwise - failure
571 
572   @ref User
573 **/
574 static int CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(CeedOperator op, bool use_parent, CeedVector *assembled, CeedElemRestriction *rstr,
575                                                                  CeedRequest *request) {
576   int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL;
577   CeedOperator op_assemble                                                                           = NULL;
578   CeedOperator op_fallback_parent                                                                    = NULL;
579 
580   CeedCall(CeedOperatorCheckReady(op));
581 
582   // Determine if fallback parent or operator has implementation
583   CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent));
584   if (op_fallback_parent && use_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) {
585     // -- Backend version for op fallback parent is faster, if it exists
586     CeedDebug(CeedOperatorReturnCeed(op), "Using fallback parent for CeedOperatorLinearAssembleQFunctionBuildOrUpdate\n");
587     LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate;
588     op_assemble                   = op_fallback_parent;
589   } else if (op->LinearAssembleQFunctionUpdate) {
590     // -- Backend version for op
591     LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate;
592     op_assemble                   = op;
593   }
594 
595   // Assemble QFunction
596   if (LinearAssembleQFunctionUpdate) {
597     // Backend or fallback parent version
598     CeedQFunctionAssemblyData data;
599     bool                      data_is_setup;
600     CeedVector                assembled_vec  = NULL;
601     CeedElemRestriction       assembled_rstr = NULL;
602 
603     CeedCall(CeedOperatorGetQFunctionAssemblyData(op, &data));
604     CeedCall(CeedQFunctionAssemblyDataIsSetup(data, &data_is_setup));
605     if (data_is_setup) {
606       bool update_needed;
607 
608       CeedCall(CeedQFunctionAssemblyDataGetObjects(data, &assembled_vec, &assembled_rstr));
609       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(data, &update_needed));
610       if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request));
611     } else {
612       CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request));
613       CeedCall(CeedQFunctionAssemblyDataSetObjects(data, assembled_vec, assembled_rstr));
614     }
615     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(data, false));
616 
617     // Copy reference from internally held copy
618     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
619     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
620     CeedCall(CeedVectorDestroy(&assembled_vec));
621     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
622   } else {
623     // Operator fallback
624     CeedOperator op_fallback;
625 
626     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleQFunctionBuildOrUpdate\n");
627     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
628     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
629     else return CeedError(CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
630   }
631   return CEED_ERROR_SUCCESS;
632 }
633 
634 /**
635   @brief Assemble `CeedQFunction` and store result internally, but do not use fallback parent.
636 
637   Return copied references of stored data to the caller.
638   Caller is responsible for ownership and destruction of the copied references.
639   See also @ref CeedOperatorLinearAssembleQFunction().
640 
641   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
642         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
643 
644   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
645   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
646   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
647   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
648 
649   @return An error code: 0 - success, otherwise - failure
650 
651   @ref Developer
652 **/
653 int CeedOperatorLinearAssembleQFunctionBuildOrUpdateFallback(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr,
654                                                              CeedRequest *request) {
655   return CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(op, false, assembled, rstr, request);
656 }
657 
658 /**
659   @brief Assemble nonzero entries for non-composite `CeedOperator`.
660 
661   Users should generally use @ref CeedOperatorLinearAssemble().
662 
663   @param[in]  op     `CeedOperator` to assemble
664   @param[in]  offset Offset for number of entries
665   @param[out] values Values to assemble into matrix
666 
667   @return An error code: 0 - success, otherwise - failure
668 
669   @ref Developer
670 **/
671 int CeedOperatorAssembleSingle(CeedOperator op, CeedInt offset, CeedVector values) {
672   bool is_composite, is_at_points;
673 
674   CeedCall(CeedOperatorIsComposite(op, &is_composite));
675   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
676 
677   // Early exit for empty operator
678   {
679     CeedInt num_elem = 0;
680 
681     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
682     if (num_elem == 0) return CEED_ERROR_SUCCESS;
683   }
684 
685   if (op->LinearAssembleSingle) {
686     // Backend version
687     CeedCall(op->LinearAssembleSingle(op, offset, values));
688     return CEED_ERROR_SUCCESS;
689   } else {
690     // Operator fallback
691     CeedOperator op_fallback;
692 
693     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorAssembleSingle\n");
694     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
695     if (op_fallback) {
696       CeedCall(CeedOperatorAssembleSingle(op_fallback, offset, values));
697       return CEED_ERROR_SUCCESS;
698     }
699   }
700 
701   CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
702   CeedCheck(!is_at_points, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
703             "Backend does not implement CeedOperatorLinearAssemble for AtPoints operator");
704 
705   // Assemble QFunction
706   CeedInt             layout_qf[3];
707   const CeedScalar   *assembled_qf_array;
708   CeedVector          assembled_qf        = NULL;
709   CeedElemRestriction assembled_elem_rstr = NULL;
710 
711   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE));
712   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, layout_qf));
713   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
714   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
715 
716   // Get assembly data
717   CeedInt                  num_elem_in, elem_size_in, num_comp_in, num_qpts_in;
718   CeedInt                  num_elem_out, elem_size_out, num_comp_out, num_qpts_out;
719   CeedSize                 local_num_entries, count = 0;
720   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
721   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
722   CeedBasis               *active_bases_in, *active_bases_out, basis_in, basis_out;
723   const CeedScalar       **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out;
724   CeedElemRestriction      elem_rstr_in, elem_rstr_out;
725   CeedRestrictionType      elem_rstr_type_in, elem_rstr_type_out;
726   const bool              *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL;
727   const CeedInt8          *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL;
728   CeedOperatorAssemblyData data;
729 
730   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
731   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out,
732                                                 &num_eval_modes_out, &eval_modes_out, NULL, NULL));
733 
734   CeedCheck(num_active_bases_in == 1 && num_active_bases_out == 1, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
735             "Cannot assemble operator with multiple active bases");
736   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
737             "Cannot assemble operator without inputs/outputs");
738 
739   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out));
740   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
741   basis_in  = active_bases_in[0];
742   basis_out = active_bases_out[0];
743   B_mat_in  = B_mats_in[0];
744   B_mat_out = B_mats_out[0];
745 
746   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
747   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
748   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
749   if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in;
750   else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in));
751 
752   CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in));
753   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
754     CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in));
755   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
756     CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in));
757   }
758 
759   if (elem_rstr_in != elem_rstr_out) {
760     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
761     CeedCheck(num_elem_in == num_elem_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
762               "Active input and output operator restrictions must have the same number of elements."
763               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
764               num_elem_in, num_elem_out);
765     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
766     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
767     if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out;
768     else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out));
769     CeedCheck(num_qpts_in == num_qpts_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
770               "Active input and output bases must have the same number of quadrature points."
771               " Input has %" CeedInt_FMT " points; output has %" CeedInt_FMT "points.",
772               num_qpts_in, num_qpts_out);
773 
774     CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out));
775     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
776       CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out));
777     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
778       CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out));
779     }
780   } else {
781     num_elem_out  = num_elem_in;
782     elem_size_out = elem_size_in;
783     num_comp_out  = num_comp_in;
784     num_qpts_out  = num_qpts_in;
785 
786     elem_rstr_orients_out      = elem_rstr_orients_in;
787     elem_rstr_curl_orients_out = elem_rstr_curl_orients_in;
788   }
789   local_num_entries = (CeedSize)elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
790 
791   // Loop over elements and put in data structure
792   // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
793   CeedTensorContract contract;
794   CeedScalar        *vals, *BTD_mat = NULL, *elem_mat = NULL, *elem_mat_b = NULL;
795 
796   CeedCall(CeedBasisGetTensorContract(basis_in, &contract));
797   CeedCall(CeedCalloc(elem_size_out * num_qpts_in * num_eval_modes_in[0], &BTD_mat));
798   CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat));
799   if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b));
800 
801   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
802   for (CeedSize e = 0; e < num_elem_in; e++) {
803     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
804       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
805         // Compute B^T*D
806         for (CeedSize n = 0; n < elem_size_out; n++) {
807           for (CeedSize q = 0; q < num_qpts_in; q++) {
808             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
809               const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in;
810               CeedScalar     sum       = 0.0;
811 
812               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
813                 const CeedSize b_out_index     = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n;
814                 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out;
815                 const CeedSize qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
816 
817                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
818               }
819               BTD_mat[btd_index] = sum;
820             }
821           }
822         }
823 
824         // Form element matrix itself (for each block component)
825         if (contract) {
826           CeedCall(CeedTensorContractApply(contract, 1, num_qpts_in * num_eval_modes_in[0], elem_size_in, elem_size_out, BTD_mat, CEED_NOTRANSPOSE,
827                                            false, B_mat_in, elem_mat));
828         } else {
829           Ceed ceed;
830 
831           CeedCall(CeedOperatorGetCeed(op, &ceed));
832           CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size_out, elem_size_in, num_qpts_in * num_eval_modes_in[0]));
833           CeedCall(CeedDestroy(&ceed));
834         }
835 
836         // Transform the element matrix if required
837         if (elem_rstr_orients_out) {
838           const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out];
839 
840           for (CeedInt i = 0; i < elem_size_out; i++) {
841             const double orient = elem_orients[i] ? -1.0 : 1.0;
842 
843             for (CeedInt j = 0; j < elem_size_in; j++) {
844               elem_mat[i * elem_size_in + j] *= orient;
845             }
846           }
847         } else if (elem_rstr_curl_orients_out) {
848           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out];
849 
850           // T^T*(B^T*D*B)
851           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
852           for (CeedInt i = 0; i < elem_size_out; i++) {
853             for (CeedInt j = 0; j < elem_size_in; j++) {
854               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] +
855                                                (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) +
856                                                (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0);
857             }
858           }
859         }
860         if (elem_rstr_orients_in) {
861           const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in];
862 
863           for (CeedInt i = 0; i < elem_size_out; i++) {
864             for (CeedInt j = 0; j < elem_size_in; j++) {
865               elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0;
866             }
867           }
868         } else if (elem_rstr_curl_orients_in) {
869           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in];
870 
871           // (B^T*D*B)*T
872           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
873           for (CeedInt i = 0; i < elem_size_out; i++) {
874             for (CeedInt j = 0; j < elem_size_in; j++) {
875               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] +
876                                                (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) +
877                                                (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0);
878             }
879           }
880         }
881 
882         // Put element matrix in coordinate data structure
883         for (CeedInt i = 0; i < elem_size_out; i++) {
884           for (CeedInt j = 0; j < elem_size_in; j++) {
885             vals[offset + count] = elem_mat[i * elem_size_in + j];
886             count++;
887           }
888         }
889       }
890     }
891   }
892   CeedCheck(count == local_num_entries, CeedOperatorReturnCeed(op), CEED_ERROR_MAJOR, "Error computing entries");
893   CeedCall(CeedVectorRestoreArray(values, &vals));
894 
895   // Cleanup
896   CeedCall(CeedFree(&BTD_mat));
897   CeedCall(CeedFree(&elem_mat));
898   CeedCall(CeedFree(&elem_mat_b));
899   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
900     CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in));
901   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
902     CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in));
903   }
904   if (elem_rstr_in != elem_rstr_out) {
905     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
906       CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out));
907     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
908       CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out));
909     }
910   }
911   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
912   CeedCall(CeedVectorDestroy(&assembled_qf));
913   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
914   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
915   return CEED_ERROR_SUCCESS;
916 }
917 
918 /**
919   @brief Count number of entries for assembled `CeedOperator`
920 
921   @param[in]  op          `CeedOperator` to assemble
922   @param[out] num_entries Number of entries in assembled representation
923 
924   @return An error code: 0 - success, otherwise - failure
925 
926   @ref Utility
927 **/
928 static int CeedOperatorAssemblyCountEntriesSingle(CeedOperator op, CeedSize *num_entries) {
929   bool                is_composite;
930   CeedInt             num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out;
931   CeedElemRestriction rstr_in, rstr_out;
932 
933   CeedCall(CeedOperatorIsComposite(op, &is_composite));
934   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
935 
936   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out));
937   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in));
938   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in));
939   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in));
940   if (rstr_in != rstr_out) {
941     CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out));
942     CeedCheck(num_elem_in == num_elem_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
943               "Active input and output operator restrictions must have the same number of elements."
944               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
945               num_elem_in, num_elem_out);
946     CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out));
947     CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out));
948   } else {
949     num_elem_out  = num_elem_in;
950     elem_size_out = elem_size_in;
951     num_comp_out  = num_comp_in;
952   }
953   CeedCall(CeedElemRestrictionDestroy(&rstr_in));
954   CeedCall(CeedElemRestrictionDestroy(&rstr_out));
955   *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in;
956   return CEED_ERROR_SUCCESS;
957 }
958 
959 /**
960   @brief Count number of entries for assembled `CeedOperator`
961 
962   @param[in]  op          `CeedOperator` to assemble
963   @param[out] num_entries Number of entries in assembled representation
964 
965   @return An error code: 0 - success, otherwise - failure
966 
967   @ref Utility
968 **/
969 int CeedOperatorLinearAssembleGetNumEntries(CeedOperator op, CeedSize *num_entries) {
970   bool is_composite;
971 
972   CeedCall(CeedOperatorCheckReady(op));
973   CeedCall(CeedOperatorIsComposite(op, &is_composite));
974 
975   if (is_composite) {
976     CeedInt       num_suboperators;
977     CeedOperator *sub_operators;
978 
979     CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
980     CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
981 
982     *num_entries = 0;
983     for (CeedInt k = 0; k < num_suboperators; ++k) {
984       CeedSize single_entries;
985 
986       CeedCall(CeedOperatorAssemblyCountEntriesSingle(sub_operators[k], &single_entries));
987       *num_entries += single_entries;
988     }
989   } else {
990     CeedCall(CeedOperatorAssemblyCountEntriesSingle(op, num_entries));
991   }
992   return CEED_ERROR_SUCCESS;
993 }
994 
995 /**
996   @brief Common code for creating a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator`
997 
998   @param[in]  op_fine      Fine grid `CeedOperator`
999   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
1000   @param[in]  rstr_coarse  Coarse grid `CeedElemRestriction`
1001   @param[in]  basis_coarse Coarse grid active vector `CeedBasis`
1002   @param[in]  basis_c_to_f `CeedBasis` for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction operators
1003   @param[out] op_coarse    Coarse grid `CeedOperator`
1004   @param[out] op_prolong   Coarse to fine `CeedOperator`, or `NULL`
1005   @param[out] op_restrict  Fine to coarse `CeedOperator`, or `NULL`
1006 
1007   @return An error code: 0 - success, otherwise - failure
1008 
1009   @ref Developer
1010 **/
1011 static int CeedOperatorMultigridLevelCreateSingle_Core(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse,
1012                                                        CeedBasis basis_coarse, CeedBasis basis_c_to_f, CeedOperator *op_coarse,
1013                                                        CeedOperator *op_prolong, CeedOperator *op_restrict) {
1014   bool                is_composite;
1015   Ceed                ceed;
1016   CeedInt             num_comp, num_input_fields, num_output_fields;
1017   CeedVector          mult_vec         = NULL;
1018   CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL;
1019   CeedOperatorField  *input_fields, *output_fields;
1020 
1021   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
1022 
1023   // Check for composite operator
1024   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
1025   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
1026 
1027   // Coarse Grid
1028   {
1029     bool is_at_points;
1030 
1031     CeedCall(CeedOperatorIsAtPoints(op_fine, &is_at_points));
1032     if (is_at_points) {
1033       CeedVector          point_coords;
1034       CeedElemRestriction rstr_points;
1035 
1036       CeedCall(CeedOperatorCreateAtPoints(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
1037       CeedCall(CeedOperatorAtPointsGetPoints(op_fine, &rstr_points, &point_coords));
1038       CeedCall(CeedOperatorAtPointsSetPoints(*op_coarse, rstr_points, point_coords));
1039       CeedCall(CeedVectorDestroy(&point_coords));
1040       CeedCall(CeedElemRestrictionDestroy(&rstr_points));
1041     } else {
1042       CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
1043     }
1044   }
1045   CeedCall(CeedOperatorGetFields(op_fine, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
1046   // -- Clone input fields
1047   for (CeedInt i = 0; i < num_input_fields; i++) {
1048     const char         *field_name;
1049     CeedVector          vec;
1050     CeedElemRestriction rstr  = NULL;
1051     CeedBasis           basis = NULL;
1052 
1053     CeedCall(CeedOperatorFieldGetName(input_fields[i], &field_name));
1054     CeedCall(CeedOperatorFieldGetVector(input_fields[i], &vec));
1055     if (vec == CEED_VECTOR_ACTIVE) {
1056       CeedCall(CeedElemRestrictionReferenceCopy(rstr_coarse, &rstr));
1057       CeedCall(CeedBasisReferenceCopy(basis_coarse, &basis));
1058       if (!rstr_fine) CeedCall(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_fine));
1059     } else {
1060       CeedCall(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr));
1061       CeedCall(CeedOperatorFieldGetBasis(input_fields[i], &basis));
1062     }
1063     CeedCall(CeedOperatorSetField(*op_coarse, field_name, rstr, basis, vec));
1064     CeedCall(CeedVectorDestroy(&vec));
1065     CeedCall(CeedElemRestrictionDestroy(&rstr));
1066     CeedCall(CeedBasisDestroy(&basis));
1067   }
1068   // -- Clone output fields
1069   for (CeedInt i = 0; i < num_output_fields; i++) {
1070     const char         *field_name;
1071     CeedVector          vec;
1072     CeedElemRestriction rstr  = NULL;
1073     CeedBasis           basis = NULL;
1074 
1075     CeedCall(CeedOperatorFieldGetName(output_fields[i], &field_name));
1076     CeedCall(CeedOperatorFieldGetVector(output_fields[i], &vec));
1077     if (vec == CEED_VECTOR_ACTIVE) {
1078       CeedCall(CeedElemRestrictionReferenceCopy(rstr_coarse, &rstr));
1079       CeedCall(CeedBasisReferenceCopy(basis_coarse, &basis));
1080       if (!rstr_fine) CeedCall(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_fine));
1081     } else {
1082       CeedCall(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr));
1083       CeedCall(CeedOperatorFieldGetBasis(output_fields[i], &basis));
1084     }
1085     CeedCall(CeedOperatorSetField(*op_coarse, field_name, rstr, basis, vec));
1086     CeedCall(CeedVectorDestroy(&vec));
1087     CeedCall(CeedElemRestrictionDestroy(&rstr));
1088     CeedCall(CeedBasisDestroy(&basis));
1089   }
1090   // -- Clone QFunctionAssemblyData
1091   {
1092     CeedQFunctionAssemblyData fine_data;
1093 
1094     CeedCall(CeedOperatorGetQFunctionAssemblyData(op_fine, &fine_data));
1095     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(fine_data, &(*op_coarse)->qf_assembled));
1096   }
1097 
1098   // Multiplicity vector
1099   if (op_restrict || op_prolong) {
1100     CeedVector          mult_e_vec;
1101     CeedRestrictionType rstr_type;
1102 
1103     CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type));
1104     CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED,
1105               "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported");
1106     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
1107     CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine));
1108     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
1109     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
1110     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
1111     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
1112     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
1113     CeedCall(CeedVectorDestroy(&mult_e_vec));
1114     CeedCall(CeedVectorReciprocal(mult_vec));
1115   }
1116 
1117   // Clone name
1118   bool   has_name = op_fine->name;
1119   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
1120   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
1121 
1122   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
1123   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
1124             "Prolongation or restriction operator creation requires coarse-to-fine basis");
1125 
1126   // Restriction/Prolongation Operators
1127   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
1128 
1129   // Restriction
1130   if (op_restrict) {
1131     CeedInt             *num_comp_r_data;
1132     CeedQFunctionContext ctx_r;
1133     CeedQFunction        qf_restrict;
1134 
1135     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
1136     CeedCall(CeedCalloc(1, &num_comp_r_data));
1137     num_comp_r_data[0] = num_comp;
1138     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
1139     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
1140     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
1141     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
1142     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
1143     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
1144     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
1145     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
1146 
1147     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
1148     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
1149     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
1150     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
1151 
1152     // Set name
1153     char *restriction_name;
1154 
1155     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
1156     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
1157     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
1158     CeedCall(CeedFree(&restriction_name));
1159 
1160     // Check
1161     CeedCall(CeedOperatorCheckReady(*op_restrict));
1162 
1163     // Cleanup
1164     CeedCall(CeedQFunctionDestroy(&qf_restrict));
1165   }
1166 
1167   // Prolongation
1168   if (op_prolong) {
1169     CeedInt             *num_comp_p_data;
1170     CeedQFunctionContext ctx_p;
1171     CeedQFunction        qf_prolong;
1172 
1173     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
1174     CeedCall(CeedCalloc(1, &num_comp_p_data));
1175     num_comp_p_data[0] = num_comp;
1176     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
1177     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
1178     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
1179     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
1180     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
1181     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
1182     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
1183     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
1184 
1185     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
1186     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
1187     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
1188     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
1189 
1190     // Set name
1191     char *prolongation_name;
1192 
1193     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
1194     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
1195     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
1196     CeedCall(CeedFree(&prolongation_name));
1197 
1198     // Check
1199     CeedCall(CeedOperatorCheckReady(*op_prolong));
1200 
1201     // Cleanup
1202     CeedCall(CeedQFunctionDestroy(&qf_prolong));
1203   }
1204 
1205   // Check
1206   CeedCall(CeedOperatorCheckReady(*op_coarse));
1207 
1208   // Cleanup
1209   CeedCall(CeedDestroy(&ceed));
1210   CeedCall(CeedVectorDestroy(&mult_vec));
1211   CeedCall(CeedElemRestrictionDestroy(&rstr_fine));
1212   CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine));
1213   CeedCall(CeedBasisDestroy(&basis_c_to_f));
1214   return CEED_ERROR_SUCCESS;
1215 }
1216 
1217 /**
1218   @brief Build 1D mass matrix and Laplacian with perturbation
1219 
1220   @param[in]  interp_1d   Interpolation matrix in one dimension
1221   @param[in]  grad_1d     Gradient matrix in one dimension
1222   @param[in]  q_weight_1d Quadrature weights in one dimension
1223   @param[in]  P_1d        Number of basis nodes in one dimension
1224   @param[in]  Q_1d        Number of quadrature points in one dimension
1225   @param[in]  dim         Dimension of basis
1226   @param[out] mass        Assembled mass matrix in one dimension
1227   @param[out] laplace     Assembled perturbed Laplacian in one dimension
1228 
1229   @return An error code: 0 - success, otherwise - failure
1230 
1231   @ref Developer
1232 **/
1233 CeedPragmaOptimizeOff
1234 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
1235                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
1236   for (CeedInt i = 0; i < P_1d; i++) {
1237     for (CeedInt j = 0; j < P_1d; j++) {
1238       CeedScalar sum = 0.0;
1239       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
1240       mass[i + j * P_1d] = sum;
1241     }
1242   }
1243   // -- Laplacian
1244   for (CeedInt i = 0; i < P_1d; i++) {
1245     for (CeedInt j = 0; j < P_1d; j++) {
1246       CeedScalar sum = 0.0;
1247 
1248       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
1249       laplace[i + j * P_1d] = sum;
1250     }
1251   }
1252   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
1253   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
1254   return CEED_ERROR_SUCCESS;
1255 }
1256 CeedPragmaOptimizeOn
1257 
1258 /// @}
1259 
1260 /// ----------------------------------------------------------------------------
1261 /// CeedOperator Backend API
1262 /// ----------------------------------------------------------------------------
1263 /// @addtogroup CeedOperatorBackend
1264 /// @{
1265 
1266 /**
1267   @brief Select correct basis matrix pointer based on @ref CeedEvalMode
1268 
1269   @param[in]  basis     `CeedBasis` from which to get the basis matrix
1270   @param[in]  eval_mode Current basis evaluation mode
1271   @param[in]  identity  Pointer to identity matrix
1272   @param[out] basis_ptr `CeedBasis` pointer to set
1273 
1274   @ref Backend
1275 **/
1276 int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
1277   switch (eval_mode) {
1278     case CEED_EVAL_NONE:
1279       *basis_ptr = identity;
1280       break;
1281     case CEED_EVAL_INTERP:
1282       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
1283       break;
1284     case CEED_EVAL_GRAD:
1285       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
1286       break;
1287     case CEED_EVAL_DIV:
1288       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
1289       break;
1290     case CEED_EVAL_CURL:
1291       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
1292       break;
1293     case CEED_EVAL_WEIGHT:
1294       break;  // Caught by QF Assembly
1295   }
1296   assert(*basis_ptr != NULL);
1297   return CEED_ERROR_SUCCESS;
1298 }
1299 
1300 /**
1301   @brief Create point block restriction for active `CeedOperatorField`
1302 
1303   @param[in]  rstr             Original `CeedElemRestriction` for active field
1304   @param[out] point_block_rstr Address of the variable where the newly created `CeedElemRestriction` will be stored
1305 
1306   @return An error code: 0 - success, otherwise - failure
1307 
1308   @ref Backend
1309 **/
1310 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) {
1311   Ceed           ceed;
1312   CeedInt        num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets;
1313   CeedSize       l_size;
1314   const CeedInt *offsets;
1315 
1316   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
1317   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
1318 
1319   // Expand offsets
1320   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
1321   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
1322   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
1323   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
1324   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
1325   shift = num_comp;
1326   if (comp_stride != 1) shift *= num_comp;
1327   CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets));
1328   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
1329     point_block_offsets[i] = offsets[i] * shift;
1330   }
1331 
1332   // Create new restriction
1333   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
1334                                      point_block_offsets, point_block_rstr));
1335 
1336   // Cleanup
1337   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
1338   CeedCall(CeedDestroy(&ceed));
1339   return CEED_ERROR_SUCCESS;
1340 }
1341 
1342 /**
1343   @brief Get `CeedQFunctionAssemblyData`
1344 
1345   @param[in]  op   `CeedOperator` to assemble
1346   @param[out] data `CeedQFunctionAssemblyData`
1347 
1348   @return An error code: 0 - success, otherwise - failure
1349 
1350   @ref Backend
1351 **/
1352 int CeedOperatorGetQFunctionAssemblyData(CeedOperator op, CeedQFunctionAssemblyData *data) {
1353   if (!op->qf_assembled) {
1354     CeedQFunctionAssemblyData data;
1355 
1356     CeedCall(CeedQFunctionAssemblyDataCreate(CeedOperatorReturnCeed(op), &data));
1357     op->qf_assembled = data;
1358   }
1359   *data = op->qf_assembled;
1360   return CEED_ERROR_SUCCESS;
1361 }
1362 
1363 /**
1364   @brief Create object holding `CeedQFunction` assembly data for `CeedOperator`
1365 
1366   @param[in]  ceed `Ceed` object used to create the `CeedQFunctionAssemblyData`
1367   @param[out] data Address of the variable where the newly created `CeedQFunctionAssemblyData` will be stored
1368 
1369   @return An error code: 0 - success, otherwise - failure
1370 
1371   @ref Backend
1372 **/
1373 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
1374   CeedCall(CeedCalloc(1, data));
1375   (*data)->ref_count = 1;
1376   CeedCall(CeedReferenceCopy(ceed, &(*data)->ceed));
1377   return CEED_ERROR_SUCCESS;
1378 }
1379 
1380 /**
1381   @brief Increment the reference counter for a `CeedQFunctionAssemblyData`
1382 
1383   @param[in,out] data `CeedQFunctionAssemblyData` to increment the reference counter
1384 
1385   @return An error code: 0 - success, otherwise - failure
1386 
1387   @ref Backend
1388 **/
1389 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
1390   data->ref_count++;
1391   return CEED_ERROR_SUCCESS;
1392 }
1393 
1394 /**
1395   @brief Set re-use of `CeedQFunctionAssemblyData`
1396 
1397   @param[in,out] data       `CeedQFunctionAssemblyData` to mark for reuse
1398   @param[in]     reuse_data Boolean flag indicating data re-use
1399 
1400   @return An error code: 0 - success, otherwise - failure
1401 
1402   @ref Backend
1403 **/
1404 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
1405   data->reuse_data        = reuse_data;
1406   data->needs_data_update = true;
1407   return CEED_ERROR_SUCCESS;
1408 }
1409 
1410 /**
1411   @brief Mark `CeedQFunctionAssemblyData` as stale
1412 
1413   @param[in,out] data              `CeedQFunctionAssemblyData` to mark as stale
1414   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
1415 
1416   @return An error code: 0 - success, otherwise - failure
1417 
1418   @ref Backend
1419 **/
1420 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
1421   data->needs_data_update = needs_data_update;
1422   return CEED_ERROR_SUCCESS;
1423 }
1424 
1425 /**
1426   @brief Determine if `CeedQFunctionAssemblyData` needs update
1427 
1428   @param[in]  data             `CeedQFunctionAssemblyData` to mark as stale
1429   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1430 
1431   @return An error code: 0 - success, otherwise - failure
1432 
1433   @ref Backend
1434 **/
1435 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
1436   *is_update_needed = !data->reuse_data || data->needs_data_update;
1437   return CEED_ERROR_SUCCESS;
1438 }
1439 
1440 /**
1441   @brief Copy the pointer to a `CeedQFunctionAssemblyData`.
1442 
1443   Both pointers should be destroyed with @ref CeedQFunctionAssemblyDataDestroy().
1444 
1445   Note: If the value of ` *data_copy` passed to this function is non-`NULL` , then it is assumed that ` *data_copy` is a pointer to a `CeedQFunctionAssemblyData`.
1446         This `CeedQFunctionAssemblyData` will be destroyed if ` *data_copy` is the only reference to this `CeedQFunctionAssemblyData`.
1447 
1448   @param[in]     data      `CeedQFunctionAssemblyData` to copy reference to
1449   @param[in,out] data_copy Variable to store copied reference
1450 
1451   @return An error code: 0 - success, otherwise - failure
1452 
1453   @ref Backend
1454 **/
1455 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1456   CeedCall(CeedQFunctionAssemblyDataReference(data));
1457   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1458   *data_copy = data;
1459   return CEED_ERROR_SUCCESS;
1460 }
1461 
1462 /**
1463   @brief Get setup status for internal objects for `CeedQFunctionAssemblyData`
1464 
1465   @param[in]  data     `CeedQFunctionAssemblyData` to retrieve status
1466   @param[out] is_setup Boolean flag for setup status
1467 
1468   @return An error code: 0 - success, otherwise - failure
1469 
1470   @ref Backend
1471 **/
1472 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1473   *is_setup = data->is_setup;
1474   return CEED_ERROR_SUCCESS;
1475 }
1476 
1477 /**
1478   @brief Set internal objects for `CeedQFunctionAssemblyData`
1479 
1480   @param[in,out] data `CeedQFunctionAssemblyData` to set objects
1481   @param[in]     vec  `CeedVector` to store assembled `CeedQFunction` at quadrature points
1482   @param[in]     rstr `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
1483 
1484   @return An error code: 0 - success, otherwise - failure
1485 
1486   @ref Backend
1487 **/
1488 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1489   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1490   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1491 
1492   data->is_setup = true;
1493   return CEED_ERROR_SUCCESS;
1494 }
1495 
1496 /**
1497   @brief Get internal objects for `CeedQFunctionAssemblyData`
1498 
1499   @param[in,out] data `CeedQFunctionAssemblyData` to set objects
1500   @param[out]    vec  `CeedVector` to store assembled `CeedQFunction` at quadrature points
1501   @param[out]    rstr `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
1502 
1503   @return An error code: 0 - success, otherwise - failure
1504 
1505   @ref Backend
1506 **/
1507 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1508   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1509 
1510   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1511   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1512   return CEED_ERROR_SUCCESS;
1513 }
1514 
1515 /**
1516   @brief Destroy `CeedQFunctionAssemblyData`
1517 
1518   @param[in,out] data  `CeedQFunctionAssemblyData` to destroy
1519 
1520   @return An error code: 0 - success, otherwise - failure
1521 
1522   @ref Backend
1523 **/
1524 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1525   if (!*data || --(*data)->ref_count > 0) {
1526     *data = NULL;
1527     return CEED_ERROR_SUCCESS;
1528   }
1529   CeedCall(CeedDestroy(&(*data)->ceed));
1530   CeedCall(CeedVectorDestroy(&(*data)->vec));
1531   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1532 
1533   CeedCall(CeedFree(data));
1534   return CEED_ERROR_SUCCESS;
1535 }
1536 
1537 /**
1538   @brief Get `CeedOperatorAssemblyData`
1539 
1540   @param[in]  op   `CeedOperator` to assemble
1541   @param[out] data `CeedOperatorAssemblyData`
1542 
1543   @return An error code: 0 - success, otherwise - failure
1544 
1545   @ref Backend
1546 **/
1547 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1548   if (!op->op_assembled) {
1549     CeedOperatorAssemblyData data;
1550 
1551     CeedCall(CeedOperatorAssemblyDataCreate(CeedOperatorReturnCeed(op), op, &data));
1552     op->op_assembled = data;
1553   }
1554   *data = op->op_assembled;
1555   return CEED_ERROR_SUCCESS;
1556 }
1557 
1558 /**
1559   @brief Create object holding `CeedOperator` assembly data.
1560 
1561   The `CeedOperatorAssemblyData` holds an array with references to every active `CeedBasis` used in the `CeedOperator`.
1562   An array with references to the corresponding active `CeedElemRestriction` is also stored.
1563   For each active `CeedBasis, the `CeedOperatorAssemblyData` holds an array of all input and output @ref CeedEvalMode for this `CeedBasis`.
1564   The `CeedOperatorAssemblyData` holds an array of offsets for indexing into the assembled `CeedQFunction` arrays to the row representing each @ref CeedEvalMode.
1565   The number of input columns across all active bases for the assembled `CeedQFunction` is also stored.
1566   Lastly, the `CeedOperatorAssembly` data holds assembled matrices representing the full action of the `CeedBasis` for all @ref CeedEvalMode.
1567 
1568   @param[in]  ceed `Ceed` object used to create the `CeedOperatorAssemblyData`
1569   @param[in]  op   `CeedOperator` to be assembled
1570   @param[out] data Address of the variable where the newly created `CeedOperatorAssemblyData` will be stored
1571 
1572   @return An error code: 0 - success, otherwise - failure
1573 
1574   @ref Backend
1575 **/
1576 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1577   CeedInt             num_active_bases_in = 0, num_active_bases_out = 0, offset = 0;
1578   CeedInt             num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL;
1579   CeedSize          **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1580   CeedEvalMode      **eval_modes_in = NULL, **eval_modes_out = NULL;
1581   CeedQFunctionField *qf_fields;
1582   CeedQFunction       qf;
1583   CeedOperatorField  *op_fields;
1584   bool                is_composite;
1585 
1586   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1587   CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators.");
1588 
1589   // Allocate
1590   CeedCall(CeedCalloc(1, data));
1591   CeedCall(CeedReferenceCopy(ceed, &(*data)->ceed));
1592 
1593   // Build OperatorAssembly data
1594   CeedCall(CeedOperatorGetQFunction(op, &qf));
1595 
1596   // Determine active input basis
1597   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1598   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1599   for (CeedInt i = 0; i < num_input_fields; i++) {
1600     CeedVector vec;
1601 
1602     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1603     if (vec == CEED_VECTOR_ACTIVE) {
1604       CeedInt      index = -1, num_comp, q_comp;
1605       CeedEvalMode eval_mode;
1606       CeedBasis    basis_in = NULL;
1607 
1608       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1609       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1610       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1611       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1612       for (CeedInt i = 0; i < num_active_bases_in; i++) {
1613         if ((*data)->active_bases_in[i] == basis_in) index = i;
1614       }
1615       if (index == -1) {
1616         CeedElemRestriction elem_rstr_in;
1617 
1618         index = num_active_bases_in;
1619         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in));
1620         (*data)->active_bases_in[num_active_bases_in] = NULL;
1621         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in]));
1622         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in));
1623         (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL;
1624         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1625         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in]));
1626         CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
1627         CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in));
1628         num_eval_modes_in[index] = 0;
1629         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in));
1630         eval_modes_in[index] = NULL;
1631         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in));
1632         eval_mode_offsets_in[index] = NULL;
1633         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in));
1634         (*data)->assembled_bases_in[index] = NULL;
1635         num_active_bases_in++;
1636       }
1637       if (eval_mode != CEED_EVAL_WEIGHT) {
1638         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1639         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1640         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1641         for (CeedInt d = 0; d < q_comp; d++) {
1642           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1643           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1644           offset += num_comp;
1645         }
1646         num_eval_modes_in[index] += q_comp;
1647       }
1648       CeedCall(CeedBasisDestroy(&basis_in));
1649     }
1650     CeedCall(CeedVectorDestroy(&vec));
1651   }
1652 
1653   // Determine active output basis
1654   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1655   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1656   offset = 0;
1657   for (CeedInt i = 0; i < num_output_fields; i++) {
1658     CeedVector vec;
1659 
1660     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1661     if (vec == CEED_VECTOR_ACTIVE) {
1662       CeedInt      index = -1, num_comp, q_comp;
1663       CeedEvalMode eval_mode;
1664       CeedBasis    basis_out = NULL;
1665 
1666       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1667       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1668       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1669       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1670       for (CeedInt i = 0; i < num_active_bases_out; i++) {
1671         if ((*data)->active_bases_out[i] == basis_out) index = i;
1672       }
1673       if (index == -1) {
1674         CeedElemRestriction elem_rstr_out;
1675 
1676         index = num_active_bases_out;
1677         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out));
1678         (*data)->active_bases_out[num_active_bases_out] = NULL;
1679         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out]));
1680         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out));
1681         (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL;
1682         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1683         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out]));
1684         CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
1685         CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out));
1686         num_eval_modes_out[index] = 0;
1687         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out));
1688         eval_modes_out[index] = NULL;
1689         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out));
1690         eval_mode_offsets_out[index] = NULL;
1691         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out));
1692         (*data)->assembled_bases_out[index] = NULL;
1693         num_active_bases_out++;
1694       }
1695       if (eval_mode != CEED_EVAL_WEIGHT) {
1696         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1697         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1698         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1699         for (CeedInt d = 0; d < q_comp; d++) {
1700           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1701           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1702           offset += num_comp;
1703         }
1704         num_eval_modes_out[index] += q_comp;
1705       }
1706       CeedCall(CeedBasisDestroy(&basis_out));
1707     }
1708     CeedCall(CeedVectorDestroy(&vec));
1709   }
1710   CeedCall(CeedQFunctionDestroy(&qf));
1711   (*data)->num_active_bases_in   = num_active_bases_in;
1712   (*data)->num_eval_modes_in     = num_eval_modes_in;
1713   (*data)->eval_modes_in         = eval_modes_in;
1714   (*data)->eval_mode_offsets_in  = eval_mode_offsets_in;
1715   (*data)->num_active_bases_out  = num_active_bases_out;
1716   (*data)->num_eval_modes_out    = num_eval_modes_out;
1717   (*data)->eval_modes_out        = eval_modes_out;
1718   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1719   (*data)->num_output_components = offset;
1720   return CEED_ERROR_SUCCESS;
1721 }
1722 
1723 /**
1724   @brief Get `CeedOperator` @ref CeedEvalMode for assembly.
1725 
1726   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1727 
1728   @param[in]  data                  `CeedOperatorAssemblyData`
1729   @param[out] num_active_bases_in   Total number of active bases for input
1730   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input @ref CeedEvalMode, or `NULL`.
1731                                       `eval_modes_in[0]` holds an array of eval modes for the first active `CeedBasis`.
1732   @param[out] eval_modes_in         Pointer to hold arrays of input @ref CeedEvalMode, or `NULL`
1733   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point
1734   @param[out] num_active_bases_out  Total number of active bases for output
1735   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output @ref CeedEvalMode, or `NULL`
1736   @param[out] eval_modes_out        Pointer to hold arrays of output @ref CeedEvalMode, or `NULL`
1737   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1738   @param[out] num_output_components The number of columns in the assembled `CeedQFunction` matrix for each quadrature point, including contributions of all active bases
1739 
1740   @return An error code: 0 - success, otherwise - failure
1741 
1742   @ref Backend
1743 **/
1744 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in,
1745                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out,
1746                                          CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out,
1747                                          CeedSize *num_output_components) {
1748   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1749   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1750   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1751   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1752   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1753   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1754   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1755   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1756   if (num_output_components) *num_output_components = data->num_output_components;
1757   return CEED_ERROR_SUCCESS;
1758 }
1759 
1760 /**
1761   @brief Get `CeedOperator` `CeedBasis` data for assembly.
1762 
1763   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1764 
1765   @param[in]  data                 `CeedOperatorAssemblyData`
1766   @param[out] num_active_bases_in  Number of active input bases, or `NULL`
1767   @param[out] active_bases_in      Pointer to hold active input `CeedBasis`, or `NULL`
1768   @param[out] assembled_bases_in   Pointer to hold assembled active input `B` , or `NULL`
1769   @param[out] num_active_bases_out Number of active output bases, or `NULL`
1770   @param[out] active_bases_out     Pointer to hold active output `CeedBasis`, or `NULL`
1771   @param[out] assembled_bases_out  Pointer to hold assembled active output `B` , or `NULL`
1772 
1773   @return An error code: 0 - success, otherwise - failure
1774 
1775   @ref Backend
1776 **/
1777 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in,
1778                                      const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out,
1779                                      const CeedScalar ***assembled_bases_out) {
1780   // Assemble B_in, B_out if needed
1781   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1782     CeedInt num_qpts;
1783 
1784     if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts));
1785     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts));
1786     for (CeedInt b = 0; b < data->num_active_bases_in; b++) {
1787       bool        has_eval_none = false;
1788       CeedInt     num_nodes;
1789       CeedScalar *B_in = NULL, *identity = NULL;
1790 
1791       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes));
1792       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1793 
1794       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1795         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1796       }
1797       if (has_eval_none) {
1798         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1799         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1800           identity[i * num_nodes + i] = 1.0;
1801         }
1802       }
1803 
1804       for (CeedInt q = 0; q < num_qpts; q++) {
1805         for (CeedInt n = 0; n < num_nodes; n++) {
1806           CeedInt      d_in              = 0, q_comp_in;
1807           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1808 
1809           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1810             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1811             const CeedScalar *B  = NULL;
1812 
1813             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B));
1814             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in));
1815             if (q_comp_in > 1) {
1816               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1817               else B = &B[(++d_in) * num_qpts * num_nodes];
1818             }
1819             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1820             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1821           }
1822         }
1823       }
1824       if (identity) CeedCall(CeedFree(&identity));
1825       data->assembled_bases_in[b] = B_in;
1826     }
1827   }
1828 
1829   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1830     CeedInt num_qpts;
1831 
1832     if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts));
1833     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts));
1834     for (CeedInt b = 0; b < data->num_active_bases_out; b++) {
1835       bool        has_eval_none = false;
1836       CeedInt     num_nodes;
1837       CeedScalar *B_out = NULL, *identity = NULL;
1838 
1839       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes));
1840       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1841 
1842       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1843         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1844       }
1845       if (has_eval_none) {
1846         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1847         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1848           identity[i * num_nodes + i] = 1.0;
1849         }
1850       }
1851 
1852       for (CeedInt q = 0; q < num_qpts; q++) {
1853         for (CeedInt n = 0; n < num_nodes; n++) {
1854           CeedInt      d_out              = 0, q_comp_out;
1855           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1856 
1857           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1858             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1859             const CeedScalar *B  = NULL;
1860 
1861             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B));
1862             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out));
1863             if (q_comp_out > 1) {
1864               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1865               else B = &B[(++d_out) * num_qpts * num_nodes];
1866             }
1867             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1868             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1869           }
1870         }
1871       }
1872       if (identity) CeedCall(CeedFree(&identity));
1873       data->assembled_bases_out[b] = B_out;
1874     }
1875   }
1876 
1877   // Pass out assembled data
1878   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1879   if (active_bases_in) *active_bases_in = data->active_bases_in;
1880   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1881   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1882   if (active_bases_out) *active_bases_out = data->active_bases_out;
1883   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1884   return CEED_ERROR_SUCCESS;
1885 }
1886 
1887 /**
1888   @brief Get `CeedOperator` `CeedBasis` data for assembly.
1889 
1890   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1891 
1892   @param[in]  data                      `CeedOperatorAssemblyData`
1893   @param[out] num_active_elem_rstrs_in  Number of active input element restrictions, or `NULL`
1894   @param[out] active_elem_rstrs_in      Pointer to hold active input `CeedElemRestriction`, or `NULL`
1895   @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or `NULL`
1896   @param[out] active_elem_rstrs_out     Pointer to hold active output `CeedElemRestriction`, or `NULL`
1897 
1898   @return An error code: 0 - success, otherwise - failure
1899 
1900   @ref Backend
1901 **/
1902 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in,
1903                                                 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out,
1904                                                 CeedElemRestriction **active_elem_rstrs_out) {
1905   if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in;
1906   if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in;
1907   if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out;
1908   if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out;
1909   return CEED_ERROR_SUCCESS;
1910 }
1911 
1912 /**
1913   @brief Destroy `CeedOperatorAssemblyData`
1914 
1915   @param[in,out] data `CeedOperatorAssemblyData` to destroy
1916 
1917   @return An error code: 0 - success, otherwise - failure
1918 
1919   @ref Backend
1920 **/
1921 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1922   if (!*data) {
1923     *data = NULL;
1924     return CEED_ERROR_SUCCESS;
1925   }
1926   CeedCall(CeedDestroy(&(*data)->ceed));
1927   for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) {
1928     CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b]));
1929     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b]));
1930     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1931     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1932     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1933   }
1934   for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) {
1935     CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b]));
1936     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b]));
1937     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1938     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1939     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1940   }
1941   CeedCall(CeedFree(&(*data)->active_bases_in));
1942   CeedCall(CeedFree(&(*data)->active_bases_out));
1943   CeedCall(CeedFree(&(*data)->active_elem_rstrs_in));
1944   CeedCall(CeedFree(&(*data)->active_elem_rstrs_out));
1945   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1946   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1947   CeedCall(CeedFree(&(*data)->eval_modes_in));
1948   CeedCall(CeedFree(&(*data)->eval_modes_out));
1949   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1950   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1951   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1952   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1953 
1954   CeedCall(CeedFree(data));
1955   return CEED_ERROR_SUCCESS;
1956 }
1957 
1958 /**
1959   @brief Retrieve fallback `CeedOperator` with a reference `Ceed` for advanced `CeedOperator` functionality
1960 
1961   @param[in]  op          `CeedOperator` to retrieve fallback for
1962   @param[out] op_fallback Fallback `CeedOperator`
1963 
1964   @return An error code: 0 - success, otherwise - failure
1965 
1966   @ref Backend
1967 **/
1968 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
1969   // Create if needed
1970   if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op));
1971   if (op->op_fallback) {
1972     bool is_debug;
1973     Ceed ceed;
1974 
1975     CeedCall(CeedOperatorGetCeed(op, &ceed));
1976     CeedCall(CeedIsDebug(ceed, &is_debug));
1977     if (is_debug) {
1978       Ceed        ceed_fallback;
1979       const char *resource, *resource_fallback, *op_name;
1980 
1981       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
1982       CeedCall(CeedGetResource(ceed, &resource));
1983       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
1984       CeedCall(CeedOperatorGetName(op, &op_name));
1985 
1986       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n");
1987       CeedDebug(ceed, "Falling back from Operator with backend %s at address %p to Operator with backend %s at address %p for CeedOperator \"%s\"\n",
1988                 resource, op, resource_fallback, op->op_fallback, op_name);
1989       CeedCall(CeedDestroy(&ceed_fallback));
1990     }
1991     CeedCall(CeedDestroy(&ceed));
1992   }
1993   *op_fallback = op->op_fallback;
1994   return CEED_ERROR_SUCCESS;
1995 }
1996 
1997 /**
1998   @brief Get the parent `CeedOperator` for a fallback `CeedOperator`
1999 
2000   @param[in]  op     `CeedOperator` context
2001   @param[out] parent Variable to store parent `CeedOperator` context
2002 
2003   @return An error code: 0 - success, otherwise - failure
2004 
2005   @ref Backend
2006 **/
2007 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) {
2008   *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL;
2009   return CEED_ERROR_SUCCESS;
2010 }
2011 
2012 /**
2013   @brief Get the `Ceed` context of the parent `CeedOperator` for a fallback `CeedOperator`
2014 
2015   @param[in]  op     `CeedOperator` context
2016   @param[out] parent Variable to store parent `Ceed` context
2017 
2018   @return An error code: 0 - success, otherwise - failure
2019 
2020   @ref Backend
2021 **/
2022 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) {
2023   *parent = NULL;
2024   if (op->op_fallback_parent) CeedCall(CeedReferenceCopy(CeedOperatorReturnCeed(op->op_fallback_parent), parent));
2025   else CeedCall(CeedReferenceCopy(CeedOperatorReturnCeed(op), parent));
2026   return CEED_ERROR_SUCCESS;
2027 }
2028 
2029 /// @}
2030 
2031 /// ----------------------------------------------------------------------------
2032 /// CeedOperator Public API
2033 /// ----------------------------------------------------------------------------
2034 /// @addtogroup CeedOperatorUser
2035 /// @{
2036 
2037 /**
2038   @brief Assemble a linear `CeedQFunction` associated with a `CeedOperator`.
2039 
2040   This returns a `CeedVector` containing a matrix at each quadrature point providing the action of the `CeedQFunction` associated with the `CeedOperator`.
2041   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices representing the action of the `CeedQFunction` for a corresponding quadrature point on an element.
2042 
2043   Inputs and outputs are in the order provided by the user when adding `CeedOperator` fields.
2044   For example, a `CeedQFunction` with inputs `u` and `gradu` and outputs `gradv` and `v` , provided in that order, would result in an assembled `CeedQFunction` that consists of `(1 + dim) x (dim + 1)` matrices at each quadrature point acting on the input ` [u, du_0, du_1]` and producing the output `[dv_0, dv_1, v]`.
2045 
2046   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2047 
2048   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2049   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
2050   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
2051   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2052 
2053   @return An error code: 0 - success, otherwise - failure
2054 
2055   @ref User
2056 **/
2057 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
2058   CeedCall(CeedOperatorCheckReady(op));
2059 
2060   if (op->LinearAssembleQFunction) {
2061     // Backend version
2062     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
2063   } else {
2064     // Operator fallback
2065     CeedOperator op_fallback;
2066 
2067     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleQFunction\n");
2068     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2069     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
2070     else return CeedError(CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
2071   }
2072   return CEED_ERROR_SUCCESS;
2073 }
2074 
2075 /**
2076   @brief Assemble `CeedQFunction` and store result internally.
2077 
2078   Return copied references of stored data to the caller.
2079   Caller is responsible for ownership and destruction of the copied references.
2080   See also @ref CeedOperatorLinearAssembleQFunction().
2081 
2082   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
2083         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
2084 
2085   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2086   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
2087   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
2088   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2089 
2090   @return An error code: 0 - success, otherwise - failure
2091 
2092   @ref User
2093 **/
2094 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
2095   return CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(op, true, assembled, rstr, request);
2096 }
2097 
2098 /**
2099   @brief Assemble the diagonal of a square linear `CeedOperator`
2100 
2101   This overwrites a `CeedVector` with the diagonal of a linear `CeedOperator`.
2102 
2103   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2104 
2105   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2106 
2107   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2108   @param[out] assembled `CeedVector` to store assembled `CeedOperator` diagonal
2109   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2110 
2111   @return An error code: 0 - success, otherwise - failure
2112 
2113   @ref User
2114 **/
2115 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2116   bool     is_composite;
2117   CeedSize input_size = 0, output_size = 0;
2118 
2119   CeedCall(CeedOperatorCheckReady(op));
2120   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2121 
2122   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2123   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2124 
2125   // Early exit for empty operator
2126   if (!is_composite) {
2127     CeedInt num_elem = 0;
2128 
2129     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2130     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2131   }
2132 
2133   if (op->LinearAssembleDiagonal) {
2134     // Backend version
2135     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
2136     return CEED_ERROR_SUCCESS;
2137   } else if (op->LinearAssembleAddDiagonal) {
2138     // Backend version with zeroing first
2139     CeedCall(CeedVectorSetValue(assembled, 0.0));
2140     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
2141     return CEED_ERROR_SUCCESS;
2142   } else if (is_composite) {
2143     // Default to summing contributions of suboperators
2144     CeedCall(CeedVectorSetValue(assembled, 0.0));
2145     CeedCall(CeedOperatorLinearAssembleAddDiagonalComposite(op, request, false, assembled));
2146     return CEED_ERROR_SUCCESS;
2147   } else {
2148     // Operator fallback
2149     CeedOperator op_fallback;
2150 
2151     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleDiagonal\n");
2152     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2153     if (op_fallback) {
2154       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
2155       return CEED_ERROR_SUCCESS;
2156     }
2157   }
2158   // Default interface implementation
2159   CeedCall(CeedVectorSetValue(assembled, 0.0));
2160   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
2161   return CEED_ERROR_SUCCESS;
2162 }
2163 
2164 /**
2165   @brief Assemble the diagonal of a square linear `CeedOperator`.
2166 
2167   This sums into a `CeedVector` the diagonal of a linear `CeedOperator`.
2168 
2169   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2170 
2171   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2172 
2173   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2174   @param[out] assembled `CeedVector` to store assembled `CeedOperator` diagonal
2175   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2176 
2177   @return An error code: 0 - success, otherwise - failure
2178 
2179   @ref User
2180 **/
2181 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2182   bool     is_composite;
2183   CeedSize input_size = 0, output_size = 0;
2184 
2185   CeedCall(CeedOperatorCheckReady(op));
2186   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2187 
2188   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2189   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2190 
2191   // Early exit for empty operator
2192   if (!is_composite) {
2193     CeedInt num_elem = 0;
2194 
2195     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2196     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2197   }
2198 
2199   if (op->LinearAssembleAddDiagonal) {
2200     // Backend version
2201     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
2202     return CEED_ERROR_SUCCESS;
2203   } else if (is_composite) {
2204     // Default to summing contributions of suboperators
2205     CeedCall(CeedOperatorLinearAssembleAddDiagonalComposite(op, request, false, assembled));
2206     return CEED_ERROR_SUCCESS;
2207   } else {
2208     // Operator fallback
2209     CeedOperator op_fallback;
2210 
2211     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleAddDiagonal\n");
2212     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2213     if (op_fallback) {
2214       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
2215       return CEED_ERROR_SUCCESS;
2216     }
2217   }
2218   // Default interface implementation
2219   CeedCall(CeedOperatorLinearAssembleAddDiagonalSingle(op, request, false, assembled));
2220   return CEED_ERROR_SUCCESS;
2221 }
2222 
2223 /**
2224    @brief Fully assemble the point-block diagonal pattern of a linear `CeedOperator`.
2225 
2226    Expected to be used in conjunction with @ref CeedOperatorLinearAssemblePointBlockDiagonal().
2227 
2228    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2229    Note that the `(i, j)` pairs are unique.
2230    This function returns the number of entries and their `(i, j)` locations, while @ref CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in the same ordering.
2231 
2232    This will generally be slow unless your operator is low-order.
2233 
2234    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2235 
2236    @param[in]  op          `CeedOperator` to assemble
2237    @param[out] num_entries Number of entries in coordinate nonzero pattern
2238    @param[out] rows        Row number for each entry
2239    @param[out] cols        Column number for each entry
2240 
2241    @ref User
2242 **/
2243 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2244   bool          is_composite;
2245   CeedInt       num_active_components, num_sub_operators;
2246   CeedOperator *sub_operators;
2247 
2248   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2249 
2250   CeedSize input_size = 0, output_size = 0;
2251   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2252   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2253 
2254   if (is_composite) {
2255     CeedCall(CeedOperatorCompositeGetNumSub(op, &num_sub_operators));
2256     CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
2257   } else {
2258     sub_operators     = &op;
2259     num_sub_operators = 1;
2260   }
2261 
2262   // Verify operator can be assembled correctly
2263   {
2264     CeedOperatorAssemblyData data;
2265     CeedInt                  num_active_elem_rstrs, comp_stride;
2266     CeedElemRestriction     *active_elem_rstrs;
2267 
2268     // Get initial values to check against
2269     CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data));
2270     CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
2271     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride));
2272     CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components));
2273 
2274     // Verify that all active element restrictions have same component stride and number of components
2275     for (CeedInt k = 0; k < num_sub_operators; k++) {
2276       CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data));
2277       CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
2278       for (CeedInt i = 0; i < num_active_elem_rstrs; i++) {
2279         CeedInt comp_stride_sub, num_active_components_sub;
2280 
2281         CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub));
2282         CeedCheck(comp_stride == comp_stride_sub, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION,
2283                   "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub);
2284         CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub));
2285         CeedCheck(num_active_components == num_active_components_sub, CeedOperatorReturnCeed(op), CEED_ERROR_INCOMPATIBLE,
2286                   "All suboperators must have the same number of output components."
2287                   " Previous: %" CeedInt_FMT " Current: %" CeedInt_FMT,
2288                   num_active_components, num_active_components_sub);
2289       }
2290     }
2291   }
2292   *num_entries = input_size * num_active_components;
2293   CeedCall(CeedCalloc(*num_entries, rows));
2294   CeedCall(CeedCalloc(*num_entries, cols));
2295 
2296   for (CeedInt o = 0; o < num_sub_operators; o++) {
2297     CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr;
2298     CeedInt             comp_stride, num_elem, elem_size;
2299     const CeedInt      *offsets, *point_block_offsets;
2300 
2301     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr));
2302     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride));
2303     CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem));
2304     CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size));
2305     CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets));
2306 
2307     CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr));
2308     CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets));
2309 
2310     for (CeedSize i = 0; i < num_elem * elem_size; i++) {
2311       for (CeedInt c_out = 0; c_out < num_active_components; c_out++) {
2312         for (CeedInt c_in = 0; c_in < num_active_components; c_in++) {
2313           (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride;
2314           (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride;
2315         }
2316       }
2317     }
2318 
2319     CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets));
2320     CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets));
2321     CeedCall(CeedElemRestrictionDestroy(&active_elem_rstr));
2322     CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr));
2323   }
2324   return CEED_ERROR_SUCCESS;
2325 }
2326 
2327 /**
2328   @brief Assemble the point block diagonal of a square linear `CeedOperator`.
2329 
2330   This overwrites a `CeedVector` with the point block diagonal of a linear `CeedOperator`.
2331 
2332   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2333 
2334   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2335 
2336   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2337   @param[out] assembled `CeedVector` to store assembled `CeedOperator` point block diagonal, provided in row-major form with an `num_comp * num_comp` block at each node.
2338                           The dimensions of this vector are derived from the active vector for the `CeedOperator`.
2339                           The array has shape `[nodes, component out, component in]`.
2340   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2341 
2342   @return An error code: 0 - success, otherwise - failure
2343 
2344   @ref User
2345 **/
2346 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2347   bool     is_composite;
2348   CeedSize input_size = 0, output_size = 0;
2349 
2350   CeedCall(CeedOperatorCheckReady(op));
2351   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2352 
2353   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2354   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2355 
2356   // Early exit for empty operator
2357   if (!is_composite) {
2358     CeedInt num_elem = 0;
2359 
2360     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2361     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2362   }
2363 
2364   if (op->LinearAssemblePointBlockDiagonal) {
2365     // Backend version
2366     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
2367     return CEED_ERROR_SUCCESS;
2368   } else if (op->LinearAssembleAddPointBlockDiagonal) {
2369     // Backend version with zeroing first
2370     CeedCall(CeedVectorSetValue(assembled, 0.0));
2371     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2372     return CEED_ERROR_SUCCESS;
2373   } else {
2374     // Operator fallback
2375     CeedOperator op_fallback;
2376 
2377     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssemblePointBlockDiagonal\n");
2378     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2379     if (op_fallback) {
2380       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
2381       return CEED_ERROR_SUCCESS;
2382     }
2383   }
2384   // Default interface implementation
2385   CeedCall(CeedVectorSetValue(assembled, 0.0));
2386   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2387   return CEED_ERROR_SUCCESS;
2388 }
2389 
2390 /**
2391   @brief Assemble the point block diagonal of a square linear `CeedOperator`.
2392 
2393   This sums into a `CeedVector` with the point block diagonal of a linear `CeedOperator`.
2394 
2395   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2396 
2397   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2398 
2399   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2400   @param[out] assembled `CeedVector` to store assembled CeedOperator point block diagonal, provided in row-major form with an `num_comp * num_comp` block at each node.
2401                           The dimensions of this vector are derived from the active vector for the `CeedOperator`.
2402                           The array has shape `[nodes, component out, component in]`.
2403   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2404 
2405   @return An error code: 0 - success, otherwise - failure
2406 
2407   @ref User
2408 **/
2409 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2410   bool     is_composite;
2411   CeedSize input_size = 0, output_size = 0;
2412 
2413   CeedCall(CeedOperatorCheckReady(op));
2414   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2415 
2416   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2417   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2418 
2419   // Early exit for empty operator
2420   if (!is_composite) {
2421     CeedInt num_elem = 0;
2422 
2423     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2424     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2425   }
2426 
2427   if (op->LinearAssembleAddPointBlockDiagonal) {
2428     // Backend version
2429     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
2430     return CEED_ERROR_SUCCESS;
2431   } else {
2432     // Operator fallback
2433     CeedOperator op_fallback;
2434 
2435     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleAddPointBlockDiagonal\n");
2436     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2437     if (op_fallback) {
2438       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
2439       return CEED_ERROR_SUCCESS;
2440     }
2441   }
2442   // Default interface implementation
2443   if (is_composite) {
2444     CeedCall(CeedOperatorLinearAssembleAddDiagonalComposite(op, request, true, assembled));
2445   } else {
2446     CeedCall(CeedOperatorLinearAssembleAddDiagonalSingle(op, request, true, assembled));
2447   }
2448   return CEED_ERROR_SUCCESS;
2449 }
2450 
2451 /**
2452    @brief Fully assemble the nonzero pattern of a linear `CeedOperator`.
2453 
2454    Expected to be used in conjunction with @ref CeedOperatorLinearAssemble().
2455 
2456    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2457    Note that the `(i, j)` pairs are not unique and may repeat.
2458    This function returns the number of entries and their `(i, j)` locations, while @ref CeedOperatorLinearAssemble() provides the values in the same ordering.
2459 
2460    This will generally be slow unless your operator is low-order.
2461 
2462    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2463 
2464    @param[in]  op          `CeedOperator` to assemble
2465    @param[out] num_entries Number of entries in coordinate nonzero pattern
2466    @param[out] rows        Row number for each entry
2467    @param[out] cols        Column number for each entry
2468 
2469    @ref User
2470 **/
2471 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2472   bool          is_composite;
2473   CeedInt       num_suboperators, offset = 0;
2474   CeedSize      single_entries;
2475   CeedOperator *sub_operators;
2476 
2477   CeedCall(CeedOperatorCheckReady(op));
2478   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2479 
2480   if (op->LinearAssembleSymbolic) {
2481     // Backend version
2482     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
2483     return CEED_ERROR_SUCCESS;
2484   } else {
2485     // Operator fallback
2486     CeedOperator op_fallback;
2487 
2488     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleSymbolic\n");
2489     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2490     if (op_fallback) {
2491       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
2492       return CEED_ERROR_SUCCESS;
2493     }
2494   }
2495 
2496   // Default interface implementation
2497 
2498   // Count entries and allocate rows, cols arrays
2499   CeedCall(CeedOperatorLinearAssembleGetNumEntries(op, num_entries));
2500   CeedCall(CeedCalloc(*num_entries, rows));
2501   CeedCall(CeedCalloc(*num_entries, cols));
2502 
2503   // Assemble nonzero locations
2504   if (is_composite) {
2505     CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
2506     CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
2507     for (CeedInt k = 0; k < num_suboperators; ++k) {
2508       CeedCall(CeedOperatorAssembleSymbolicSingle(sub_operators[k], offset, *rows, *cols));
2509       CeedCall(CeedOperatorAssemblyCountEntriesSingle(sub_operators[k], &single_entries));
2510       offset += single_entries;
2511     }
2512   } else {
2513     CeedCall(CeedOperatorAssembleSymbolicSingle(op, offset, *rows, *cols));
2514   }
2515   return CEED_ERROR_SUCCESS;
2516 }
2517 
2518 /**
2519    @brief Fully assemble the nonzero entries of a linear operator.
2520 
2521    Expected to be used in conjunction with @ref CeedOperatorLinearAssembleSymbolic().
2522 
2523    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2524    Note that the `(i, j)` pairs are not unique and may repeat.
2525    This function returns the values of the nonzero entries to be added, their `(i, j)` locations are provided by @ref CeedOperatorLinearAssembleSymbolic().
2526 
2527    This will generally be slow unless your operator is low-order.
2528 
2529    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2530 
2531    @param[in]  op     `CeedOperator` to assemble
2532    @param[out] values Values to assemble into matrix
2533 
2534    @ref User
2535 **/
2536 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
2537   bool          is_composite;
2538   CeedInt       num_suboperators, offset = 0;
2539   CeedSize      single_entries = 0;
2540   CeedOperator *sub_operators;
2541 
2542   CeedCall(CeedOperatorCheckReady(op));
2543   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2544 
2545   // Early exit for empty operator
2546   if (!is_composite) {
2547     CeedInt num_elem = 0;
2548 
2549     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2550     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2551   }
2552 
2553   if (op->LinearAssemble) {
2554     // Backend version
2555     CeedCall(op->LinearAssemble(op, values));
2556     return CEED_ERROR_SUCCESS;
2557   } else if (is_composite) {
2558     // Default to summing contributions of suboperators
2559     CeedCall(CeedVectorSetValue(values, 0.0));
2560     CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
2561     CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
2562     for (CeedInt k = 0; k < num_suboperators; k++) {
2563       CeedCall(CeedOperatorAssembleSingle(sub_operators[k], offset, values));
2564       CeedCall(CeedOperatorAssemblyCountEntriesSingle(sub_operators[k], &single_entries));
2565       offset += single_entries;
2566     }
2567     return CEED_ERROR_SUCCESS;
2568   } else if (op->LinearAssembleSingle) {
2569     CeedCall(CeedVectorSetValue(values, 0.0));
2570     CeedCall(CeedOperatorAssembleSingle(op, offset, values));
2571     return CEED_ERROR_SUCCESS;
2572   } else {
2573     // Operator fallback
2574     CeedOperator op_fallback;
2575 
2576     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssemble\n");
2577     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2578     if (op_fallback) {
2579       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2580       return CEED_ERROR_SUCCESS;
2581     }
2582   }
2583 
2584   // Default to interface version if non-composite and no fallback
2585   CeedCall(CeedVectorSetValue(values, 0.0));
2586   CeedCall(CeedOperatorAssembleSingle(op, offset, values));
2587   return CEED_ERROR_SUCCESS;
2588 }
2589 
2590 /**
2591   @brief Get the multiplicity of nodes across sub-operators in a composite `CeedOperator`.
2592 
2593   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2594 
2595   @param[in]  op               Composite `CeedOperator`
2596   @param[in]  num_skip_indices Number of sub-operators to skip
2597   @param[in]  skip_indices     Array of indices of sub-operators to skip
2598   @param[out] mult             Vector to store multiplicity (of size `l_size` )
2599 
2600   @return An error code: 0 - success, otherwise - failure
2601 
2602   @ref User
2603 **/
2604 int CeedOperatorCompositeGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2605   Ceed                ceed;
2606   CeedInt             num_suboperators;
2607   CeedSize            l_vec_len;
2608   CeedScalar         *mult_array;
2609   CeedVector          ones_l_vec;
2610   CeedElemRestriction elem_rstr, mult_elem_rstr;
2611   CeedOperator       *sub_operators;
2612 
2613   CeedCall(CeedOperatorCheckReady(op));
2614 
2615   // Zero mult vector
2616   CeedCall(CeedVectorSetValue(mult, 0.0));
2617 
2618   // Get suboperators
2619   CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
2620   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2621   CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
2622 
2623   // Work vector
2624   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2625   CeedCall(CeedOperatorGetCeed(op, &ceed));
2626   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2627   CeedCall(CeedDestroy(&ceed));
2628   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2629   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2630 
2631   // Compute multiplicity across suboperators
2632   for (CeedInt i = 0; i < num_suboperators; i++) {
2633     const CeedScalar *sub_mult_array;
2634     CeedVector        sub_mult_l_vec, ones_e_vec;
2635 
2636     // -- Check for suboperator to skip
2637     for (CeedInt j = 0; j < num_skip_indices; j++) {
2638       if (skip_indices[j] == i) continue;
2639     }
2640 
2641     // -- Sub operator multiplicity
2642     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2643     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr));
2644     CeedCall(CeedElemRestrictionDestroy(&elem_rstr));
2645     CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2646     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2647     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2648     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2649     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2650     // ---- Flag every node present in the current suboperator
2651     for (CeedSize j = 0; j < l_vec_len; j++) {
2652       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2653     }
2654     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2655     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2656     CeedCall(CeedVectorDestroy(&ones_e_vec));
2657     CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr));
2658   }
2659   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2660   CeedCall(CeedVectorDestroy(&ones_l_vec));
2661   return CEED_ERROR_SUCCESS;
2662 }
2663 
2664 /**
2665   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator`, creating the prolongation basis from the fine and coarse grid interpolation.
2666 
2667   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2668 
2669   @param[in]  op_fine      Fine grid `CeedOperator`
2670   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2671   @param[in]  rstr_coarse  Coarse grid `CeedElemRestriction`
2672   @param[in]  basis_coarse Coarse grid active vector `CeedBasis`
2673   @param[out] op_coarse    Coarse grid `CeedOperator`
2674   @param[out] op_prolong   Coarse to fine `CeedOperator`, or `NULL`
2675   @param[out] op_restrict  Fine to coarse `CeedOperator`, or `NULL`
2676 
2677   @return An error code: 0 - success, otherwise - failure
2678 
2679   @ref User
2680 **/
2681 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2682                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2683   CeedBasis basis_c_to_f = NULL;
2684 
2685   CeedCall(CeedOperatorCheckReady(op_fine));
2686 
2687   // Build prolongation matrix, if required
2688   if (op_prolong || op_restrict) {
2689     CeedBasis basis_fine;
2690 
2691     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2692     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2693     CeedCall(CeedBasisDestroy(&basis_fine));
2694   }
2695 
2696   // Core code
2697   CeedCall(CeedOperatorMultigridLevelCreateSingle_Core(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong,
2698                                                        op_restrict));
2699   return CEED_ERROR_SUCCESS;
2700 }
2701 
2702 /**
2703   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator` with a tensor basis for the active basis.
2704 
2705   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2706 
2707   @param[in]  op_fine       Fine grid `CeedOperator`
2708   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2709   @param[in]  rstr_coarse   Coarse grid `CeedElemRestriction`
2710   @param[in]  basis_coarse  Coarse grid active vector `CeedBasis`
2711   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction `CeedOperator`
2712   @param[out] op_coarse     Coarse grid `CeedOperator`
2713   @param[out] op_prolong    Coarse to fine `CeedOperator`, or `NULL`
2714   @param[out] op_restrict   Fine to coarse `CeedOperator`, or `NULL`
2715 
2716   @return An error code: 0 - success, otherwise - failure
2717 
2718   @ref User
2719 **/
2720 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2721                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2722                                              CeedOperator *op_restrict) {
2723   Ceed      ceed;
2724   CeedInt   Q_f, Q_c;
2725   CeedBasis basis_fine, basis_c_to_f = NULL;
2726 
2727   CeedCall(CeedOperatorCheckReady(op_fine));
2728   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2729 
2730   // Check for compatible quadrature spaces
2731   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2732   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2733   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2734   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION,
2735             "Bases must have compatible quadrature spaces."
2736             " Fine grid: %" CeedInt_FMT " points, Coarse grid: %" CeedInt_FMT " points",
2737             Q_f, Q_c);
2738 
2739   // Create coarse to fine basis, if required
2740   if (op_prolong || op_restrict) {
2741     CeedInt     dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2742     CeedScalar *q_ref, *q_weight, *grad;
2743 
2744     // Check if interpolation matrix is provided
2745     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2746               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2747     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2748     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2749     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2750     CeedCall(CeedBasisDestroy(&basis_fine));
2751     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2752     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2753     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2754     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2755     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2756     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2757     CeedCall(CeedFree(&q_ref));
2758     CeedCall(CeedFree(&q_weight));
2759     CeedCall(CeedFree(&grad));
2760   }
2761 
2762   // Core code
2763   CeedCall(CeedOperatorMultigridLevelCreateSingle_Core(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong,
2764                                                        op_restrict));
2765   CeedCall(CeedDestroy(&ceed));
2766   return CEED_ERROR_SUCCESS;
2767 }
2768 
2769 /**
2770   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator` with a non-tensor basis for the active vector
2771 
2772   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2773 
2774   @param[in]  op_fine       Fine grid `CeedOperator`
2775   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2776   @param[in]  rstr_coarse   Coarse grid `CeedElemRestriction`
2777   @param[in]  basis_coarse  Coarse grid active vector `CeedBasis`
2778   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction `CeedOperator`
2779   @param[out] op_coarse     Coarse grid `CeedOperator`
2780   @param[out] op_prolong    Coarse to fine `CeedOperator`, or `NULL`
2781   @param[out] op_restrict   Fine to coarse `CeedOperator`, or `NULL`
2782 
2783   @return An error code: 0 - success, otherwise - failure
2784 
2785   @ref User
2786 **/
2787 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2788                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2789                                        CeedOperator *op_restrict) {
2790   Ceed      ceed;
2791   CeedInt   Q_f, Q_c;
2792   CeedBasis basis_fine, basis_c_to_f = NULL;
2793 
2794   CeedCall(CeedOperatorCheckReady(op_fine));
2795   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2796 
2797   // Check for compatible quadrature spaces
2798   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2799   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2800   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2801   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2802 
2803   // Coarse to fine basis
2804   if (op_prolong || op_restrict) {
2805     CeedInt          dim, num_comp, num_nodes_c, num_nodes_f;
2806     CeedScalar      *q_ref, *q_weight, *grad;
2807     CeedElemTopology topo;
2808 
2809     // Check if interpolation matrix is provided
2810     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2811               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2812     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2813     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2814     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2815     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2816     CeedCall(CeedBasisDestroy(&basis_fine));
2817     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2818     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2819     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2820     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2821     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2822     CeedCall(CeedFree(&q_ref));
2823     CeedCall(CeedFree(&q_weight));
2824     CeedCall(CeedFree(&grad));
2825   }
2826 
2827   // Core code
2828   CeedCall(CeedOperatorMultigridLevelCreateSingle_Core(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong,
2829                                                        op_restrict));
2830   CeedCall(CeedDestroy(&ceed));
2831   return CEED_ERROR_SUCCESS;
2832 }
2833 
2834 /**
2835   @brief Build a FDM based approximate inverse for each element for a `CeedOperator`.
2836 
2837   This returns a `CeedOperator` and `CeedVector` to apply a Fast Diagonalization Method based approximate inverse.
2838   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2839   The assembled `CeedQFunction` is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T \hat S V\f$.
2840   The `CeedOperator` must be linear and non-composite.
2841   The associated `CeedQFunction` must therefore also be linear.
2842 
2843   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2844 
2845   @param[in]  op      `CeedOperator` to create element inverses
2846   @param[out] fdm_inv `CeedOperator` to apply the action of a FDM based inverse for each element
2847   @param[in]  request Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2848 
2849   @return An error code: 0 - success, otherwise - failure
2850 
2851   @ref User
2852 **/
2853 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2854   Ceed                 ceed, ceed_parent;
2855   bool                 interp = false, grad = false, is_tensor_basis = true;
2856   CeedInt              num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2857   CeedScalar          *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg;
2858   const CeedScalar    *interp_1d, *grad_1d, *q_weight_1d;
2859   CeedVector           q_data;
2860   CeedElemRestriction  rstr  = NULL, rstr_qd_i;
2861   CeedBasis            basis = NULL, fdm_basis;
2862   CeedQFunctionContext ctx_fdm;
2863   CeedQFunctionField  *qf_fields;
2864   CeedQFunction        qf, qf_fdm;
2865   CeedOperatorField   *op_fields;
2866 
2867   CeedCall(CeedOperatorCheckReady(op));
2868 
2869   if (op->CreateFDMElementInverse) {
2870     // Backend version
2871     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2872     return CEED_ERROR_SUCCESS;
2873   } else {
2874     // Operator fallback
2875     CeedOperator op_fallback;
2876 
2877     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorCreateFDMElementInverse\n");
2878     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2879     if (op_fallback) {
2880       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2881       return CEED_ERROR_SUCCESS;
2882     }
2883   }
2884 
2885   // Default interface implementation
2886   CeedCall(CeedOperatorGetCeed(op, &ceed));
2887   CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent));
2888   CeedCall(CeedOperatorGetQFunction(op, &qf));
2889 
2890   // Determine active input basis
2891   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2892   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2893   for (CeedInt i = 0; i < num_input_fields; i++) {
2894     CeedVector vec;
2895 
2896     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2897     if (vec == CEED_VECTOR_ACTIVE) {
2898       CeedEvalMode eval_mode;
2899 
2900       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2901       interp = interp || eval_mode == CEED_EVAL_INTERP;
2902       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2903       if (!basis) CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2904       if (!rstr) CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2905     }
2906     CeedCall(CeedVectorDestroy(&vec));
2907   }
2908   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2909   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2910   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2911   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2912   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2913   CeedCall(CeedBasisGetDimension(basis, &dim));
2914   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2915   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2916 
2917   // Build and diagonalize 1D Mass and Laplacian
2918   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2919   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2920   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2921   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2922   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2923   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2924   CeedCall(CeedCalloc(P_1d, &lambda));
2925   // -- Build matrices
2926   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2927   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2928   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2929   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2930 
2931   // -- Diagonalize
2932   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2933   CeedCall(CeedFree(&mass));
2934   CeedCall(CeedFree(&laplace));
2935   for (CeedInt i = 0; i < P_1d; i++) {
2936     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2937   }
2938   CeedCall(CeedFree(&x));
2939 
2940   {
2941     CeedInt             layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2942     CeedScalar          max_norm = 0;
2943     const CeedScalar   *assembled_array, *q_weight_array;
2944     CeedVector          assembled = NULL, q_weight;
2945     CeedElemRestriction rstr_qf   = NULL;
2946 
2947     // Assemble QFunction
2948     CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2949     CeedCall(CeedElemRestrictionGetELayout(rstr_qf, layout));
2950     CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2951     CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2952 
2953     // Calculate element averages
2954     CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2955     CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2956     CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2957     CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2958     CeedCall(CeedCalloc(num_elem, &elem_avg));
2959     const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2960 
2961     for (CeedInt e = 0; e < num_elem; e++) {
2962       CeedInt count = 0;
2963 
2964       for (CeedInt q = 0; q < num_qpts; q++) {
2965         for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2966           if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2967             elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2968             count++;
2969           }
2970         }
2971       }
2972       if (count) {
2973         elem_avg[e] /= count;
2974       } else {
2975         elem_avg[e] = 1.0;
2976       }
2977     }
2978     CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2979     CeedCall(CeedVectorDestroy(&assembled));
2980     CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2981     CeedCall(CeedVectorDestroy(&q_weight));
2982   }
2983 
2984   // Build FDM diagonal
2985   {
2986     CeedScalar *q_data_array, *fdm_diagonal;
2987 
2988     CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2989     const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2990     for (CeedInt c = 0; c < num_comp; c++) {
2991       for (CeedInt n = 0; n < num_nodes; n++) {
2992         if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2993         if (grad) {
2994           for (CeedInt d = 0; d < dim; d++) {
2995             CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2996             fdm_diagonal[c * num_nodes + n] += lambda[i];
2997           }
2998         }
2999         if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
3000       }
3001     }
3002     CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
3003     CeedCall(CeedVectorSetValue(q_data, 0.0));
3004     CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
3005     for (CeedInt e = 0; e < num_elem; e++) {
3006       for (CeedInt c = 0; c < num_comp; c++) {
3007         for (CeedInt n = 0; n < num_nodes; n++) {
3008           q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
3009         }
3010       }
3011     }
3012     CeedCall(CeedFree(&elem_avg));
3013     CeedCall(CeedFree(&fdm_diagonal));
3014     CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
3015   }
3016 
3017   // Setup FDM operator
3018   // -- Basis
3019   {
3020     CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
3021 
3022     CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
3023     CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
3024     CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
3025     CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
3026     CeedCall(CeedFree(&fdm_interp));
3027     CeedCall(CeedFree(&grad_dummy));
3028     CeedCall(CeedFree(&q_ref_dummy));
3029     CeedCall(CeedFree(&q_weight_dummy));
3030     CeedCall(CeedFree(&lambda));
3031   }
3032 
3033   // -- Restriction
3034   {
3035     CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp};
3036     CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp,
3037                                               (CeedSize)num_elem * (CeedSize)num_comp * (CeedSize)num_nodes, strides, &rstr_qd_i));
3038   }
3039 
3040   // -- QFunction
3041   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
3042   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
3043   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
3044   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
3045   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
3046 
3047   // -- QFunction context
3048   {
3049     CeedInt *num_comp_data;
3050 
3051     CeedCall(CeedCalloc(1, &num_comp_data));
3052     num_comp_data[0] = num_comp;
3053     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
3054     CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
3055   }
3056   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
3057   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
3058 
3059   // -- Operator
3060   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
3061   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
3062   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data));
3063   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
3064 
3065   // Cleanup
3066   CeedCall(CeedDestroy(&ceed));
3067   CeedCall(CeedDestroy(&ceed_parent));
3068   CeedCall(CeedVectorDestroy(&q_data));
3069   CeedCall(CeedElemRestrictionDestroy(&rstr));
3070   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
3071   CeedCall(CeedBasisDestroy(&basis));
3072   CeedCall(CeedBasisDestroy(&fdm_basis));
3073   CeedCall(CeedQFunctionDestroy(&qf));
3074   CeedCall(CeedQFunctionDestroy(&qf_fdm));
3075   return CEED_ERROR_SUCCESS;
3076 }
3077 
3078 /// @}
3079