xref: /libCEED/interface/ceed-preconditioning.c (revision 9c944cbf21f25d9ee2d211269694a2e860ed203b)
1 // Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a `CeedQFunction` with a reference `Ceed` to fallback for advanced `CeedOperator` functionality
28 
29   @param[in]  fallback_ceed `Ceed` on which to create fallback `CeedQFunction`
30   @param[in]  qf            `CeedQFunction` to create fallback for
31   @param[out] qf_fallback   Fallback `CeedQFunction`
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   char               *source_path_with_name = NULL;
39   CeedInt             num_input_fields, num_output_fields;
40   CeedQFunctionField *input_fields, *output_fields;
41 
42   // Check if NULL qf passed in
43   if (!qf) return CEED_ERROR_SUCCESS;
44 
45   CeedDebug(CeedQFunctionReturnCeed(qf), "Creating fallback CeedQFunction\n");
46 
47   if (qf->source_path) {
48     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
49 
50     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
51     memcpy(source_path_with_name, qf->source_path, path_len);
52     memcpy(&source_path_with_name[path_len], ":", 1);
53     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
54   } else if (qf->user_source) {
55     CeedCall(CeedStringAllocCopy(qf->user_source, &source_path_with_name));
56   } else {
57     CeedCall(CeedCalloc(1, &source_path_with_name));
58   }
59 
60   {
61     CeedInt           vec_length;
62     CeedQFunctionUser f;
63 
64     CeedCall(CeedQFunctionGetVectorLength(qf, &vec_length));
65     CeedCall(CeedQFunctionGetUserFunction(qf, &f));
66     CeedCall(CeedQFunctionCreateInterior(fallback_ceed, vec_length, f, source_path_with_name, qf_fallback));
67   }
68   {
69     CeedQFunctionContext ctx;
70 
71     CeedCall(CeedQFunctionGetContext(qf, &ctx));
72     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
73     CeedCall(CeedQFunctionContextDestroy(&ctx));
74   }
75   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
76   for (CeedInt i = 0; i < num_input_fields; i++) {
77     const char  *field_name;
78     CeedInt      size;
79     CeedEvalMode eval_mode;
80 
81     CeedCall(CeedQFunctionFieldGetData(input_fields[i], &field_name, &size, &eval_mode));
82     CeedCall(CeedQFunctionAddInput(*qf_fallback, field_name, size, eval_mode));
83   }
84   for (CeedInt i = 0; i < num_output_fields; i++) {
85     const char  *field_name;
86     CeedInt      size;
87     CeedEvalMode eval_mode;
88 
89     CeedCall(CeedQFunctionFieldGetData(output_fields[i], &field_name, &size, &eval_mode));
90     CeedCall(CeedQFunctionAddOutput(*qf_fallback, field_name, size, eval_mode));
91   }
92   CeedCall(CeedFree(&source_path_with_name));
93   return CEED_ERROR_SUCCESS;
94 }
95 
96 /**
97   @brief Duplicate a `CeedOperator` with a reference `Ceed` to fallback for advanced `CeedOperator` functionality
98 
99   @param[in,out] op `CeedOperator` to create fallback for
100 
101   @return An error code: 0 - success, otherwise - failure
102 
103   @ref Developer
104 **/
105 static int CeedOperatorCreateFallback(CeedOperator op) {
106   bool         is_composite;
107   Ceed         ceed, ceed_fallback;
108   CeedOperator op_fallback;
109 
110   // Check not already created
111   if (op->op_fallback) return CEED_ERROR_SUCCESS;
112 
113   // Fallback Ceed
114   CeedCall(CeedOperatorGetCeed(op, &ceed));
115   CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
116   CeedCall(CeedDestroy(&ceed));
117   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
118 
119   CeedDebug(CeedOperatorReturnCeed(op), "Creating fallback CeedOperator\n");
120 
121   // Clone Op
122   CeedCall(CeedOperatorIsComposite(op, &is_composite));
123   if (is_composite) {
124     CeedInt       num_suboperators;
125     CeedOperator *sub_operators;
126 
127     CeedCall(CeedOperatorCreateComposite(ceed_fallback, &op_fallback));
128     CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
129     CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
130     for (CeedInt i = 0; i < num_suboperators; i++) {
131       CeedOperator op_sub_fallback;
132 
133       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
134       CeedCall(CeedOperatorCompositeAddSub(op_fallback, op_sub_fallback));
135     }
136   } else {
137     bool               is_at_points = false;
138     CeedInt            num_input_fields, num_output_fields;
139     CeedQFunction      qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
140     CeedOperatorField *input_fields, *output_fields;
141 
142     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
143     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
144     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
145     CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
146     if (is_at_points) {
147       CeedVector          points;
148       CeedElemRestriction rstr_points;
149 
150       CeedCall(CeedOperatorCreateAtPoints(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
151       CeedCall(CeedOperatorAtPointsGetPoints(op, &rstr_points, &points));
152       CeedCall(CeedOperatorAtPointsSetPoints(op_fallback, rstr_points, points));
153       CeedCall(CeedVectorDestroy(&points));
154       CeedCall(CeedElemRestrictionDestroy(&rstr_points));
155     } else {
156       CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
157     }
158     CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
159     for (CeedInt i = 0; i < num_input_fields; i++) {
160       const char         *field_name;
161       CeedVector          vec;
162       CeedElemRestriction rstr;
163       CeedBasis           basis;
164 
165       CeedCall(CeedOperatorFieldGetData(input_fields[i], &field_name, &rstr, &basis, &vec));
166       CeedCall(CeedOperatorSetField(op_fallback, field_name, rstr, basis, vec));
167       CeedCall(CeedVectorDestroy(&vec));
168       CeedCall(CeedElemRestrictionDestroy(&rstr));
169       CeedCall(CeedBasisDestroy(&basis));
170     }
171     for (CeedInt i = 0; i < num_output_fields; i++) {
172       const char         *field_name;
173       CeedVector          vec;
174       CeedElemRestriction rstr;
175       CeedBasis           basis;
176 
177       CeedCall(CeedOperatorFieldGetData(output_fields[i], &field_name, &rstr, &basis, &vec));
178       CeedCall(CeedOperatorSetField(op_fallback, field_name, rstr, basis, vec));
179       CeedCall(CeedVectorDestroy(&vec));
180       CeedCall(CeedElemRestrictionDestroy(&rstr));
181       CeedCall(CeedBasisDestroy(&basis));
182     }
183     {
184       CeedQFunctionAssemblyData data;
185 
186       CeedCall(CeedOperatorGetQFunctionAssemblyData(op, &data));
187       CeedCall(CeedQFunctionAssemblyDataReferenceCopy(data, &op_fallback->qf_assembled));
188     }
189     // Cleanup
190     CeedCall(CeedQFunctionDestroy(&qf_fallback));
191     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
192     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
193   }
194   CeedCall(CeedOperatorSetName(op_fallback, op->name));
195   CeedCall(CeedOperatorCheckReady(op_fallback));
196   // Note: No ref-counting here so we don't get caught in a reference loop.
197   //       The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback.
198   op->op_fallback                 = op_fallback;
199   op_fallback->op_fallback_parent = op;
200   CeedCall(CeedDestroy(&ceed_fallback));
201   return CEED_ERROR_SUCCESS;
202 }
203 
204 /**
205   @brief Core logic for assembling operator diagonal or point block diagonal
206 
207   @param[in]  op             `CeedOperator` to assemble diagonal or point block diagonal
208   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
209   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
210   @param[out] assembled      `CeedVector` to store assembled diagonal
211 
212   @return An error code: 0 - success, otherwise - failure
213 
214   @ref Developer
215 **/
216 static inline int CeedOperatorLinearAssembleAddDiagonalSingle_Mesh(CeedOperator op, CeedRequest *request, const bool is_point_block,
217                                                                    CeedVector assembled) {
218   bool is_composite;
219 
220   CeedCall(CeedOperatorIsComposite(op, &is_composite));
221   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
222 
223   // Assemble QFunction
224   CeedInt             layout_qf[3];
225   const CeedScalar   *assembled_qf_array;
226   CeedVector          assembled_qf        = NULL;
227   CeedElemRestriction assembled_elem_rstr = NULL;
228 
229   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
230   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, layout_qf));
231   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
232   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
233 
234   // Get assembly data
235   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
236   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
237   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
238   CeedBasis               *active_bases_in, *active_bases_out;
239   CeedElemRestriction     *active_elem_rstrs_in, *active_elem_rstrs_out;
240   CeedOperatorAssemblyData data;
241 
242   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
243   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
244                                                 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out,
245                                                 &num_output_components));
246   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL));
247   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out));
248 
249   // Loop over all active bases (find matching input/output pairs)
250   for (CeedInt b = 0; b < CeedIntMin(num_active_bases_in, num_active_bases_out); b++) {
251     CeedInt             b_in, b_out, num_elem, num_nodes, num_qpts, num_comp;
252     bool                has_eval_none = false;
253     CeedScalar         *elem_diag_array, *identity = NULL;
254     CeedVector          elem_diag;
255     CeedElemRestriction diag_elem_rstr;
256 
257     if (num_active_bases_in <= num_active_bases_out) {
258       b_in = b;
259       for (b_out = 0; b_out < num_active_bases_out; b_out++) {
260         if (active_bases_in[b_in] == active_bases_out[b_out]) {
261           break;
262         }
263       }
264       if (b_out == num_active_bases_out) {
265         continue;
266       }  // No matching output basis found
267     } else {
268       b_out = b;
269       for (b_in = 0; b_in < num_active_bases_in; b_in++) {
270         if (active_bases_in[b_in] == active_bases_out[b_out]) {
271           break;
272         }
273       }
274       if (b_in == num_active_bases_in) {
275         continue;
276       }  // No matching output basis found
277     }
278     CeedCheck(active_elem_rstrs_in[b_in] == active_elem_rstrs_out[b_out], CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
279               "Cannot assemble operator diagonal with different input and output active element restrictions");
280 
281     // Assemble point block diagonal restriction, if needed
282     if (is_point_block) {
283       CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b_in], &diag_elem_rstr));
284     } else {
285       CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b_in], &diag_elem_rstr));
286     }
287 
288     // Create diagonal vector
289     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
290 
291     // Assemble element operator diagonals
292     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
293     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
294     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
295     CeedCall(CeedBasisGetNumNodes(active_bases_in[b_in], &num_nodes));
296     CeedCall(CeedBasisGetNumComponents(active_bases_in[b_in], &num_comp));
297     if (active_bases_in[b_in] == CEED_BASIS_NONE) num_qpts = num_nodes;
298     else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b_in], &num_qpts));
299 
300     // Construct identity matrix for basis if required
301     for (CeedInt i = 0; i < num_eval_modes_in[b_in]; i++) {
302       has_eval_none = has_eval_none || (eval_modes_in[b_in][i] == CEED_EVAL_NONE);
303     }
304     for (CeedInt i = 0; i < num_eval_modes_out[b_out]; i++) {
305       has_eval_none = has_eval_none || (eval_modes_out[b_out][i] == CEED_EVAL_NONE);
306     }
307     if (has_eval_none) {
308       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
309       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
310     }
311 
312     // Compute the diagonal of B^T D B
313     // Each element
314     for (CeedSize e = 0; e < num_elem; e++) {
315       // Each basis eval mode pair
316       CeedInt      d_out              = 0, q_comp_out;
317       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
318 
319       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b_out]; e_out++) {
320         CeedInt           d_in              = 0, q_comp_in;
321         const CeedScalar *B_t               = NULL;
322         CeedEvalMode      eval_mode_in_prev = CEED_EVAL_NONE;
323 
324         CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b_out], eval_modes_out[b_out][e_out], identity, &B_t));
325         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b_out], eval_modes_out[b_out][e_out], &q_comp_out));
326         if (q_comp_out > 1) {
327           if (e_out == 0 || eval_modes_out[b_out][e_out] != eval_mode_out_prev) d_out = 0;
328           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
329         }
330         eval_mode_out_prev = eval_modes_out[b_out][e_out];
331 
332         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b_in]; e_in++) {
333           const CeedScalar *B = NULL;
334 
335           CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b_in], eval_modes_in[b_in][e_in], identity, &B));
336           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b_in], eval_modes_in[b_in][e_in], &q_comp_in));
337           if (q_comp_in > 1) {
338             if (e_in == 0 || eval_modes_in[b_in][e_in] != eval_mode_in_prev) d_in = 0;
339             else B = &B[(++d_in) * num_qpts * num_nodes];
340           }
341           eval_mode_in_prev = eval_modes_in[b_in][e_in];
342 
343           // Each component
344           for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
345             // Each qpt/node pair
346             for (CeedInt q = 0; q < num_qpts; q++) {
347               if (is_point_block) {
348                 // Point Block Diagonal
349                 for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
350                   const CeedSize c_offset =
351                       (eval_mode_offsets_in[b_in][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
352                   const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
353 
354                   for (CeedInt n = 0; n < num_nodes; n++) {
355                     elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
356                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
357                   }
358                 }
359               } else {
360                 // Diagonal Only
361                 const CeedInt c_offset =
362                     (eval_mode_offsets_in[b_in][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
363                 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
364 
365                 for (CeedInt n = 0; n < num_nodes; n++) {
366                   elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
367                 }
368               }
369             }
370           }
371         }
372       }
373     }
374     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
375 
376     // Assemble local operator diagonal
377     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
378 
379     // Cleanup
380     CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
381     CeedCall(CeedVectorDestroy(&elem_diag));
382     CeedCall(CeedFree(&identity));
383   }
384   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
385   CeedCall(CeedVectorDestroy(&assembled_qf));
386   return CEED_ERROR_SUCCESS;
387 }
388 
389 /**
390   @brief Core logic for assembling operator diagonal or point block diagonal
391 
392   @param[in]  op             `CeedOperator` to assemble diagonal or point block diagonal
393   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
394   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
395   @param[out] assembled      `CeedVector` to store assembled diagonal
396 
397   @return An error code: 0 - success, otherwise - failure
398 
399   @ref Developer
400 **/
401 static inline int CeedOperatorLinearAssembleAddDiagonalSingle(CeedOperator op, CeedRequest *request, const bool is_point_block,
402                                                               CeedVector assembled) {
403   bool is_at_points;
404 
405   CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
406   CeedCheck(!is_at_points, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "AtPoints operator not supported");
407   CeedCall(CeedOperatorLinearAssembleAddDiagonalSingle_Mesh(op, request, is_point_block, assembled));
408   return CEED_ERROR_SUCCESS;
409 }
410 
411 /**
412   @brief Core logic for assembling composite operator diagonal
413 
414   @param[in]  op             `CeedOperator` to assemble point block diagonal
415   @param[in]  request        Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
416   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
417   @param[out] assembled      `CeedVector` to store assembled diagonal
418 
419   @return An error code: 0 - success, otherwise - failure
420 
421   @ref Developer
422 **/
423 static inline int CeedOperatorLinearAssembleAddDiagonalComposite(CeedOperator op, CeedRequest *request, const bool is_point_block,
424                                                                  CeedVector assembled) {
425   CeedInt       num_sub;
426   CeedOperator *suboperators;
427 
428   CeedCall(CeedOperatorCompositeGetNumSub(op, &num_sub));
429   CeedCall(CeedOperatorCompositeGetSubList(op, &suboperators));
430   for (CeedInt i = 0; i < num_sub; i++) {
431     if (is_point_block) {
432       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
433     } else {
434       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
435     }
436   }
437   return CEED_ERROR_SUCCESS;
438 }
439 
440 /**
441   @brief Build nonzero pattern for non-composite CeedOperator`.
442 
443   Users should generally use @ref CeedOperatorLinearAssembleSymbolic().
444 
445   @param[in]  op     `CeedOperator` to assemble nonzero pattern
446   @param[in]  offset Offset for number of entries
447   @param[out] rows   Row number for each entry
448   @param[out] cols   Column number for each entry
449 
450   @return An error code: 0 - success, otherwise - failure
451 
452   @ref Developer
453 **/
454 static int CeedOperatorAssembleSymbolicSingle(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
455   Ceed                ceed;
456   bool                is_composite;
457   CeedSize            num_nodes_in, num_nodes_out, local_num_entries, count = 0;
458   CeedInt             num_elem_in, elem_size_in, num_comp_in, layout_er_in[3];
459   CeedInt             num_elem_out, elem_size_out, num_comp_out, layout_er_out[3];
460   CeedScalar         *array;
461   const CeedScalar   *elem_dof_a_in, *elem_dof_a_out;
462   CeedVector          index_vec_in, index_vec_out, elem_dof_in, elem_dof_out;
463   CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out;
464 
465   CeedCall(CeedOperatorIsComposite(op, &is_composite));
466   CeedCall(CeedOperatorGetCeed(op, &ceed));
467   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
468 
469   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out));
470   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
471   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
472   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
473   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
474   CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, layout_er_in));
475 
476   // Determine elem_dof relation for input
477   CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in));
478   CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array));
479   for (CeedSize i = 0; i < num_nodes_in; i++) array[i] = i;
480   CeedCall(CeedVectorRestoreArray(index_vec_in, &array));
481   CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in));
482   CeedCall(CeedVectorSetValue(elem_dof_in, 0.0));
483   CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in));
484   CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE));
485   CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in));
486   CeedCall(CeedVectorDestroy(&index_vec_in));
487   CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in));
488 
489   if (elem_rstr_in != elem_rstr_out) {
490     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
491     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
492               "Active input and output operator restrictions must have the same number of elements."
493               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
494               num_elem_in, num_elem_out);
495     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
496     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
497     CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, layout_er_out));
498 
499     // Determine elem_dof relation for output
500     CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out));
501     CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array));
502     for (CeedSize i = 0; i < num_nodes_out; i++) array[i] = i;
503     CeedCall(CeedVectorRestoreArray(index_vec_out, &array));
504     CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out));
505     CeedCall(CeedVectorSetValue(elem_dof_out, 0.0));
506     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out));
507     CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE));
508     CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out));
509     CeedCall(CeedVectorDestroy(&index_vec_out));
510     CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out));
511   } else {
512     num_elem_out     = num_elem_in;
513     elem_size_out    = elem_size_in;
514     num_comp_out     = num_comp_in;
515     layout_er_out[0] = layout_er_in[0];
516     layout_er_out[1] = layout_er_in[1];
517     layout_er_out[2] = layout_er_in[2];
518     elem_dof_a_out   = elem_dof_a_in;
519   }
520   local_num_entries = (CeedSize)elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
521 
522   // Determine i, j locations for element matrices
523   for (CeedInt e = 0; e < num_elem_in; e++) {
524     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
525       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
526         for (CeedInt i = 0; i < elem_size_out; i++) {
527           for (CeedInt j = 0; j < elem_size_in; j++) {
528             const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2];
529             const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2];
530             const CeedInt row                = elem_dof_a_out[elem_dof_index_row];
531             const CeedInt col                = elem_dof_a_in[elem_dof_index_col];
532 
533             rows[offset + count] = row;
534             cols[offset + count] = col;
535             count++;
536           }
537         }
538       }
539     }
540   }
541   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
542   CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in));
543   CeedCall(CeedVectorDestroy(&elem_dof_in));
544   if (elem_rstr_in != elem_rstr_out) {
545     CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out));
546     CeedCall(CeedVectorDestroy(&elem_dof_out));
547   }
548   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
549   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
550   CeedCall(CeedDestroy(&ceed));
551   return CEED_ERROR_SUCCESS;
552 }
553 
554 /**
555   @brief Core logic to assemble `CeedQFunction` and store result internally.
556 
557   Return copied references of stored data to the caller.
558   Caller is responsible for ownership and destruction of the copied references.
559   See also @ref CeedOperatorLinearAssembleQFunction().
560 
561   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
562         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
563 
564   @param[in]  op         `CeedOperator` to assemble `CeedQFunction`
565   @param[in]  use_parent Boolean flag to check for fallback parent implementation
566   @param[out] assembled  `CeedVector` to store assembled `CeedQFunction` at quadrature points
567   @param[out] rstr       `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
568   @param[in]  request    Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
569 
570   @return An error code: 0 - success, otherwise - failure
571 
572   @ref User
573 **/
574 static int CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(CeedOperator op, bool use_parent, CeedVector *assembled, CeedElemRestriction *rstr,
575                                                                  CeedRequest *request) {
576   int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL;
577   CeedOperator op_assemble                                                                           = NULL;
578   CeedOperator op_fallback_parent                                                                    = NULL;
579 
580   CeedCall(CeedOperatorCheckReady(op));
581 
582   // Determine if fallback parent or operator has implementation
583   CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent));
584   if (op_fallback_parent && use_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) {
585     // -- Backend version for op fallback parent is faster, if it exists
586     CeedDebug(CeedOperatorReturnCeed(op), "Using fallback parent for CeedOperatorLinearAssembleQFunctionBuildOrUpdate\n");
587     LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate;
588     op_assemble                   = op_fallback_parent;
589   } else if (op->LinearAssembleQFunctionUpdate) {
590     // -- Backend version for op
591     LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate;
592     op_assemble                   = op;
593   }
594 
595   // Assemble QFunction
596   if (LinearAssembleQFunctionUpdate) {
597     // Backend or fallback parent version
598     CeedQFunctionAssemblyData data;
599     bool                      data_is_setup;
600     CeedVector                assembled_vec  = NULL;
601     CeedElemRestriction       assembled_rstr = NULL;
602 
603     CeedCall(CeedOperatorGetQFunctionAssemblyData(op, &data));
604     CeedCall(CeedQFunctionAssemblyDataIsSetup(data, &data_is_setup));
605     if (data_is_setup) {
606       bool update_needed;
607 
608       CeedCall(CeedQFunctionAssemblyDataGetObjects(data, &assembled_vec, &assembled_rstr));
609       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(data, &update_needed));
610       if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request));
611     } else {
612       CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request));
613       CeedCall(CeedQFunctionAssemblyDataSetObjects(data, assembled_vec, assembled_rstr));
614     }
615     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(data, false));
616 
617     // Copy reference from internally held copy
618     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
619     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
620     CeedCall(CeedVectorDestroy(&assembled_vec));
621     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
622   } else {
623     // Operator fallback
624     CeedOperator op_fallback;
625 
626     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleQFunctionBuildOrUpdate\n");
627     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
628     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
629     else return CeedError(CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
630   }
631   return CEED_ERROR_SUCCESS;
632 }
633 
634 /**
635   @brief Assemble `CeedQFunction` and store result internally, but do not use fallback parent.
636 
637   Return copied references of stored data to the caller.
638   Caller is responsible for ownership and destruction of the copied references.
639   See also @ref CeedOperatorLinearAssembleQFunction().
640 
641   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
642         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
643 
644   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
645   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
646   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
647   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
648 
649   @return An error code: 0 - success, otherwise - failure
650 
651   @ref Developer
652 **/
653 int CeedOperatorLinearAssembleQFunctionBuildOrUpdateFallback(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr,
654                                                              CeedRequest *request) {
655   return CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(op, false, assembled, rstr, request);
656 }
657 
658 /**
659   @brief Assemble nonzero entries for non-composite `CeedOperator`.
660 
661   Users should generally use @ref CeedOperatorLinearAssemble().
662 
663   @param[in]  op     `CeedOperator` to assemble
664   @param[in]  offset Offset for number of entries
665   @param[out] values Values to assemble into matrix
666 
667   @return An error code: 0 - success, otherwise - failure
668 
669   @ref Developer
670 **/
671 int CeedOperatorAssembleSingle(CeedOperator op, CeedInt offset, CeedVector values) {
672   bool is_composite, is_at_points;
673 
674   CeedCall(CeedOperatorIsComposite(op, &is_composite));
675   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
676 
677   // Early exit for empty operator
678   {
679     CeedInt num_elem = 0;
680 
681     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
682     if (num_elem == 0) return CEED_ERROR_SUCCESS;
683   }
684 
685   if (op->LinearAssembleSingle) {
686     // Backend version
687     CeedCall(op->LinearAssembleSingle(op, offset, values));
688     return CEED_ERROR_SUCCESS;
689   } else {
690     // Operator fallback
691     CeedOperator op_fallback;
692 
693     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorAssembleSingle\n");
694     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
695     if (op_fallback) {
696       CeedCall(CeedOperatorAssembleSingle(op_fallback, offset, values));
697       return CEED_ERROR_SUCCESS;
698     }
699   }
700 
701   CeedCall(CeedOperatorIsAtPoints(op, &is_at_points));
702   CeedCheck(!is_at_points, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
703             "Backend does not implement CeedOperatorLinearAssemble for AtPoints operator");
704 
705   // Assemble QFunction
706   CeedInt             layout_qf[3];
707   const CeedScalar   *assembled_qf_array;
708   CeedVector          assembled_qf        = NULL;
709   CeedElemRestriction assembled_elem_rstr = NULL;
710 
711   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE));
712   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, layout_qf));
713   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
714   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
715 
716   // Get assembly data
717   CeedInt                  num_elem_in, elem_size_in, num_comp_in, num_qpts_in;
718   CeedInt                  num_elem_out, elem_size_out, num_comp_out, num_qpts_out;
719   CeedSize                 local_num_entries, count = 0;
720   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
721   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
722   CeedBasis               *active_bases_in, *active_bases_out, basis_in, basis_out;
723   const CeedScalar       **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out;
724   CeedElemRestriction      elem_rstr_in, elem_rstr_out;
725   CeedRestrictionType      elem_rstr_type_in, elem_rstr_type_out;
726   const bool              *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL;
727   const CeedInt8          *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL;
728   CeedOperatorAssemblyData data;
729 
730   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
731   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out,
732                                                 &num_eval_modes_out, &eval_modes_out, NULL, NULL));
733 
734   CeedCheck(num_active_bases_in == 1 && num_active_bases_out == 1, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
735             "Cannot assemble operator with multiple active bases");
736   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
737             "Cannot assemble operator without inputs/outputs");
738 
739   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out));
740   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
741   basis_in  = active_bases_in[0];
742   basis_out = active_bases_out[0];
743   B_mat_in  = B_mats_in[0];
744   B_mat_out = B_mats_out[0];
745 
746   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
747   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
748   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
749   if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in;
750   else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in));
751 
752   CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in));
753   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
754     CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in));
755   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
756     CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in));
757   }
758 
759   if (elem_rstr_in != elem_rstr_out) {
760     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
761     CeedCheck(num_elem_in == num_elem_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
762               "Active input and output operator restrictions must have the same number of elements."
763               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
764               num_elem_in, num_elem_out);
765     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
766     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
767     if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out;
768     else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out));
769     CeedCheck(num_qpts_in == num_qpts_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
770               "Active input and output bases must have the same number of quadrature points."
771               " Input has %" CeedInt_FMT " points; output has %" CeedInt_FMT "points.",
772               num_qpts_in, num_qpts_out);
773 
774     CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out));
775     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
776       CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out));
777     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
778       CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out));
779     }
780   } else {
781     num_elem_out  = num_elem_in;
782     elem_size_out = elem_size_in;
783     num_comp_out  = num_comp_in;
784     num_qpts_out  = num_qpts_in;
785 
786     elem_rstr_orients_out      = elem_rstr_orients_in;
787     elem_rstr_curl_orients_out = elem_rstr_curl_orients_in;
788   }
789   local_num_entries = (CeedSize)elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
790 
791   // Loop over elements and put in data structure
792   // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
793   CeedTensorContract contract;
794   CeedScalar        *vals, *BTD_mat = NULL, *elem_mat = NULL, *elem_mat_b = NULL;
795 
796   CeedCall(CeedBasisGetTensorContract(basis_in, &contract));
797   CeedCall(CeedCalloc(elem_size_out * num_qpts_in * num_eval_modes_in[0], &BTD_mat));
798   CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat));
799   if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b));
800 
801   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
802   for (CeedSize e = 0; e < num_elem_in; e++) {
803     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
804       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
805         // Compute B^T*D
806         for (CeedSize n = 0; n < elem_size_out; n++) {
807           for (CeedSize q = 0; q < num_qpts_in; q++) {
808             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
809               const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in;
810               CeedScalar     sum       = 0.0;
811 
812               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
813                 const CeedSize b_out_index     = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n;
814                 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out;
815                 const CeedSize qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
816 
817                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
818               }
819               BTD_mat[btd_index] = sum;
820             }
821           }
822         }
823 
824         // Form element matrix itself (for each block component)
825         if (contract) {
826           CeedCall(CeedTensorContractApply(contract, 1, num_qpts_in * num_eval_modes_in[0], elem_size_in, elem_size_out, BTD_mat, CEED_NOTRANSPOSE,
827                                            false, B_mat_in, elem_mat));
828         } else {
829           Ceed ceed;
830 
831           CeedCall(CeedOperatorGetCeed(op, &ceed));
832           CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size_out, elem_size_in, num_qpts_in * num_eval_modes_in[0]));
833           CeedCall(CeedDestroy(&ceed));
834         }
835 
836         // Transform the element matrix if required
837         if (elem_rstr_orients_out) {
838           const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out];
839 
840           for (CeedInt i = 0; i < elem_size_out; i++) {
841             const double orient = elem_orients[i] ? -1.0 : 1.0;
842 
843             for (CeedInt j = 0; j < elem_size_in; j++) {
844               elem_mat[i * elem_size_in + j] *= orient;
845             }
846           }
847         } else if (elem_rstr_curl_orients_out) {
848           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out];
849 
850           // T^T*(B^T*D*B)
851           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
852           for (CeedInt i = 0; i < elem_size_out; i++) {
853             for (CeedInt j = 0; j < elem_size_in; j++) {
854               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] +
855                                                (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) +
856                                                (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0);
857             }
858           }
859         }
860         if (elem_rstr_orients_in) {
861           const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in];
862 
863           for (CeedInt i = 0; i < elem_size_out; i++) {
864             for (CeedInt j = 0; j < elem_size_in; j++) {
865               elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0;
866             }
867           }
868         } else if (elem_rstr_curl_orients_in) {
869           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in];
870 
871           // (B^T*D*B)*T
872           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
873           for (CeedInt i = 0; i < elem_size_out; i++) {
874             for (CeedInt j = 0; j < elem_size_in; j++) {
875               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] +
876                                                (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) +
877                                                (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0);
878             }
879           }
880         }
881 
882         // Put element matrix in coordinate data structure
883         for (CeedInt i = 0; i < elem_size_out; i++) {
884           for (CeedInt j = 0; j < elem_size_in; j++) {
885             vals[offset + count] = elem_mat[i * elem_size_in + j];
886             count++;
887           }
888         }
889       }
890     }
891   }
892   CeedCheck(count == local_num_entries, CeedOperatorReturnCeed(op), CEED_ERROR_MAJOR, "Error computing entries");
893   CeedCall(CeedVectorRestoreArray(values, &vals));
894 
895   // Cleanup
896   CeedCall(CeedFree(&BTD_mat));
897   CeedCall(CeedFree(&elem_mat));
898   CeedCall(CeedFree(&elem_mat_b));
899   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
900     CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in));
901   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
902     CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in));
903   }
904   if (elem_rstr_in != elem_rstr_out) {
905     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
906       CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out));
907     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
908       CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out));
909     }
910   }
911   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
912   CeedCall(CeedVectorDestroy(&assembled_qf));
913   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
914   CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
915   return CEED_ERROR_SUCCESS;
916 }
917 
918 /**
919   @brief Count number of entries for assembled `CeedOperator`
920 
921   @param[in]  op          `CeedOperator` to assemble
922   @param[out] num_entries Number of entries in assembled representation
923 
924   @return An error code: 0 - success, otherwise - failure
925 
926   @ref Utility
927 **/
928 static int CeedOperatorAssemblyCountEntriesSingle(CeedOperator op, CeedSize *num_entries) {
929   bool                is_composite;
930   CeedInt             num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out;
931   CeedElemRestriction rstr_in, rstr_out;
932 
933   CeedCall(CeedOperatorIsComposite(op, &is_composite));
934   CeedCheck(!is_composite, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
935 
936   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out));
937   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in));
938   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in));
939   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in));
940   if (rstr_in != rstr_out) {
941     CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out));
942     CeedCheck(num_elem_in == num_elem_out, CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED,
943               "Active input and output operator restrictions must have the same number of elements."
944               " Input has %" CeedInt_FMT " elements; output has %" CeedInt_FMT "elements.",
945               num_elem_in, num_elem_out);
946     CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out));
947     CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out));
948   } else {
949     num_elem_out  = num_elem_in;
950     elem_size_out = elem_size_in;
951     num_comp_out  = num_comp_in;
952   }
953   CeedCall(CeedElemRestrictionDestroy(&rstr_in));
954   CeedCall(CeedElemRestrictionDestroy(&rstr_out));
955   *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in;
956   return CEED_ERROR_SUCCESS;
957 }
958 
959 /**
960   @brief Count number of entries for assembled `CeedOperator`
961 
962   @param[in]  op          `CeedOperator` to assemble
963   @param[out] num_entries Number of entries in assembled representation
964 
965   @return An error code: 0 - success, otherwise - failure
966 
967   @ref Utility
968 **/
969 int CeedOperatorLinearAssembleGetNumEntries(CeedOperator op, CeedSize *num_entries) {
970   bool is_composite;
971 
972   CeedCall(CeedOperatorCheckReady(op));
973   CeedCall(CeedOperatorIsComposite(op, &is_composite));
974 
975   if (is_composite) {
976     CeedInt       num_suboperators;
977     CeedOperator *sub_operators;
978 
979     CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
980     CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
981 
982     *num_entries = 0;
983     for (CeedInt k = 0; k < num_suboperators; ++k) {
984       CeedSize single_entries;
985 
986       CeedCall(CeedOperatorAssemblyCountEntriesSingle(sub_operators[k], &single_entries));
987       *num_entries += single_entries;
988     }
989   } else {
990     CeedCall(CeedOperatorAssemblyCountEntriesSingle(op, num_entries));
991   }
992   return CEED_ERROR_SUCCESS;
993 }
994 
995 /**
996   @brief Common code for creating a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator`
997 
998   @param[in]  op_fine      Fine grid `CeedOperator`
999   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
1000   @param[in]  rstr_coarse  Coarse grid `CeedElemRestriction`
1001   @param[in]  basis_coarse Coarse grid active vector `CeedBasis`
1002   @param[in]  basis_c_to_f `CeedBasis` for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction operators
1003   @param[out] op_coarse    Coarse grid `CeedOperator`
1004   @param[out] op_prolong   Coarse to fine `CeedOperator`, or `NULL`
1005   @param[out] op_restrict  Fine to coarse `CeedOperator`, or `NULL`
1006 
1007   @return An error code: 0 - success, otherwise - failure
1008 
1009   @ref Developer
1010 **/
1011 static int CeedOperatorMultigridLevelCreateSingle_Core(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse,
1012                                                        CeedBasis basis_coarse, CeedBasis basis_c_to_f, CeedOperator *op_coarse,
1013                                                        CeedOperator *op_prolong, CeedOperator *op_restrict) {
1014   bool                is_composite;
1015   Ceed                ceed;
1016   CeedInt             num_comp, num_input_fields, num_output_fields;
1017   CeedVector          mult_vec         = NULL;
1018   CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL;
1019   CeedOperatorField  *input_fields, *output_fields;
1020 
1021   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
1022 
1023   // Check for composite operator
1024   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
1025   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
1026 
1027   // Coarse Grid
1028   {
1029     bool is_at_points;
1030 
1031     CeedCall(CeedOperatorIsAtPoints(op_fine, &is_at_points));
1032     if (is_at_points) {
1033       CeedVector          point_coords;
1034       CeedElemRestriction rstr_points;
1035 
1036       CeedCall(CeedOperatorCreateAtPoints(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
1037       CeedCall(CeedOperatorAtPointsGetPoints(op_fine, &rstr_points, &point_coords));
1038       CeedCall(CeedOperatorAtPointsSetPoints(*op_coarse, rstr_points, point_coords));
1039       CeedCall(CeedVectorDestroy(&point_coords));
1040       CeedCall(CeedElemRestrictionDestroy(&rstr_points));
1041     } else {
1042       CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
1043     }
1044   }
1045   CeedCall(CeedOperatorGetFields(op_fine, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
1046   // -- Clone input fields
1047   for (CeedInt i = 0; i < num_input_fields; i++) {
1048     const char         *field_name;
1049     CeedVector          vec;
1050     CeedElemRestriction rstr  = NULL;
1051     CeedBasis           basis = NULL;
1052 
1053     CeedCall(CeedOperatorFieldGetName(input_fields[i], &field_name));
1054     CeedCall(CeedOperatorFieldGetVector(input_fields[i], &vec));
1055     if (vec == CEED_VECTOR_ACTIVE) {
1056       CeedCall(CeedElemRestrictionReferenceCopy(rstr_coarse, &rstr));
1057       CeedCall(CeedBasisReferenceCopy(basis_coarse, &basis));
1058       if (!rstr_fine) CeedCall(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_fine));
1059     } else {
1060       CeedCall(CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr));
1061       CeedCall(CeedOperatorFieldGetBasis(input_fields[i], &basis));
1062     }
1063     CeedCall(CeedOperatorSetField(*op_coarse, field_name, rstr, basis, vec));
1064     CeedCall(CeedVectorDestroy(&vec));
1065     CeedCall(CeedElemRestrictionDestroy(&rstr));
1066     CeedCall(CeedBasisDestroy(&basis));
1067   }
1068   // -- Clone output fields
1069   for (CeedInt i = 0; i < num_output_fields; i++) {
1070     const char         *field_name;
1071     CeedVector          vec;
1072     CeedElemRestriction rstr  = NULL;
1073     CeedBasis           basis = NULL;
1074 
1075     CeedCall(CeedOperatorFieldGetName(output_fields[i], &field_name));
1076     CeedCall(CeedOperatorFieldGetVector(output_fields[i], &vec));
1077     if (vec == CEED_VECTOR_ACTIVE) {
1078       CeedCall(CeedElemRestrictionReferenceCopy(rstr_coarse, &rstr));
1079       CeedCall(CeedBasisReferenceCopy(basis_coarse, &basis));
1080       if (!rstr_fine) CeedCall(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_fine));
1081     } else {
1082       CeedCall(CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr));
1083       CeedCall(CeedOperatorFieldGetBasis(output_fields[i], &basis));
1084     }
1085     CeedCall(CeedOperatorSetField(*op_coarse, field_name, rstr, basis, vec));
1086     CeedCall(CeedVectorDestroy(&vec));
1087     CeedCall(CeedElemRestrictionDestroy(&rstr));
1088     CeedCall(CeedBasisDestroy(&basis));
1089   }
1090   // -- Clone QFunctionAssemblyData
1091   {
1092     CeedQFunctionAssemblyData fine_data;
1093 
1094     CeedCall(CeedOperatorGetQFunctionAssemblyData(op_fine, &fine_data));
1095     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(fine_data, &(*op_coarse)->qf_assembled));
1096   }
1097 
1098   // Multiplicity vector
1099   if (op_restrict || op_prolong) {
1100     CeedVector          mult_e_vec;
1101     CeedRestrictionType rstr_type;
1102 
1103     CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type));
1104     CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED,
1105               "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported");
1106     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
1107     CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine));
1108     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
1109     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
1110     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
1111     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
1112     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
1113     CeedCall(CeedVectorDestroy(&mult_e_vec));
1114     CeedCall(CeedVectorReciprocal(mult_vec));
1115   }
1116 
1117   // Clone name
1118   bool   has_name = op_fine->name;
1119   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
1120   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
1121 
1122   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
1123   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
1124             "Prolongation or restriction operator creation requires coarse-to-fine basis");
1125 
1126   // Restriction/Prolongation Operators
1127   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
1128 
1129   // Restriction
1130   if (op_restrict) {
1131     CeedInt             *num_comp_r_data;
1132     CeedQFunctionContext ctx_r;
1133     CeedQFunction        qf_restrict;
1134 
1135     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
1136     CeedCall(CeedCalloc(1, &num_comp_r_data));
1137     num_comp_r_data[0] = num_comp;
1138     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
1139     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
1140     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
1141     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
1142     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
1143     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
1144     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
1145     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
1146 
1147     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
1148     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
1149     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
1150     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
1151 
1152     // Set name
1153     char *restriction_name;
1154 
1155     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
1156     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
1157     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
1158     CeedCall(CeedFree(&restriction_name));
1159 
1160     // Check
1161     CeedCall(CeedOperatorCheckReady(*op_restrict));
1162 
1163     // Cleanup
1164     CeedCall(CeedQFunctionDestroy(&qf_restrict));
1165   }
1166 
1167   // Prolongation
1168   if (op_prolong) {
1169     CeedInt             *num_comp_p_data;
1170     CeedQFunctionContext ctx_p;
1171     CeedQFunction        qf_prolong;
1172 
1173     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
1174     CeedCall(CeedCalloc(1, &num_comp_p_data));
1175     num_comp_p_data[0] = num_comp;
1176     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
1177     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
1178     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
1179     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
1180     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
1181     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
1182     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
1183     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
1184 
1185     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
1186     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
1187     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
1188     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
1189 
1190     // Set name
1191     char *prolongation_name;
1192 
1193     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
1194     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
1195     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
1196     CeedCall(CeedFree(&prolongation_name));
1197 
1198     // Check
1199     CeedCall(CeedOperatorCheckReady(*op_prolong));
1200 
1201     // Cleanup
1202     CeedCall(CeedQFunctionDestroy(&qf_prolong));
1203   }
1204 
1205   // Check
1206   CeedCall(CeedOperatorCheckReady(*op_coarse));
1207 
1208   // Cleanup
1209   CeedCall(CeedDestroy(&ceed));
1210   CeedCall(CeedVectorDestroy(&mult_vec));
1211   CeedCall(CeedElemRestrictionDestroy(&rstr_fine));
1212   CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine));
1213   CeedCall(CeedBasisDestroy(&basis_c_to_f));
1214   return CEED_ERROR_SUCCESS;
1215 }
1216 
1217 /**
1218   @brief Build 1D mass matrix and Laplacian with perturbation
1219 
1220   @param[in]  interp_1d   Interpolation matrix in one dimension
1221   @param[in]  grad_1d     Gradient matrix in one dimension
1222   @param[in]  q_weight_1d Quadrature weights in one dimension
1223   @param[in]  P_1d        Number of basis nodes in one dimension
1224   @param[in]  Q_1d        Number of quadrature points in one dimension
1225   @param[in]  dim         Dimension of basis
1226   @param[out] mass        Assembled mass matrix in one dimension
1227   @param[out] laplace     Assembled perturbed Laplacian in one dimension
1228 
1229   @return An error code: 0 - success, otherwise - failure
1230 
1231   @ref Developer
1232 **/
1233 CeedPragmaOptimizeOff
1234 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
1235                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
1236   for (CeedInt i = 0; i < P_1d; i++) {
1237     for (CeedInt j = 0; j < P_1d; j++) {
1238       CeedScalar sum = 0.0;
1239       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
1240       mass[i + j * P_1d] = sum;
1241     }
1242   }
1243   // -- Laplacian
1244   for (CeedInt i = 0; i < P_1d; i++) {
1245     for (CeedInt j = 0; j < P_1d; j++) {
1246       CeedScalar sum = 0.0;
1247 
1248       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
1249       laplace[i + j * P_1d] = sum;
1250     }
1251   }
1252   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
1253   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
1254   return CEED_ERROR_SUCCESS;
1255 }
1256 CeedPragmaOptimizeOn
1257 
1258 /// @}
1259 
1260 /// ----------------------------------------------------------------------------
1261 /// CeedOperator Backend API
1262 /// ----------------------------------------------------------------------------
1263 /// @addtogroup CeedOperatorBackend
1264 /// @{
1265 
1266 /**
1267   @brief Select correct basis matrix pointer based on @ref CeedEvalMode
1268 
1269   @param[in]  basis     `CeedBasis` from which to get the basis matrix
1270   @param[in]  eval_mode Current basis evaluation mode
1271   @param[in]  identity  Pointer to identity matrix
1272   @param[out] basis_ptr `CeedBasis` pointer to set
1273 
1274   @ref Backend
1275 **/
1276 int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
1277   switch (eval_mode) {
1278     case CEED_EVAL_NONE:
1279       *basis_ptr = identity;
1280       break;
1281     case CEED_EVAL_INTERP:
1282       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
1283       break;
1284     case CEED_EVAL_GRAD:
1285       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
1286       break;
1287     case CEED_EVAL_DIV:
1288       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
1289       break;
1290     case CEED_EVAL_CURL:
1291       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
1292       break;
1293     case CEED_EVAL_WEIGHT:
1294       break;  // Caught by QF Assembly
1295   }
1296   assert(*basis_ptr != NULL);
1297   return CEED_ERROR_SUCCESS;
1298 }
1299 
1300 /**
1301   @brief Create point block restriction for active `CeedOperatorField`
1302 
1303   @param[in]  rstr             Original `CeedElemRestriction` for active field
1304   @param[out] point_block_rstr Address of the variable where the newly created `CeedElemRestriction` will be stored
1305 
1306   @return An error code: 0 - success, otherwise - failure
1307 
1308   @ref Backend
1309 **/
1310 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) {
1311   Ceed           ceed;
1312   CeedInt        num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets;
1313   CeedSize       l_size;
1314   const CeedInt *offsets;
1315 
1316   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
1317   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
1318 
1319   // Expand offsets
1320   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
1321   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
1322   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
1323   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
1324   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
1325   shift = num_comp;
1326   if (comp_stride != 1) shift *= num_comp;
1327   CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets));
1328   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
1329     point_block_offsets[i] = offsets[i] * shift;
1330   }
1331 
1332   // Create new restriction
1333   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
1334                                      point_block_offsets, point_block_rstr));
1335 
1336   // Cleanup
1337   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
1338   CeedCall(CeedDestroy(&ceed));
1339   return CEED_ERROR_SUCCESS;
1340 }
1341 
1342 /**
1343   @brief Get `CeedQFunctionAssemblyData`
1344 
1345   @param[in]  op   `CeedOperator` to assemble
1346   @param[out] data `CeedQFunctionAssemblyData`
1347 
1348   @return An error code: 0 - success, otherwise - failure
1349 
1350   @ref Backend
1351 **/
1352 int CeedOperatorGetQFunctionAssemblyData(CeedOperator op, CeedQFunctionAssemblyData *data) {
1353   if (!op->qf_assembled) {
1354     CeedQFunctionAssemblyData data;
1355 
1356     CeedCall(CeedQFunctionAssemblyDataCreate(op->ceed, &data));
1357     op->qf_assembled = data;
1358   }
1359   *data = op->qf_assembled;
1360   return CEED_ERROR_SUCCESS;
1361 }
1362 
1363 /**
1364   @brief Create object holding `CeedQFunction` assembly data for `CeedOperator`
1365 
1366   @param[in]  ceed `Ceed` object used to create the `CeedQFunctionAssemblyData`
1367   @param[out] data Address of the variable where the newly created `CeedQFunctionAssemblyData` will be stored
1368 
1369   @return An error code: 0 - success, otherwise - failure
1370 
1371   @ref Backend
1372 **/
1373 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
1374   CeedCall(CeedCalloc(1, data));
1375   (*data)->ref_count = 1;
1376   (*data)->ceed      = ceed;
1377   CeedCall(CeedReference(ceed));
1378   return CEED_ERROR_SUCCESS;
1379 }
1380 
1381 /**
1382   @brief Increment the reference counter for a `CeedQFunctionAssemblyData`
1383 
1384   @param[in,out] data `CeedQFunctionAssemblyData` to increment the reference counter
1385 
1386   @return An error code: 0 - success, otherwise - failure
1387 
1388   @ref Backend
1389 **/
1390 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
1391   data->ref_count++;
1392   return CEED_ERROR_SUCCESS;
1393 }
1394 
1395 /**
1396   @brief Set re-use of `CeedQFunctionAssemblyData`
1397 
1398   @param[in,out] data       `CeedQFunctionAssemblyData` to mark for reuse
1399   @param[in]     reuse_data Boolean flag indicating data re-use
1400 
1401   @return An error code: 0 - success, otherwise - failure
1402 
1403   @ref Backend
1404 **/
1405 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
1406   data->reuse_data        = reuse_data;
1407   data->needs_data_update = true;
1408   return CEED_ERROR_SUCCESS;
1409 }
1410 
1411 /**
1412   @brief Mark `CeedQFunctionAssemblyData` as stale
1413 
1414   @param[in,out] data              `CeedQFunctionAssemblyData` to mark as stale
1415   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
1416 
1417   @return An error code: 0 - success, otherwise - failure
1418 
1419   @ref Backend
1420 **/
1421 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
1422   data->needs_data_update = needs_data_update;
1423   return CEED_ERROR_SUCCESS;
1424 }
1425 
1426 /**
1427   @brief Determine if `CeedQFunctionAssemblyData` needs update
1428 
1429   @param[in]  data             `CeedQFunctionAssemblyData` to mark as stale
1430   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1431 
1432   @return An error code: 0 - success, otherwise - failure
1433 
1434   @ref Backend
1435 **/
1436 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
1437   *is_update_needed = !data->reuse_data || data->needs_data_update;
1438   return CEED_ERROR_SUCCESS;
1439 }
1440 
1441 /**
1442   @brief Copy the pointer to a `CeedQFunctionAssemblyData`.
1443 
1444   Both pointers should be destroyed with @ref CeedQFunctionAssemblyDataDestroy().
1445 
1446   Note: If the value of ` *data_copy` passed to this function is non-`NULL` , then it is assumed that ` *data_copy` is a pointer to a `CeedQFunctionAssemblyData`.
1447         This `CeedQFunctionAssemblyData` will be destroyed if ` *data_copy` is the only reference to this `CeedQFunctionAssemblyData`.
1448 
1449   @param[in]     data      `CeedQFunctionAssemblyData` to copy reference to
1450   @param[in,out] data_copy Variable to store copied reference
1451 
1452   @return An error code: 0 - success, otherwise - failure
1453 
1454   @ref Backend
1455 **/
1456 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1457   CeedCall(CeedQFunctionAssemblyDataReference(data));
1458   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1459   *data_copy = data;
1460   return CEED_ERROR_SUCCESS;
1461 }
1462 
1463 /**
1464   @brief Get setup status for internal objects for `CeedQFunctionAssemblyData`
1465 
1466   @param[in]  data     `CeedQFunctionAssemblyData` to retrieve status
1467   @param[out] is_setup Boolean flag for setup status
1468 
1469   @return An error code: 0 - success, otherwise - failure
1470 
1471   @ref Backend
1472 **/
1473 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1474   *is_setup = data->is_setup;
1475   return CEED_ERROR_SUCCESS;
1476 }
1477 
1478 /**
1479   @brief Set internal objects for `CeedQFunctionAssemblyData`
1480 
1481   @param[in,out] data `CeedQFunctionAssemblyData` to set objects
1482   @param[in]     vec  `CeedVector` to store assembled `CeedQFunction` at quadrature points
1483   @param[in]     rstr `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
1484 
1485   @return An error code: 0 - success, otherwise - failure
1486 
1487   @ref Backend
1488 **/
1489 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1490   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1491   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1492 
1493   data->is_setup = true;
1494   return CEED_ERROR_SUCCESS;
1495 }
1496 
1497 /**
1498   @brief Get internal objects for `CeedQFunctionAssemblyData`
1499 
1500   @param[in,out] data `CeedQFunctionAssemblyData` to set objects
1501   @param[out]    vec  `CeedVector` to store assembled `CeedQFunction` at quadrature points
1502   @param[out]    rstr `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
1503 
1504   @return An error code: 0 - success, otherwise - failure
1505 
1506   @ref Backend
1507 **/
1508 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1509   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1510 
1511   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1512   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1513   return CEED_ERROR_SUCCESS;
1514 }
1515 
1516 /**
1517   @brief Destroy `CeedQFunctionAssemblyData`
1518 
1519   @param[in,out] data  `CeedQFunctionAssemblyData` to destroy
1520 
1521   @return An error code: 0 - success, otherwise - failure
1522 
1523   @ref Backend
1524 **/
1525 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1526   if (!*data || --(*data)->ref_count > 0) {
1527     *data = NULL;
1528     return CEED_ERROR_SUCCESS;
1529   }
1530   CeedCall(CeedDestroy(&(*data)->ceed));
1531   CeedCall(CeedVectorDestroy(&(*data)->vec));
1532   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1533 
1534   CeedCall(CeedFree(data));
1535   return CEED_ERROR_SUCCESS;
1536 }
1537 
1538 /**
1539   @brief Get `CeedOperatorAssemblyData`
1540 
1541   @param[in]  op   `CeedOperator` to assemble
1542   @param[out] data `CeedOperatorAssemblyData`
1543 
1544   @return An error code: 0 - success, otherwise - failure
1545 
1546   @ref Backend
1547 **/
1548 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1549   if (!op->op_assembled) {
1550     CeedOperatorAssemblyData data;
1551 
1552     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1553     op->op_assembled = data;
1554   }
1555   *data = op->op_assembled;
1556   return CEED_ERROR_SUCCESS;
1557 }
1558 
1559 /**
1560   @brief Create object holding `CeedOperator` assembly data.
1561 
1562   The `CeedOperatorAssemblyData` holds an array with references to every active `CeedBasis` used in the `CeedOperator`.
1563   An array with references to the corresponding active `CeedElemRestriction` is also stored.
1564   For each active `CeedBasis, the `CeedOperatorAssemblyData` holds an array of all input and output @ref CeedEvalMode for this `CeedBasis`.
1565   The `CeedOperatorAssemblyData` holds an array of offsets for indexing into the assembled `CeedQFunction` arrays to the row representing each @ref CeedEvalMode.
1566   The number of input columns across all active bases for the assembled `CeedQFunction` is also stored.
1567   Lastly, the `CeedOperatorAssembly` data holds assembled matrices representing the full action of the `CeedBasis` for all @ref CeedEvalMode.
1568 
1569   @param[in]  ceed `Ceed` object used to create the `CeedOperatorAssemblyData`
1570   @param[in]  op   `CeedOperator` to be assembled
1571   @param[out] data Address of the variable where the newly created `CeedOperatorAssemblyData` will be stored
1572 
1573   @return An error code: 0 - success, otherwise - failure
1574 
1575   @ref Backend
1576 **/
1577 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1578   CeedInt             num_active_bases_in = 0, num_active_bases_out = 0, offset = 0;
1579   CeedInt             num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL;
1580   CeedSize          **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1581   CeedEvalMode      **eval_modes_in = NULL, **eval_modes_out = NULL;
1582   CeedQFunctionField *qf_fields;
1583   CeedQFunction       qf;
1584   CeedOperatorField  *op_fields;
1585   bool                is_composite;
1586 
1587   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1588   CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators.");
1589 
1590   // Allocate
1591   CeedCall(CeedCalloc(1, data));
1592   (*data)->ceed = ceed;
1593   CeedCall(CeedReference(ceed));
1594 
1595   // Build OperatorAssembly data
1596   CeedCall(CeedOperatorGetQFunction(op, &qf));
1597 
1598   // Determine active input basis
1599   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1600   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1601   for (CeedInt i = 0; i < num_input_fields; i++) {
1602     CeedVector vec;
1603 
1604     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1605     if (vec == CEED_VECTOR_ACTIVE) {
1606       CeedInt      index = -1, num_comp, q_comp;
1607       CeedEvalMode eval_mode;
1608       CeedBasis    basis_in = NULL;
1609 
1610       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1611       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1612       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1613       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1614       for (CeedInt i = 0; i < num_active_bases_in; i++) {
1615         if ((*data)->active_bases_in[i] == basis_in) index = i;
1616       }
1617       if (index == -1) {
1618         CeedElemRestriction elem_rstr_in;
1619 
1620         index = num_active_bases_in;
1621         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in));
1622         (*data)->active_bases_in[num_active_bases_in] = NULL;
1623         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in]));
1624         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in));
1625         (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL;
1626         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1627         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in]));
1628         CeedCall(CeedElemRestrictionDestroy(&elem_rstr_in));
1629         CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in));
1630         num_eval_modes_in[index] = 0;
1631         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in));
1632         eval_modes_in[index] = NULL;
1633         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in));
1634         eval_mode_offsets_in[index] = NULL;
1635         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in));
1636         (*data)->assembled_bases_in[index] = NULL;
1637         num_active_bases_in++;
1638       }
1639       if (eval_mode != CEED_EVAL_WEIGHT) {
1640         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1641         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1642         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1643         for (CeedInt d = 0; d < q_comp; d++) {
1644           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1645           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1646           offset += num_comp;
1647         }
1648         num_eval_modes_in[index] += q_comp;
1649       }
1650       CeedCall(CeedBasisDestroy(&basis_in));
1651     }
1652     CeedCall(CeedVectorDestroy(&vec));
1653   }
1654 
1655   // Determine active output basis
1656   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1657   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1658   offset = 0;
1659   for (CeedInt i = 0; i < num_output_fields; i++) {
1660     CeedVector vec;
1661 
1662     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1663     if (vec == CEED_VECTOR_ACTIVE) {
1664       CeedInt      index = -1, num_comp, q_comp;
1665       CeedEvalMode eval_mode;
1666       CeedBasis    basis_out = NULL;
1667 
1668       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1669       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1670       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1671       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1672       for (CeedInt i = 0; i < num_active_bases_out; i++) {
1673         if ((*data)->active_bases_out[i] == basis_out) index = i;
1674       }
1675       if (index == -1) {
1676         CeedElemRestriction elem_rstr_out;
1677 
1678         index = num_active_bases_out;
1679         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out));
1680         (*data)->active_bases_out[num_active_bases_out] = NULL;
1681         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out]));
1682         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out));
1683         (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL;
1684         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1685         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out]));
1686         CeedCall(CeedElemRestrictionDestroy(&elem_rstr_out));
1687         CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out));
1688         num_eval_modes_out[index] = 0;
1689         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out));
1690         eval_modes_out[index] = NULL;
1691         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out));
1692         eval_mode_offsets_out[index] = NULL;
1693         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out));
1694         (*data)->assembled_bases_out[index] = NULL;
1695         num_active_bases_out++;
1696       }
1697       if (eval_mode != CEED_EVAL_WEIGHT) {
1698         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1699         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1700         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1701         for (CeedInt d = 0; d < q_comp; d++) {
1702           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1703           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1704           offset += num_comp;
1705         }
1706         num_eval_modes_out[index] += q_comp;
1707       }
1708       CeedCall(CeedBasisDestroy(&basis_out));
1709     }
1710     CeedCall(CeedVectorDestroy(&vec));
1711   }
1712   CeedCall(CeedQFunctionDestroy(&qf));
1713   (*data)->num_active_bases_in   = num_active_bases_in;
1714   (*data)->num_eval_modes_in     = num_eval_modes_in;
1715   (*data)->eval_modes_in         = eval_modes_in;
1716   (*data)->eval_mode_offsets_in  = eval_mode_offsets_in;
1717   (*data)->num_active_bases_out  = num_active_bases_out;
1718   (*data)->num_eval_modes_out    = num_eval_modes_out;
1719   (*data)->eval_modes_out        = eval_modes_out;
1720   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1721   (*data)->num_output_components = offset;
1722   return CEED_ERROR_SUCCESS;
1723 }
1724 
1725 /**
1726   @brief Get `CeedOperator` @ref CeedEvalMode for assembly.
1727 
1728   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1729 
1730   @param[in]  data                  `CeedOperatorAssemblyData`
1731   @param[out] num_active_bases_in   Total number of active bases for input
1732   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input @ref CeedEvalMode, or `NULL`.
1733                                       `eval_modes_in[0]` holds an array of eval modes for the first active `CeedBasis`.
1734   @param[out] eval_modes_in         Pointer to hold arrays of input @ref CeedEvalMode, or `NULL`
1735   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point
1736   @param[out] num_active_bases_out  Total number of active bases for output
1737   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output @ref CeedEvalMode, or `NULL`
1738   @param[out] eval_modes_out        Pointer to hold arrays of output @ref CeedEvalMode, or `NULL`
1739   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1740   @param[out] num_output_components The number of columns in the assembled `CeedQFunction` matrix for each quadrature point, including contributions of all active bases
1741 
1742   @return An error code: 0 - success, otherwise - failure
1743 
1744   @ref Backend
1745 **/
1746 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in,
1747                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out,
1748                                          CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out,
1749                                          CeedSize *num_output_components) {
1750   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1751   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1752   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1753   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1754   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1755   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1756   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1757   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1758   if (num_output_components) *num_output_components = data->num_output_components;
1759   return CEED_ERROR_SUCCESS;
1760 }
1761 
1762 /**
1763   @brief Get `CeedOperator` `CeedBasis` data for assembly.
1764 
1765   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1766 
1767   @param[in]  data                 `CeedOperatorAssemblyData`
1768   @param[out] num_active_bases_in  Number of active input bases, or `NULL`
1769   @param[out] active_bases_in      Pointer to hold active input `CeedBasis`, or `NULL`
1770   @param[out] assembled_bases_in   Pointer to hold assembled active input `B` , or `NULL`
1771   @param[out] num_active_bases_out Number of active output bases, or `NULL`
1772   @param[out] active_bases_out     Pointer to hold active output `CeedBasis`, or `NULL`
1773   @param[out] assembled_bases_out  Pointer to hold assembled active output `B` , or `NULL`
1774 
1775   @return An error code: 0 - success, otherwise - failure
1776 
1777   @ref Backend
1778 **/
1779 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in,
1780                                      const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out,
1781                                      const CeedScalar ***assembled_bases_out) {
1782   // Assemble B_in, B_out if needed
1783   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1784     CeedInt num_qpts;
1785 
1786     if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts));
1787     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts));
1788     for (CeedInt b = 0; b < data->num_active_bases_in; b++) {
1789       bool        has_eval_none = false;
1790       CeedInt     num_nodes;
1791       CeedScalar *B_in = NULL, *identity = NULL;
1792 
1793       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes));
1794       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1795 
1796       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1797         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1798       }
1799       if (has_eval_none) {
1800         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1801         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1802           identity[i * num_nodes + i] = 1.0;
1803         }
1804       }
1805 
1806       for (CeedInt q = 0; q < num_qpts; q++) {
1807         for (CeedInt n = 0; n < num_nodes; n++) {
1808           CeedInt      d_in              = 0, q_comp_in;
1809           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1810 
1811           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1812             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1813             const CeedScalar *B  = NULL;
1814 
1815             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B));
1816             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in));
1817             if (q_comp_in > 1) {
1818               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1819               else B = &B[(++d_in) * num_qpts * num_nodes];
1820             }
1821             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1822             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1823           }
1824         }
1825       }
1826       if (identity) CeedCall(CeedFree(&identity));
1827       data->assembled_bases_in[b] = B_in;
1828     }
1829   }
1830 
1831   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1832     CeedInt num_qpts;
1833 
1834     if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts));
1835     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts));
1836     for (CeedInt b = 0; b < data->num_active_bases_out; b++) {
1837       bool        has_eval_none = false;
1838       CeedInt     num_nodes;
1839       CeedScalar *B_out = NULL, *identity = NULL;
1840 
1841       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes));
1842       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1843 
1844       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1845         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1846       }
1847       if (has_eval_none) {
1848         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1849         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1850           identity[i * num_nodes + i] = 1.0;
1851         }
1852       }
1853 
1854       for (CeedInt q = 0; q < num_qpts; q++) {
1855         for (CeedInt n = 0; n < num_nodes; n++) {
1856           CeedInt      d_out              = 0, q_comp_out;
1857           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1858 
1859           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1860             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1861             const CeedScalar *B  = NULL;
1862 
1863             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B));
1864             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out));
1865             if (q_comp_out > 1) {
1866               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1867               else B = &B[(++d_out) * num_qpts * num_nodes];
1868             }
1869             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1870             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1871           }
1872         }
1873       }
1874       if (identity) CeedCall(CeedFree(&identity));
1875       data->assembled_bases_out[b] = B_out;
1876     }
1877   }
1878 
1879   // Pass out assembled data
1880   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1881   if (active_bases_in) *active_bases_in = data->active_bases_in;
1882   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1883   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1884   if (active_bases_out) *active_bases_out = data->active_bases_out;
1885   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1886   return CEED_ERROR_SUCCESS;
1887 }
1888 
1889 /**
1890   @brief Get `CeedOperator` `CeedBasis` data for assembly.
1891 
1892   Note: See @ref CeedOperatorAssemblyDataCreate() for a full description of the data stored in this object.
1893 
1894   @param[in]  data                      `CeedOperatorAssemblyData`
1895   @param[out] num_active_elem_rstrs_in  Number of active input element restrictions, or `NULL`
1896   @param[out] active_elem_rstrs_in      Pointer to hold active input `CeedElemRestriction`, or `NULL`
1897   @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or `NULL`
1898   @param[out] active_elem_rstrs_out     Pointer to hold active output `CeedElemRestriction`, or `NULL`
1899 
1900   @return An error code: 0 - success, otherwise - failure
1901 
1902   @ref Backend
1903 **/
1904 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in,
1905                                                 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out,
1906                                                 CeedElemRestriction **active_elem_rstrs_out) {
1907   if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in;
1908   if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in;
1909   if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out;
1910   if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out;
1911   return CEED_ERROR_SUCCESS;
1912 }
1913 
1914 /**
1915   @brief Destroy `CeedOperatorAssemblyData`
1916 
1917   @param[in,out] data `CeedOperatorAssemblyData` to destroy
1918 
1919   @return An error code: 0 - success, otherwise - failure
1920 
1921   @ref Backend
1922 **/
1923 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1924   if (!*data) {
1925     *data = NULL;
1926     return CEED_ERROR_SUCCESS;
1927   }
1928   CeedCall(CeedDestroy(&(*data)->ceed));
1929   for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) {
1930     CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b]));
1931     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b]));
1932     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1933     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1934     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1935   }
1936   for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) {
1937     CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b]));
1938     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b]));
1939     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1940     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1941     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1942   }
1943   CeedCall(CeedFree(&(*data)->active_bases_in));
1944   CeedCall(CeedFree(&(*data)->active_bases_out));
1945   CeedCall(CeedFree(&(*data)->active_elem_rstrs_in));
1946   CeedCall(CeedFree(&(*data)->active_elem_rstrs_out));
1947   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1948   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1949   CeedCall(CeedFree(&(*data)->eval_modes_in));
1950   CeedCall(CeedFree(&(*data)->eval_modes_out));
1951   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1952   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1953   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1954   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1955 
1956   CeedCall(CeedFree(data));
1957   return CEED_ERROR_SUCCESS;
1958 }
1959 
1960 /**
1961   @brief Retrieve fallback `CeedOperator` with a reference `Ceed` for advanced `CeedOperator` functionality
1962 
1963   @param[in]  op          `CeedOperator` to retrieve fallback for
1964   @param[out] op_fallback Fallback `CeedOperator`
1965 
1966   @return An error code: 0 - success, otherwise - failure
1967 
1968   @ref Backend
1969 **/
1970 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
1971   // Create if needed
1972   if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op));
1973   if (op->op_fallback) {
1974     bool is_debug;
1975     Ceed ceed;
1976 
1977     CeedCall(CeedOperatorGetCeed(op, &ceed));
1978     CeedCall(CeedIsDebug(ceed, &is_debug));
1979     if (is_debug) {
1980       Ceed        ceed_fallback;
1981       const char *resource, *resource_fallback, *op_name;
1982 
1983       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
1984       CeedCall(CeedGetResource(ceed, &resource));
1985       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
1986       CeedCall(CeedOperatorGetName(op, &op_name));
1987 
1988       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n");
1989       CeedDebug(ceed, "Falling back from Operator with backend %s at address %p to Operator with backend %s at address %p for CeedOperator \"%s\"\n",
1990                 resource, op, resource_fallback, op->op_fallback, op_name);
1991       CeedCall(CeedDestroy(&ceed_fallback));
1992     }
1993     CeedCall(CeedDestroy(&ceed));
1994   }
1995   *op_fallback = op->op_fallback;
1996   return CEED_ERROR_SUCCESS;
1997 }
1998 
1999 /**
2000   @brief Get the parent `CeedOperator` for a fallback `CeedOperator`
2001 
2002   @param[in]  op     `CeedOperator` context
2003   @param[out] parent Variable to store parent `CeedOperator` context
2004 
2005   @return An error code: 0 - success, otherwise - failure
2006 
2007   @ref Backend
2008 **/
2009 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) {
2010   *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL;
2011   return CEED_ERROR_SUCCESS;
2012 }
2013 
2014 /**
2015   @brief Get the `Ceed` context of the parent `CeedOperator` for a fallback `CeedOperator`
2016 
2017   @param[in]  op     `CeedOperator` context
2018   @param[out] parent Variable to store parent `Ceed` context
2019 
2020   @return An error code: 0 - success, otherwise - failure
2021 
2022   @ref Backend
2023 **/
2024 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) {
2025   *parent = NULL;
2026   if (op->op_fallback_parent) CeedCall(CeedReferenceCopy(op->op_fallback_parent->ceed, parent));
2027   else CeedCall(CeedReferenceCopy(CeedOperatorReturnCeed(op), parent));
2028   return CEED_ERROR_SUCCESS;
2029 }
2030 
2031 /// @}
2032 
2033 /// ----------------------------------------------------------------------------
2034 /// CeedOperator Public API
2035 /// ----------------------------------------------------------------------------
2036 /// @addtogroup CeedOperatorUser
2037 /// @{
2038 
2039 /**
2040   @brief Assemble a linear `CeedQFunction` associated with a `CeedOperator`.
2041 
2042   This returns a `CeedVector` containing a matrix at each quadrature point providing the action of the `CeedQFunction` associated with the `CeedOperator`.
2043   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices representing the action of the `CeedQFunction` for a corresponding quadrature point on an element.
2044 
2045   Inputs and outputs are in the order provided by the user when adding `CeedOperator` fields.
2046   For example, a `CeedQFunction` with inputs `u` and `gradu` and outputs `gradv` and `v` , provided in that order, would result in an assembled `CeedQFunction` that consists of `(1 + dim) x (dim + 1)` matrices at each quadrature point acting on the input ` [u, du_0, du_1]` and producing the output `[dv_0, dv_1, v]`.
2047 
2048   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2049 
2050   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2051   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
2052   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
2053   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2054 
2055   @return An error code: 0 - success, otherwise - failure
2056 
2057   @ref User
2058 **/
2059 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
2060   CeedCall(CeedOperatorCheckReady(op));
2061 
2062   if (op->LinearAssembleQFunction) {
2063     // Backend version
2064     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
2065   } else {
2066     // Operator fallback
2067     CeedOperator op_fallback;
2068 
2069     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleQFunction\n");
2070     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2071     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
2072     else return CeedError(CeedOperatorReturnCeed(op), CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
2073   }
2074   return CEED_ERROR_SUCCESS;
2075 }
2076 
2077 /**
2078   @brief Assemble `CeedQFunction` and store result internally.
2079 
2080   Return copied references of stored data to the caller.
2081   Caller is responsible for ownership and destruction of the copied references.
2082   See also @ref CeedOperatorLinearAssembleQFunction().
2083 
2084   Note: If the value of `assembled` or `rstr` passed to this function are non-`NULL` , then it is assumed that they hold valid pointers.
2085         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
2086 
2087   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2088   @param[out] assembled `CeedVector` to store assembled `CeedQFunction` at quadrature points
2089   @param[out] rstr      `CeedElemRestriction` for `CeedVector` containing assembled `CeedQFunction`
2090   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2091 
2092   @return An error code: 0 - success, otherwise - failure
2093 
2094   @ref User
2095 **/
2096 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
2097   return CeedOperatorLinearAssembleQFunctionBuildOrUpdate_Core(op, true, assembled, rstr, request);
2098 }
2099 
2100 /**
2101   @brief Assemble the diagonal of a square linear `CeedOperator`
2102 
2103   This overwrites a `CeedVector` with the diagonal of a linear `CeedOperator`.
2104 
2105   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2106 
2107   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2108 
2109   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2110   @param[out] assembled `CeedVector` to store assembled `CeedOperator` diagonal
2111   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2112 
2113   @return An error code: 0 - success, otherwise - failure
2114 
2115   @ref User
2116 **/
2117 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2118   bool     is_composite;
2119   CeedSize input_size = 0, output_size = 0;
2120 
2121   CeedCall(CeedOperatorCheckReady(op));
2122   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2123 
2124   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2125   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2126 
2127   // Early exit for empty operator
2128   if (!is_composite) {
2129     CeedInt num_elem = 0;
2130 
2131     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2132     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2133   }
2134 
2135   if (op->LinearAssembleDiagonal) {
2136     // Backend version
2137     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
2138     return CEED_ERROR_SUCCESS;
2139   } else if (op->LinearAssembleAddDiagonal) {
2140     // Backend version with zeroing first
2141     CeedCall(CeedVectorSetValue(assembled, 0.0));
2142     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
2143     return CEED_ERROR_SUCCESS;
2144   } else if (is_composite) {
2145     // Default to summing contributions of suboperators
2146     CeedCall(CeedVectorSetValue(assembled, 0.0));
2147     CeedCall(CeedOperatorLinearAssembleAddDiagonalComposite(op, request, false, assembled));
2148     return CEED_ERROR_SUCCESS;
2149   } else {
2150     // Operator fallback
2151     CeedOperator op_fallback;
2152 
2153     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleDiagonal\n");
2154     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2155     if (op_fallback) {
2156       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
2157       return CEED_ERROR_SUCCESS;
2158     }
2159   }
2160   // Default interface implementation
2161   CeedCall(CeedVectorSetValue(assembled, 0.0));
2162   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
2163   return CEED_ERROR_SUCCESS;
2164 }
2165 
2166 /**
2167   @brief Assemble the diagonal of a square linear `CeedOperator`.
2168 
2169   This sums into a `CeedVector` the diagonal of a linear `CeedOperator`.
2170 
2171   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2172 
2173   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2174 
2175   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2176   @param[out] assembled `CeedVector` to store assembled `CeedOperator` diagonal
2177   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2178 
2179   @return An error code: 0 - success, otherwise - failure
2180 
2181   @ref User
2182 **/
2183 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2184   bool     is_composite;
2185   CeedSize input_size = 0, output_size = 0;
2186 
2187   CeedCall(CeedOperatorCheckReady(op));
2188   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2189 
2190   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2191   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2192 
2193   // Early exit for empty operator
2194   if (!is_composite) {
2195     CeedInt num_elem = 0;
2196 
2197     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2198     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2199   }
2200 
2201   if (op->LinearAssembleAddDiagonal) {
2202     // Backend version
2203     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
2204     return CEED_ERROR_SUCCESS;
2205   } else if (is_composite) {
2206     // Default to summing contributions of suboperators
2207     CeedCall(CeedOperatorLinearAssembleAddDiagonalComposite(op, request, false, assembled));
2208     return CEED_ERROR_SUCCESS;
2209   } else {
2210     // Operator fallback
2211     CeedOperator op_fallback;
2212 
2213     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleAddDiagonal\n");
2214     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2215     if (op_fallback) {
2216       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
2217       return CEED_ERROR_SUCCESS;
2218     }
2219   }
2220   // Default interface implementation
2221   CeedCall(CeedOperatorLinearAssembleAddDiagonalSingle(op, request, false, assembled));
2222   return CEED_ERROR_SUCCESS;
2223 }
2224 
2225 /**
2226    @brief Fully assemble the point-block diagonal pattern of a linear `CeedOperator`.
2227 
2228    Expected to be used in conjunction with @ref CeedOperatorLinearAssemblePointBlockDiagonal().
2229 
2230    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2231    Note that the `(i, j)` pairs are unique.
2232    This function returns the number of entries and their `(i, j)` locations, while @ref CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in the same ordering.
2233 
2234    This will generally be slow unless your operator is low-order.
2235 
2236    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2237 
2238    @param[in]  op          `CeedOperator` to assemble
2239    @param[out] num_entries Number of entries in coordinate nonzero pattern
2240    @param[out] rows        Row number for each entry
2241    @param[out] cols        Column number for each entry
2242 
2243    @ref User
2244 **/
2245 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2246   bool          is_composite;
2247   CeedInt       num_active_components, num_sub_operators;
2248   CeedOperator *sub_operators;
2249 
2250   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2251 
2252   CeedSize input_size = 0, output_size = 0;
2253   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2254   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2255 
2256   if (is_composite) {
2257     CeedCall(CeedOperatorCompositeGetNumSub(op, &num_sub_operators));
2258     CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
2259   } else {
2260     sub_operators     = &op;
2261     num_sub_operators = 1;
2262   }
2263 
2264   // Verify operator can be assembled correctly
2265   {
2266     CeedOperatorAssemblyData data;
2267     CeedInt                  num_active_elem_rstrs, comp_stride;
2268     CeedElemRestriction     *active_elem_rstrs;
2269 
2270     // Get initial values to check against
2271     CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data));
2272     CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
2273     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride));
2274     CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components));
2275 
2276     // Verify that all active element restrictions have same component stride and number of components
2277     for (CeedInt k = 0; k < num_sub_operators; k++) {
2278       CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data));
2279       CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
2280       for (CeedInt i = 0; i < num_active_elem_rstrs; i++) {
2281         CeedInt comp_stride_sub, num_active_components_sub;
2282 
2283         CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub));
2284         CeedCheck(comp_stride == comp_stride_sub, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION,
2285                   "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub);
2286         CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub));
2287         CeedCheck(num_active_components == num_active_components_sub, CeedOperatorReturnCeed(op), CEED_ERROR_INCOMPATIBLE,
2288                   "All suboperators must have the same number of output components."
2289                   " Previous: %" CeedInt_FMT " Current: %" CeedInt_FMT,
2290                   num_active_components, num_active_components_sub);
2291       }
2292     }
2293   }
2294   *num_entries = input_size * num_active_components;
2295   CeedCall(CeedCalloc(*num_entries, rows));
2296   CeedCall(CeedCalloc(*num_entries, cols));
2297 
2298   for (CeedInt o = 0; o < num_sub_operators; o++) {
2299     CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr;
2300     CeedInt             comp_stride, num_elem, elem_size;
2301     const CeedInt      *offsets, *point_block_offsets;
2302 
2303     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr));
2304     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride));
2305     CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem));
2306     CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size));
2307     CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets));
2308 
2309     CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr));
2310     CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets));
2311 
2312     for (CeedSize i = 0; i < num_elem * elem_size; i++) {
2313       for (CeedInt c_out = 0; c_out < num_active_components; c_out++) {
2314         for (CeedInt c_in = 0; c_in < num_active_components; c_in++) {
2315           (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride;
2316           (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride;
2317         }
2318       }
2319     }
2320 
2321     CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets));
2322     CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets));
2323     CeedCall(CeedElemRestrictionDestroy(&active_elem_rstr));
2324     CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr));
2325   }
2326   return CEED_ERROR_SUCCESS;
2327 }
2328 
2329 /**
2330   @brief Assemble the point block diagonal of a square linear `CeedOperator`.
2331 
2332   This overwrites a `CeedVector` with the point block diagonal of a linear `CeedOperator`.
2333 
2334   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2335 
2336   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2337 
2338   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2339   @param[out] assembled `CeedVector` to store assembled `CeedOperator` point block diagonal, provided in row-major form with an `num_comp * num_comp` block at each node.
2340                           The dimensions of this vector are derived from the active vector for the `CeedOperator`.
2341                           The array has shape `[nodes, component out, component in]`.
2342   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2343 
2344   @return An error code: 0 - success, otherwise - failure
2345 
2346   @ref User
2347 **/
2348 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2349   bool     is_composite;
2350   CeedSize input_size = 0, output_size = 0;
2351 
2352   CeedCall(CeedOperatorCheckReady(op));
2353   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2354 
2355   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2356   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2357 
2358   // Early exit for empty operator
2359   if (!is_composite) {
2360     CeedInt num_elem = 0;
2361 
2362     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2363     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2364   }
2365 
2366   if (op->LinearAssemblePointBlockDiagonal) {
2367     // Backend version
2368     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
2369     return CEED_ERROR_SUCCESS;
2370   } else if (op->LinearAssembleAddPointBlockDiagonal) {
2371     // Backend version with zeroing first
2372     CeedCall(CeedVectorSetValue(assembled, 0.0));
2373     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2374     return CEED_ERROR_SUCCESS;
2375   } else {
2376     // Operator fallback
2377     CeedOperator op_fallback;
2378 
2379     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssemblePointBlockDiagonal\n");
2380     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2381     if (op_fallback) {
2382       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
2383       return CEED_ERROR_SUCCESS;
2384     }
2385   }
2386   // Default interface implementation
2387   CeedCall(CeedVectorSetValue(assembled, 0.0));
2388   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2389   return CEED_ERROR_SUCCESS;
2390 }
2391 
2392 /**
2393   @brief Assemble the point block diagonal of a square linear `CeedOperator`.
2394 
2395   This sums into a `CeedVector` with the point block diagonal of a linear `CeedOperator`.
2396 
2397   Note: Currently only non-composite `CeedOperator` with a single field and composite `CeedOperator` with single field sub-operators are supported.
2398 
2399   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2400 
2401   @param[in]  op        `CeedOperator` to assemble `CeedQFunction`
2402   @param[out] assembled `CeedVector` to store assembled CeedOperator point block diagonal, provided in row-major form with an `num_comp * num_comp` block at each node.
2403                           The dimensions of this vector are derived from the active vector for the `CeedOperator`.
2404                           The array has shape `[nodes, component out, component in]`.
2405   @param[in]  request   Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2406 
2407   @return An error code: 0 - success, otherwise - failure
2408 
2409   @ref User
2410 **/
2411 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2412   bool     is_composite;
2413   CeedSize input_size = 0, output_size = 0;
2414 
2415   CeedCall(CeedOperatorCheckReady(op));
2416   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2417 
2418   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2419   CeedCheck(input_size == output_size, CeedOperatorReturnCeed(op), CEED_ERROR_DIMENSION, "Operator must be square");
2420 
2421   // Early exit for empty operator
2422   if (!is_composite) {
2423     CeedInt num_elem = 0;
2424 
2425     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2426     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2427   }
2428 
2429   if (op->LinearAssembleAddPointBlockDiagonal) {
2430     // Backend version
2431     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
2432     return CEED_ERROR_SUCCESS;
2433   } else {
2434     // Operator fallback
2435     CeedOperator op_fallback;
2436 
2437     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleAddPointBlockDiagonal\n");
2438     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2439     if (op_fallback) {
2440       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
2441       return CEED_ERROR_SUCCESS;
2442     }
2443   }
2444   // Default interface implementation
2445   if (is_composite) {
2446     CeedCall(CeedOperatorLinearAssembleAddDiagonalComposite(op, request, true, assembled));
2447   } else {
2448     CeedCall(CeedOperatorLinearAssembleAddDiagonalSingle(op, request, true, assembled));
2449   }
2450   return CEED_ERROR_SUCCESS;
2451 }
2452 
2453 /**
2454    @brief Fully assemble the nonzero pattern of a linear `CeedOperator`.
2455 
2456    Expected to be used in conjunction with @ref CeedOperatorLinearAssemble().
2457 
2458    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2459    Note that the `(i, j)` pairs are not unique and may repeat.
2460    This function returns the number of entries and their `(i, j)` locations, while @ref CeedOperatorLinearAssemble() provides the values in the same ordering.
2461 
2462    This will generally be slow unless your operator is low-order.
2463 
2464    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2465 
2466    @param[in]  op          `CeedOperator` to assemble
2467    @param[out] num_entries Number of entries in coordinate nonzero pattern
2468    @param[out] rows        Row number for each entry
2469    @param[out] cols        Column number for each entry
2470 
2471    @ref User
2472 **/
2473 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2474   bool          is_composite;
2475   CeedInt       num_suboperators, offset = 0;
2476   CeedSize      single_entries;
2477   CeedOperator *sub_operators;
2478 
2479   CeedCall(CeedOperatorCheckReady(op));
2480   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2481 
2482   if (op->LinearAssembleSymbolic) {
2483     // Backend version
2484     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
2485     return CEED_ERROR_SUCCESS;
2486   } else {
2487     // Operator fallback
2488     CeedOperator op_fallback;
2489 
2490     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssembleSymbolic\n");
2491     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2492     if (op_fallback) {
2493       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
2494       return CEED_ERROR_SUCCESS;
2495     }
2496   }
2497 
2498   // Default interface implementation
2499 
2500   // Count entries and allocate rows, cols arrays
2501   CeedCall(CeedOperatorLinearAssembleGetNumEntries(op, num_entries));
2502   CeedCall(CeedCalloc(*num_entries, rows));
2503   CeedCall(CeedCalloc(*num_entries, cols));
2504 
2505   // Assemble nonzero locations
2506   if (is_composite) {
2507     CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
2508     CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
2509     for (CeedInt k = 0; k < num_suboperators; ++k) {
2510       CeedCall(CeedOperatorAssembleSymbolicSingle(sub_operators[k], offset, *rows, *cols));
2511       CeedCall(CeedOperatorAssemblyCountEntriesSingle(sub_operators[k], &single_entries));
2512       offset += single_entries;
2513     }
2514   } else {
2515     CeedCall(CeedOperatorAssembleSymbolicSingle(op, offset, *rows, *cols));
2516   }
2517   return CEED_ERROR_SUCCESS;
2518 }
2519 
2520 /**
2521    @brief Fully assemble the nonzero entries of a linear operator.
2522 
2523    Expected to be used in conjunction with @ref CeedOperatorLinearAssembleSymbolic().
2524 
2525    The assembly routines use coordinate format, with `num_entries` tuples of the form `(i, j, value)` which indicate that value should be added to the matrix in entry `(i, j)`.
2526    Note that the `(i, j)` pairs are not unique and may repeat.
2527    This function returns the values of the nonzero entries to be added, their `(i, j)` locations are provided by @ref CeedOperatorLinearAssembleSymbolic().
2528 
2529    This will generally be slow unless your operator is low-order.
2530 
2531    Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2532 
2533    @param[in]  op     `CeedOperator` to assemble
2534    @param[out] values Values to assemble into matrix
2535 
2536    @ref User
2537 **/
2538 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
2539   bool          is_composite;
2540   CeedInt       num_suboperators, offset = 0;
2541   CeedSize      single_entries = 0;
2542   CeedOperator *sub_operators;
2543 
2544   CeedCall(CeedOperatorCheckReady(op));
2545   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2546 
2547   // Early exit for empty operator
2548   if (!is_composite) {
2549     CeedInt num_elem = 0;
2550 
2551     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2552     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2553   }
2554 
2555   if (op->LinearAssemble) {
2556     // Backend version
2557     CeedCall(op->LinearAssemble(op, values));
2558     return CEED_ERROR_SUCCESS;
2559   } else if (is_composite) {
2560     // Default to summing contributions of suboperators
2561     CeedCall(CeedVectorSetValue(values, 0.0));
2562     CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
2563     CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
2564     for (CeedInt k = 0; k < num_suboperators; k++) {
2565       CeedCall(CeedOperatorAssembleSingle(sub_operators[k], offset, values));
2566       CeedCall(CeedOperatorAssemblyCountEntriesSingle(sub_operators[k], &single_entries));
2567       offset += single_entries;
2568     }
2569     return CEED_ERROR_SUCCESS;
2570   } else if (op->LinearAssembleSingle) {
2571     CeedCall(CeedVectorSetValue(values, 0.0));
2572     CeedCall(CeedOperatorAssembleSingle(op, offset, values));
2573     return CEED_ERROR_SUCCESS;
2574   } else {
2575     // Operator fallback
2576     CeedOperator op_fallback;
2577 
2578     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorLinearAssemble\n");
2579     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2580     if (op_fallback) {
2581       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2582       return CEED_ERROR_SUCCESS;
2583     }
2584   }
2585 
2586   // Default to interface version if non-composite and no fallback
2587   CeedCall(CeedVectorSetValue(values, 0.0));
2588   CeedCall(CeedOperatorAssembleSingle(op, offset, values));
2589   return CEED_ERROR_SUCCESS;
2590 }
2591 
2592 /**
2593   @brief Get the multiplicity of nodes across sub-operators in a composite `CeedOperator`.
2594 
2595   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2596 
2597   @param[in]  op               Composite `CeedOperator`
2598   @param[in]  num_skip_indices Number of sub-operators to skip
2599   @param[in]  skip_indices     Array of indices of sub-operators to skip
2600   @param[out] mult             Vector to store multiplicity (of size `l_size` )
2601 
2602   @return An error code: 0 - success, otherwise - failure
2603 
2604   @ref User
2605 **/
2606 int CeedOperatorCompositeGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2607   Ceed                ceed;
2608   CeedInt             num_suboperators;
2609   CeedSize            l_vec_len;
2610   CeedScalar         *mult_array;
2611   CeedVector          ones_l_vec;
2612   CeedElemRestriction elem_rstr, mult_elem_rstr;
2613   CeedOperator       *sub_operators;
2614 
2615   CeedCall(CeedOperatorCheckReady(op));
2616 
2617   // Zero mult vector
2618   CeedCall(CeedVectorSetValue(mult, 0.0));
2619 
2620   // Get suboperators
2621   CeedCall(CeedOperatorCompositeGetNumSub(op, &num_suboperators));
2622   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2623   CeedCall(CeedOperatorCompositeGetSubList(op, &sub_operators));
2624 
2625   // Work vector
2626   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2627   CeedCall(CeedOperatorGetCeed(op, &ceed));
2628   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2629   CeedCall(CeedDestroy(&ceed));
2630   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2631   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2632 
2633   // Compute multiplicity across suboperators
2634   for (CeedInt i = 0; i < num_suboperators; i++) {
2635     const CeedScalar *sub_mult_array;
2636     CeedVector        sub_mult_l_vec, ones_e_vec;
2637 
2638     // -- Check for suboperator to skip
2639     for (CeedInt j = 0; j < num_skip_indices; j++) {
2640       if (skip_indices[j] == i) continue;
2641     }
2642 
2643     // -- Sub operator multiplicity
2644     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2645     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr));
2646     CeedCall(CeedElemRestrictionDestroy(&elem_rstr));
2647     CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2648     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2649     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2650     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2651     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2652     // ---- Flag every node present in the current suboperator
2653     for (CeedSize j = 0; j < l_vec_len; j++) {
2654       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2655     }
2656     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2657     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2658     CeedCall(CeedVectorDestroy(&ones_e_vec));
2659     CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr));
2660   }
2661   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2662   CeedCall(CeedVectorDestroy(&ones_l_vec));
2663   return CEED_ERROR_SUCCESS;
2664 }
2665 
2666 /**
2667   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator`, creating the prolongation basis from the fine and coarse grid interpolation.
2668 
2669   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2670 
2671   @param[in]  op_fine      Fine grid `CeedOperator`
2672   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2673   @param[in]  rstr_coarse  Coarse grid `CeedElemRestriction`
2674   @param[in]  basis_coarse Coarse grid active vector `CeedBasis`
2675   @param[out] op_coarse    Coarse grid `CeedOperator`
2676   @param[out] op_prolong   Coarse to fine `CeedOperator`, or `NULL`
2677   @param[out] op_restrict  Fine to coarse `CeedOperator`, or `NULL`
2678 
2679   @return An error code: 0 - success, otherwise - failure
2680 
2681   @ref User
2682 **/
2683 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2684                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2685   CeedBasis basis_c_to_f = NULL;
2686 
2687   CeedCall(CeedOperatorCheckReady(op_fine));
2688 
2689   // Build prolongation matrix, if required
2690   if (op_prolong || op_restrict) {
2691     CeedBasis basis_fine;
2692 
2693     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2694     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2695     CeedCall(CeedBasisDestroy(&basis_fine));
2696   }
2697 
2698   // Core code
2699   CeedCall(
2700       CeedOperatorMultigridLevelCreateSingle_Core(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2701   return CEED_ERROR_SUCCESS;
2702 }
2703 
2704 /**
2705   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator` with a tensor basis for the active basis.
2706 
2707   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2708 
2709   @param[in]  op_fine       Fine grid `CeedOperator`
2710   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2711   @param[in]  rstr_coarse   Coarse grid `CeedElemRestriction`
2712   @param[in]  basis_coarse  Coarse grid active vector `CeedBasis`
2713   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction `CeedOperator`
2714   @param[out] op_coarse     Coarse grid `CeedOperator`
2715   @param[out] op_prolong    Coarse to fine `CeedOperator`, or `NULL`
2716   @param[out] op_restrict   Fine to coarse `CeedOperator`, or `NULL`
2717 
2718   @return An error code: 0 - success, otherwise - failure
2719 
2720   @ref User
2721 **/
2722 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2723                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2724                                              CeedOperator *op_restrict) {
2725   Ceed      ceed;
2726   CeedInt   Q_f, Q_c;
2727   CeedBasis basis_fine, basis_c_to_f = NULL;
2728 
2729   CeedCall(CeedOperatorCheckReady(op_fine));
2730   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2731 
2732   // Check for compatible quadrature spaces
2733   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2734   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2735   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2736   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION,
2737             "Bases must have compatible quadrature spaces."
2738             " Fine grid: %" CeedInt_FMT " points, Coarse grid: %" CeedInt_FMT " points",
2739             Q_f, Q_c);
2740 
2741   // Create coarse to fine basis, if required
2742   if (op_prolong || op_restrict) {
2743     CeedInt     dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2744     CeedScalar *q_ref, *q_weight, *grad;
2745 
2746     // Check if interpolation matrix is provided
2747     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2748               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2749     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2750     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2751     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2752     CeedCall(CeedBasisDestroy(&basis_fine));
2753     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2754     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2755     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2756     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2757     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2758     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2759     CeedCall(CeedFree(&q_ref));
2760     CeedCall(CeedFree(&q_weight));
2761     CeedCall(CeedFree(&grad));
2762   }
2763 
2764   // Core code
2765   CeedCall(
2766       CeedOperatorMultigridLevelCreateSingle_Core(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2767   CeedCall(CeedDestroy(&ceed));
2768   return CEED_ERROR_SUCCESS;
2769 }
2770 
2771 /**
2772   @brief Create a multigrid coarse `CeedOperator` and level transfer `CeedOperator` for a `CeedOperator` with a non-tensor basis for the active vector
2773 
2774   Note: Calling this function asserts that setup is complete and sets all four `CeedOperator` as immutable.
2775 
2776   @param[in]  op_fine       Fine grid `CeedOperator`
2777   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or `NULL` if not creating prolongation/restriction `CeedOperator`
2778   @param[in]  rstr_coarse   Coarse grid `CeedElemRestriction`
2779   @param[in]  basis_coarse  Coarse grid active vector `CeedBasis`
2780   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or `NULL` if not creating prolongation/restriction `CeedOperator`
2781   @param[out] op_coarse     Coarse grid `CeedOperator`
2782   @param[out] op_prolong    Coarse to fine `CeedOperator`, or `NULL`
2783   @param[out] op_restrict   Fine to coarse `CeedOperator`, or `NULL`
2784 
2785   @return An error code: 0 - success, otherwise - failure
2786 
2787   @ref User
2788 **/
2789 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2790                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2791                                        CeedOperator *op_restrict) {
2792   Ceed      ceed;
2793   CeedInt   Q_f, Q_c;
2794   CeedBasis basis_fine, basis_c_to_f = NULL;
2795 
2796   CeedCall(CeedOperatorCheckReady(op_fine));
2797   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2798 
2799   // Check for compatible quadrature spaces
2800   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2801   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2802   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2803   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2804 
2805   // Coarse to fine basis
2806   if (op_prolong || op_restrict) {
2807     CeedInt          dim, num_comp, num_nodes_c, num_nodes_f;
2808     CeedScalar      *q_ref, *q_weight, *grad;
2809     CeedElemTopology topo;
2810 
2811     // Check if interpolation matrix is provided
2812     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2813               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2814     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2815     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2816     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2817     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2818     CeedCall(CeedBasisDestroy(&basis_fine));
2819     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2820     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2821     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2822     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2823     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2824     CeedCall(CeedFree(&q_ref));
2825     CeedCall(CeedFree(&q_weight));
2826     CeedCall(CeedFree(&grad));
2827   }
2828 
2829   // Core code
2830   CeedCall(
2831       CeedOperatorMultigridLevelCreateSingle_Core(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2832   CeedCall(CeedDestroy(&ceed));
2833   return CEED_ERROR_SUCCESS;
2834 }
2835 
2836 /**
2837   @brief Build a FDM based approximate inverse for each element for a `CeedOperator`.
2838 
2839   This returns a `CeedOperator` and `CeedVector` to apply a Fast Diagonalization Method based approximate inverse.
2840   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2841   The assembled `CeedQFunction` is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T \hat S V\f$.
2842   The `CeedOperator` must be linear and non-composite.
2843   The associated `CeedQFunction` must therefore also be linear.
2844 
2845   Note: Calling this function asserts that setup is complete and sets the `CeedOperator` as immutable.
2846 
2847   @param[in]  op      `CeedOperator` to create element inverses
2848   @param[out] fdm_inv `CeedOperator` to apply the action of a FDM based inverse for each element
2849   @param[in]  request Address of @ref CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2850 
2851   @return An error code: 0 - success, otherwise - failure
2852 
2853   @ref User
2854 **/
2855 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2856   Ceed                 ceed, ceed_parent;
2857   bool                 interp = false, grad = false, is_tensor_basis = true;
2858   CeedInt              num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2859   CeedScalar          *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg;
2860   const CeedScalar    *interp_1d, *grad_1d, *q_weight_1d;
2861   CeedVector           q_data;
2862   CeedElemRestriction  rstr  = NULL, rstr_qd_i;
2863   CeedBasis            basis = NULL, fdm_basis;
2864   CeedQFunctionContext ctx_fdm;
2865   CeedQFunctionField  *qf_fields;
2866   CeedQFunction        qf, qf_fdm;
2867   CeedOperatorField   *op_fields;
2868 
2869   CeedCall(CeedOperatorCheckReady(op));
2870 
2871   if (op->CreateFDMElementInverse) {
2872     // Backend version
2873     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2874     return CEED_ERROR_SUCCESS;
2875   } else {
2876     // Operator fallback
2877     CeedOperator op_fallback;
2878 
2879     CeedDebug(CeedOperatorReturnCeed(op), "\nFalling back for CeedOperatorCreateFDMElementInverse\n");
2880     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2881     if (op_fallback) {
2882       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2883       return CEED_ERROR_SUCCESS;
2884     }
2885   }
2886 
2887   // Default interface implementation
2888   CeedCall(CeedOperatorGetCeed(op, &ceed));
2889   CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent));
2890   CeedCall(CeedOperatorGetQFunction(op, &qf));
2891 
2892   // Determine active input basis
2893   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2894   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2895   for (CeedInt i = 0; i < num_input_fields; i++) {
2896     CeedVector vec;
2897 
2898     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2899     if (vec == CEED_VECTOR_ACTIVE) {
2900       CeedEvalMode eval_mode;
2901 
2902       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2903       interp = interp || eval_mode == CEED_EVAL_INTERP;
2904       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2905       if (!basis) CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2906       if (!rstr) CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2907     }
2908     CeedCall(CeedVectorDestroy(&vec));
2909   }
2910   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2911   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2912   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2913   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2914   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2915   CeedCall(CeedBasisGetDimension(basis, &dim));
2916   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2917   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2918 
2919   // Build and diagonalize 1D Mass and Laplacian
2920   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2921   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2922   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2923   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2924   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2925   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2926   CeedCall(CeedCalloc(P_1d, &lambda));
2927   // -- Build matrices
2928   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2929   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2930   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2931   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2932 
2933   // -- Diagonalize
2934   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2935   CeedCall(CeedFree(&mass));
2936   CeedCall(CeedFree(&laplace));
2937   for (CeedInt i = 0; i < P_1d; i++) {
2938     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2939   }
2940   CeedCall(CeedFree(&x));
2941 
2942   {
2943     CeedInt             layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2944     CeedScalar          max_norm = 0;
2945     const CeedScalar   *assembled_array, *q_weight_array;
2946     CeedVector          assembled = NULL, q_weight;
2947     CeedElemRestriction rstr_qf   = NULL;
2948 
2949     // Assemble QFunction
2950     CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2951     CeedCall(CeedElemRestrictionGetELayout(rstr_qf, layout));
2952     CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2953     CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2954 
2955     // Calculate element averages
2956     CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2957     CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2958     CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2959     CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2960     CeedCall(CeedCalloc(num_elem, &elem_avg));
2961     const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2962 
2963     for (CeedInt e = 0; e < num_elem; e++) {
2964       CeedInt count = 0;
2965 
2966       for (CeedInt q = 0; q < num_qpts; q++) {
2967         for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2968           if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2969             elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2970             count++;
2971           }
2972         }
2973       }
2974       if (count) {
2975         elem_avg[e] /= count;
2976       } else {
2977         elem_avg[e] = 1.0;
2978       }
2979     }
2980     CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2981     CeedCall(CeedVectorDestroy(&assembled));
2982     CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2983     CeedCall(CeedVectorDestroy(&q_weight));
2984   }
2985 
2986   // Build FDM diagonal
2987   {
2988     CeedScalar *q_data_array, *fdm_diagonal;
2989 
2990     CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2991     const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2992     for (CeedInt c = 0; c < num_comp; c++) {
2993       for (CeedInt n = 0; n < num_nodes; n++) {
2994         if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2995         if (grad) {
2996           for (CeedInt d = 0; d < dim; d++) {
2997             CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2998             fdm_diagonal[c * num_nodes + n] += lambda[i];
2999           }
3000         }
3001         if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
3002       }
3003     }
3004     CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
3005     CeedCall(CeedVectorSetValue(q_data, 0.0));
3006     CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
3007     for (CeedInt e = 0; e < num_elem; e++) {
3008       for (CeedInt c = 0; c < num_comp; c++) {
3009         for (CeedInt n = 0; n < num_nodes; n++) {
3010           q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
3011         }
3012       }
3013     }
3014     CeedCall(CeedFree(&elem_avg));
3015     CeedCall(CeedFree(&fdm_diagonal));
3016     CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
3017   }
3018 
3019   // Setup FDM operator
3020   // -- Basis
3021   {
3022     CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
3023 
3024     CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
3025     CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
3026     CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
3027     CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
3028     CeedCall(CeedFree(&fdm_interp));
3029     CeedCall(CeedFree(&grad_dummy));
3030     CeedCall(CeedFree(&q_ref_dummy));
3031     CeedCall(CeedFree(&q_weight_dummy));
3032     CeedCall(CeedFree(&lambda));
3033   }
3034 
3035   // -- Restriction
3036   {
3037     CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp};
3038     CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp,
3039                                               (CeedSize)num_elem * (CeedSize)num_comp * (CeedSize)num_nodes, strides, &rstr_qd_i));
3040   }
3041 
3042   // -- QFunction
3043   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
3044   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
3045   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
3046   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
3047   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
3048 
3049   // -- QFunction context
3050   {
3051     CeedInt *num_comp_data;
3052 
3053     CeedCall(CeedCalloc(1, &num_comp_data));
3054     num_comp_data[0] = num_comp;
3055     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
3056     CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
3057   }
3058   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
3059   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
3060 
3061   // -- Operator
3062   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
3063   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
3064   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data));
3065   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
3066 
3067   // Cleanup
3068   CeedCall(CeedDestroy(&ceed));
3069   CeedCall(CeedDestroy(&ceed_parent));
3070   CeedCall(CeedVectorDestroy(&q_data));
3071   CeedCall(CeedElemRestrictionDestroy(&rstr));
3072   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
3073   CeedCall(CeedBasisDestroy(&basis));
3074   CeedCall(CeedBasisDestroy(&fdm_basis));
3075   CeedCall(CeedQFunctionDestroy(&qf));
3076   CeedCall(CeedQFunctionDestroy(&qf_fdm));
3077   return CEED_ERROR_SUCCESS;
3078 }
3079 
3080 /// @}
3081