xref: /libCEED/interface/ceed-preconditioning.c (revision 8130dc29e61c1acad0c1a334a475471f01fa17ef)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   char *source_path_with_name = NULL;
39 
40   // Check if NULL qf passed in
41   if (!qf) return CEED_ERROR_SUCCESS;
42 
43   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
44   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
45 
46   if (qf->source_path) {
47     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
48     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
49     memcpy(source_path_with_name, qf->source_path, path_len);
50     memcpy(&source_path_with_name[path_len], ":", 1);
51     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
52   } else {
53     CeedCall(CeedCalloc(1, &source_path_with_name));
54   }
55 
56   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
57   {
58     CeedQFunctionContext ctx;
59 
60     CeedCall(CeedQFunctionGetContext(qf, &ctx));
61     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
62   }
63   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
64     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
65   }
66   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
67     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
68   }
69   CeedCall(CeedFree(&source_path_with_name));
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   Ceed         ceed_fallback;
84   bool         is_composite;
85   CeedOperator op_fallback;
86 
87   // Check not already created
88   if (op->op_fallback) return CEED_ERROR_SUCCESS;
89 
90   // Fallback Ceed
91   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
92   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
93 
94   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
95   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
96 
97   // Clone Op
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114 
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
117     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
118     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
119     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
120       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
121                                     op->input_fields[i]->vec));
122     }
123     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
124       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
125                                     op->output_fields[i]->vec));
126     }
127     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
128     // Cleanup
129     CeedCall(CeedQFunctionDestroy(&qf_fallback));
130     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
131     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
132   }
133   CeedCall(CeedOperatorSetName(op_fallback, op->name));
134   CeedCall(CeedOperatorCheckReady(op_fallback));
135   // Note: No ref-counting here so we don't get caught in a reference loop.
136   //       The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback.
137   op->op_fallback                 = op_fallback;
138   op_fallback->op_fallback_parent = op;
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Select correct basis matrix pointer based on CeedEvalMode
144 
145   @param[in]  basis     CeedBasis from which to get the basis matrix
146   @param[in]  eval_mode Current basis evaluation mode
147   @param[in]  identity  Pointer to identity matrix
148   @param[out] basis_ptr Basis pointer to set
149 
150   @ref Developer
151 **/
152 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
153   switch (eval_mode) {
154     case CEED_EVAL_NONE:
155       *basis_ptr = identity;
156       break;
157     case CEED_EVAL_INTERP:
158       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
159       break;
160     case CEED_EVAL_GRAD:
161       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
162       break;
163     case CEED_EVAL_DIV:
164       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
165       break;
166     case CEED_EVAL_CURL:
167       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
168       break;
169     case CEED_EVAL_WEIGHT:
170       break;  // Caught by QF Assembly
171   }
172   assert(*basis_ptr != NULL);
173   return CEED_ERROR_SUCCESS;
174 }
175 
176 /**
177   @brief Core logic for assembling operator diagonal or point block diagonal
178 
179   @param[in]  op             CeedOperator to assemble point block diagonal
180   @param[in]  request        Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
181   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
182   @param[out] assembled      CeedVector to store assembled diagonal
183 
184   @return An error code: 0 - success, otherwise - failure
185 
186   @ref Developer
187 **/
188 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_point_block, CeedVector assembled) {
189   Ceed ceed;
190   bool is_composite;
191 
192   CeedCall(CeedOperatorGetCeed(op, &ceed));
193   CeedCall(CeedOperatorIsComposite(op, &is_composite));
194   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
195 
196   // Assemble QFunction
197   CeedInt             layout_qf[3];
198   const CeedScalar   *assembled_qf_array;
199   CeedVector          assembled_qf        = NULL;
200   CeedElemRestriction assembled_elem_rstr = NULL;
201 
202   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
203   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf));
204   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
205   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
206 
207   // Get assembly data
208   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
209   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
210   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
211   CeedBasis               *active_bases_in, *active_bases_out;
212   CeedElemRestriction     *active_elem_rstrs_in, *active_elem_rstrs_out;
213   CeedOperatorAssemblyData data;
214 
215   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
216   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
217                                                 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out,
218                                                 &num_output_components));
219   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL));
220   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out));
221 
222   // Loop over all active bases (find matching input/output pairs)
223   for (CeedInt b = 0; b < CeedIntMin(num_active_bases_in, num_active_bases_out); b++) {
224     CeedInt             b_in, b_out, num_elem, num_nodes, num_qpts, num_comp;
225     bool                has_eval_none = false;
226     CeedScalar         *elem_diag_array, *identity = NULL;
227     CeedVector          elem_diag;
228     CeedElemRestriction diag_elem_rstr;
229 
230     if (num_active_bases_in <= num_active_bases_out) {
231       b_in = b;
232       for (b_out = 0; b_out < num_active_bases_out; b_out++) {
233         if (active_bases_in[b_in] == active_bases_out[b_out]) {
234           break;
235         }
236       }
237       if (b_out == num_active_bases_out) {
238         continue;
239       }  // No matching output basis found
240     } else {
241       b_out = b;
242       for (b_in = 0; b_in < num_active_bases_in; b_in++) {
243         if (active_bases_in[b_in] == active_bases_out[b_out]) {
244           break;
245         }
246       }
247       if (b_in == num_active_bases_in) {
248         continue;
249       }  // No matching output basis found
250     }
251     CeedCheck(active_elem_rstrs_in[b_in] == active_elem_rstrs_out[b_out], ceed, CEED_ERROR_UNSUPPORTED,
252               "Cannot assemble operator diagonal with different input and output active element restrictions");
253 
254     // Assemble point block diagonal restriction, if needed
255     if (is_point_block) {
256       CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b_in], &diag_elem_rstr));
257     } else {
258       CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b_in], &diag_elem_rstr));
259     }
260 
261     // Create diagonal vector
262     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
263 
264     // Assemble element operator diagonals
265     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
266     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
267     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
268     CeedCall(CeedBasisGetNumNodes(active_bases_in[b_in], &num_nodes));
269     CeedCall(CeedBasisGetNumComponents(active_bases_in[b_in], &num_comp));
270     if (active_bases_in[b_in] == CEED_BASIS_NONE) num_qpts = num_nodes;
271     else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b_in], &num_qpts));
272 
273     // Construct identity matrix for basis if required
274     for (CeedInt i = 0; i < num_eval_modes_in[b_in]; i++) {
275       has_eval_none = has_eval_none || (eval_modes_in[b_in][i] == CEED_EVAL_NONE);
276     }
277     for (CeedInt i = 0; i < num_eval_modes_out[b_out]; i++) {
278       has_eval_none = has_eval_none || (eval_modes_out[b_out][i] == CEED_EVAL_NONE);
279     }
280     if (has_eval_none) {
281       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
282       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
283     }
284 
285     // Compute the diagonal of B^T D B
286     // Each element
287     for (CeedSize e = 0; e < num_elem; e++) {
288       // Each basis eval mode pair
289       CeedInt      d_out              = 0, q_comp_out;
290       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
291 
292       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b_out]; e_out++) {
293         CeedInt           d_in              = 0, q_comp_in;
294         const CeedScalar *B_t               = NULL;
295         CeedEvalMode      eval_mode_in_prev = CEED_EVAL_NONE;
296 
297         CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b_out], eval_modes_out[b_out][e_out], identity, &B_t));
298         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b_out], eval_modes_out[b_out][e_out], &q_comp_out));
299         if (q_comp_out > 1) {
300           if (e_out == 0 || eval_modes_out[b_out][e_out] != eval_mode_out_prev) d_out = 0;
301           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
302         }
303         eval_mode_out_prev = eval_modes_out[b_out][e_out];
304 
305         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b_in]; e_in++) {
306           const CeedScalar *B = NULL;
307 
308           CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b_in], eval_modes_in[b_in][e_in], identity, &B));
309           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b_in], eval_modes_in[b_in][e_in], &q_comp_in));
310           if (q_comp_in > 1) {
311             if (e_in == 0 || eval_modes_in[b_in][e_in] != eval_mode_in_prev) d_in = 0;
312             else B = &B[(++d_in) * num_qpts * num_nodes];
313           }
314           eval_mode_in_prev = eval_modes_in[b_in][e_in];
315 
316           // Each component
317           for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
318             // Each qpt/node pair
319             for (CeedInt q = 0; q < num_qpts; q++) {
320               if (is_point_block) {
321                 // Point Block Diagonal
322                 for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
323                   const CeedSize c_offset =
324                       (eval_mode_offsets_in[b_in][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
325                   const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
326 
327                   for (CeedInt n = 0; n < num_nodes; n++) {
328                     elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
329                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
330                   }
331                 }
332               } else {
333                 // Diagonal Only
334                 const CeedInt c_offset =
335                     (eval_mode_offsets_in[b_in][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
336                 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
337 
338                 for (CeedInt n = 0; n < num_nodes; n++) {
339                   elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
340                 }
341               }
342             }
343           }
344         }
345       }
346     }
347     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
348 
349     // Assemble local operator diagonal
350     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
351 
352     // Cleanup
353     CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
354     CeedCall(CeedVectorDestroy(&elem_diag));
355     CeedCall(CeedFree(&identity));
356   }
357   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
358   CeedCall(CeedVectorDestroy(&assembled_qf));
359   return CEED_ERROR_SUCCESS;
360 }
361 
362 /**
363   @brief Core logic for assembling composite operator diagonal
364 
365   @param[in]  op             CeedOperator to assemble point block diagonal
366   @param[in]  request        Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
367   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
368   @param[out] assembled      CeedVector to store assembled diagonal
369 
370   @return An error code: 0 - success, otherwise - failure
371 
372   @ref Developer
373 **/
374 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block,
375                                                                  CeedVector assembled) {
376   CeedInt       num_sub;
377   CeedOperator *suboperators;
378 
379   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
380   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
381   for (CeedInt i = 0; i < num_sub; i++) {
382     if (is_point_block) {
383       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
384     } else {
385       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
386     }
387   }
388   return CEED_ERROR_SUCCESS;
389 }
390 
391 /**
392   @brief Build nonzero pattern for non-composite operator
393 
394   Users should generally use CeedOperatorLinearAssembleSymbolic()
395 
396   @param[in]  op     CeedOperator to assemble nonzero pattern
397   @param[in]  offset Offset for number of entries
398   @param[out] rows   Row number for each entry
399   @param[out] cols   Column number for each entry
400 
401   @return An error code: 0 - success, otherwise - failure
402 
403   @ref Developer
404 **/
405 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
406   Ceed                ceed;
407   bool                is_composite;
408   CeedSize            num_nodes_in, num_nodes_out, count = 0;
409   CeedInt             num_elem_in, elem_size_in, num_comp_in, layout_er_in[3];
410   CeedInt             num_elem_out, elem_size_out, num_comp_out, layout_er_out[3], local_num_entries;
411   CeedScalar         *array;
412   const CeedScalar   *elem_dof_a_in, *elem_dof_a_out;
413   CeedVector          index_vec_in, index_vec_out, elem_dof_in, elem_dof_out;
414   CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out;
415 
416   CeedCall(CeedOperatorGetCeed(op, &ceed));
417   CeedCall(CeedOperatorIsComposite(op, &is_composite));
418   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
419 
420   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out));
421   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
422   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
423   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
424   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
425   CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, &layout_er_in));
426 
427   // Determine elem_dof relation for input
428   CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in));
429   CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array));
430   for (CeedInt i = 0; i < num_nodes_in; i++) array[i] = i;
431   CeedCall(CeedVectorRestoreArray(index_vec_in, &array));
432   CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in));
433   CeedCall(CeedVectorSetValue(elem_dof_in, 0.0));
434   CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in));
435   CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE));
436   CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in));
437   CeedCall(CeedVectorDestroy(&index_vec_in));
438   CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in));
439 
440   if (elem_rstr_in != elem_rstr_out) {
441     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
442     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
443               "Active input and output operator restrictions must have the same number of elements");
444     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
445     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
446     CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, &layout_er_out));
447 
448     // Determine elem_dof relation for output
449     CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out));
450     CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array));
451     for (CeedInt i = 0; i < num_nodes_out; i++) array[i] = i;
452     CeedCall(CeedVectorRestoreArray(index_vec_out, &array));
453     CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out));
454     CeedCall(CeedVectorSetValue(elem_dof_out, 0.0));
455     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out));
456     CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE));
457     CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out));
458     CeedCall(CeedVectorDestroy(&index_vec_out));
459     CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out));
460   } else {
461     num_elem_out     = num_elem_in;
462     elem_size_out    = elem_size_in;
463     num_comp_out     = num_comp_in;
464     layout_er_out[0] = layout_er_in[0];
465     layout_er_out[1] = layout_er_in[1];
466     layout_er_out[2] = layout_er_in[2];
467     elem_dof_a_out   = elem_dof_a_in;
468   }
469   local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
470 
471   // Determine i, j locations for element matrices
472   for (CeedInt e = 0; e < num_elem_in; e++) {
473     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
474       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
475         for (CeedInt i = 0; i < elem_size_out; i++) {
476           for (CeedInt j = 0; j < elem_size_in; j++) {
477             const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2];
478             const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2];
479             const CeedInt row                = elem_dof_a_out[elem_dof_index_row];
480             const CeedInt col                = elem_dof_a_in[elem_dof_index_col];
481 
482             rows[offset + count] = row;
483             cols[offset + count] = col;
484             count++;
485           }
486         }
487       }
488     }
489   }
490   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
491   CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in));
492   CeedCall(CeedVectorDestroy(&elem_dof_in));
493   if (elem_rstr_in != elem_rstr_out) {
494     CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out));
495     CeedCall(CeedVectorDestroy(&elem_dof_out));
496   }
497   return CEED_ERROR_SUCCESS;
498 }
499 
500 /**
501   @brief Assemble nonzero entries for non-composite operator
502 
503   Users should generally use CeedOperatorLinearAssemble()
504 
505   @param[in]  op     CeedOperator to assemble
506   @param[in]  offset Offset for number of entries
507   @param[out] values Values to assemble into matrix
508 
509   @return An error code: 0 - success, otherwise - failure
510 
511   @ref Developer
512 **/
513 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
514   Ceed ceed;
515   bool is_composite;
516 
517   CeedCall(CeedOperatorGetCeed(op, &ceed));
518   CeedCall(CeedOperatorIsComposite(op, &is_composite));
519   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
520 
521   // Early exit for empty operator
522   {
523     CeedInt num_elem = 0;
524 
525     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
526     if (num_elem == 0) return CEED_ERROR_SUCCESS;
527   }
528 
529   if (op->LinearAssembleSingle) {
530     // Backend version
531     CeedCall(op->LinearAssembleSingle(op, offset, values));
532     return CEED_ERROR_SUCCESS;
533   } else {
534     // Operator fallback
535     CeedOperator op_fallback;
536 
537     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
538     if (op_fallback) {
539       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
540       return CEED_ERROR_SUCCESS;
541     }
542   }
543 
544   // Assemble QFunction
545   CeedInt             layout_qf[3];
546   const CeedScalar   *assembled_qf_array;
547   CeedVector          assembled_qf        = NULL;
548   CeedElemRestriction assembled_elem_rstr = NULL;
549 
550   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE));
551   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf));
552   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
553   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
554 
555   // Get assembly data
556   CeedInt                  num_elem_in, elem_size_in, num_comp_in, num_qpts_in;
557   CeedInt                  num_elem_out, elem_size_out, num_comp_out, num_qpts_out, local_num_entries;
558   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
559   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
560   CeedBasis               *active_bases_in, *active_bases_out, basis_in, basis_out;
561   const CeedScalar       **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out;
562   CeedElemRestriction      elem_rstr_in, elem_rstr_out;
563   CeedRestrictionType      elem_rstr_type_in, elem_rstr_type_out;
564   const bool              *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL;
565   const CeedInt8          *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL;
566   CeedOperatorAssemblyData data;
567 
568   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
569   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out,
570                                                 &num_eval_modes_out, &eval_modes_out, NULL, NULL));
571 
572   CeedCheck(num_active_bases_in == num_active_bases_out && num_active_bases_in == 1, ceed, CEED_ERROR_UNSUPPORTED,
573             "Cannot assemble operator with multiple active bases");
574   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator without inputs/outputs");
575 
576   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out));
577   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
578   basis_in  = active_bases_in[0];
579   basis_out = active_bases_out[0];
580   B_mat_in  = B_mats_in[0];
581   B_mat_out = B_mats_out[0];
582 
583   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
584   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
585   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
586   if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in;
587   else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in));
588 
589   CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in));
590   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
591     CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in));
592   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
593     CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in));
594   }
595 
596   if (elem_rstr_in != elem_rstr_out) {
597     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
598     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
599               "Active input and output operator restrictions must have the same number of elements");
600     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
601     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
602     if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out;
603     else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out));
604     CeedCheck(num_qpts_in == num_qpts_out, ceed, CEED_ERROR_UNSUPPORTED,
605               "Active input and output bases must have the same number of quadrature points");
606 
607     CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out));
608     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
609       CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out));
610     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
611       CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out));
612     }
613   } else {
614     num_elem_out  = num_elem_in;
615     elem_size_out = elem_size_in;
616     num_comp_out  = num_comp_in;
617     num_qpts_out  = num_qpts_in;
618 
619     elem_rstr_orients_out      = elem_rstr_orients_in;
620     elem_rstr_curl_orients_out = elem_rstr_curl_orients_in;
621   }
622   local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
623 
624   // Loop over elements and put in data structure
625   // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
626   CeedSize           count = 0;
627   CeedTensorContract contract;
628   CeedScalar        *vals, *BTD_mat = NULL, *elem_mat = NULL, *elem_mat_b = NULL;
629 
630   CeedCall(CeedBasisGetTensorContract(basis_in, &contract));
631   CeedCall(CeedCalloc(elem_size_out * num_qpts_in * num_eval_modes_in[0], &BTD_mat));
632   CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat));
633   if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b));
634 
635   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
636   for (CeedSize e = 0; e < num_elem_in; e++) {
637     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
638       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
639         // Compute B^T*D
640         for (CeedSize n = 0; n < elem_size_out; n++) {
641           for (CeedSize q = 0; q < num_qpts_in; q++) {
642             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
643               const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in;
644               CeedScalar     sum       = 0.0;
645 
646               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
647                 const CeedSize b_out_index     = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n;
648                 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out;
649                 const CeedSize qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
650 
651                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
652               }
653               BTD_mat[btd_index] = sum;
654             }
655           }
656         }
657 
658         // Form element matrix itself (for each block component)
659         CeedCall(CeedTensorContractApply(contract, 1, num_qpts_in * num_eval_modes_in[0], elem_size_in, elem_size_out, BTD_mat, CEED_NOTRANSPOSE,
660                                          false, B_mat_in, elem_mat));
661 
662         // Transform the element matrix if required
663         if (elem_rstr_orients_out) {
664           const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out];
665 
666           for (CeedInt i = 0; i < elem_size_out; i++) {
667             const double orient = elem_orients[i] ? -1.0 : 1.0;
668 
669             for (CeedInt j = 0; j < elem_size_in; j++) {
670               elem_mat[i * elem_size_in + j] *= orient;
671             }
672           }
673         } else if (elem_rstr_curl_orients_out) {
674           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out];
675 
676           // T^T*(B^T*D*B)
677           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
678           for (CeedInt i = 0; i < elem_size_out; i++) {
679             for (CeedInt j = 0; j < elem_size_in; j++) {
680               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] +
681                                                (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) +
682                                                (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0);
683             }
684           }
685         }
686         if (elem_rstr_orients_in) {
687           const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in];
688 
689           for (CeedInt i = 0; i < elem_size_out; i++) {
690             for (CeedInt j = 0; j < elem_size_in; j++) {
691               elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0;
692             }
693           }
694         } else if (elem_rstr_curl_orients_in) {
695           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in];
696 
697           // (B^T*D*B)*T
698           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
699           for (CeedInt i = 0; i < elem_size_out; i++) {
700             for (CeedInt j = 0; j < elem_size_in; j++) {
701               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] +
702                                                (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) +
703                                                (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0);
704             }
705           }
706         }
707 
708         // Put element matrix in coordinate data structure
709         for (CeedInt i = 0; i < elem_size_out; i++) {
710           for (CeedInt j = 0; j < elem_size_in; j++) {
711             vals[offset + count] = elem_mat[i * elem_size_in + j];
712             count++;
713           }
714         }
715       }
716     }
717   }
718   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries");
719   CeedCall(CeedVectorRestoreArray(values, &vals));
720 
721   // Cleanup
722   CeedCall(CeedFree(&BTD_mat));
723   CeedCall(CeedFree(&elem_mat));
724   CeedCall(CeedFree(&elem_mat_b));
725   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
726     CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in));
727   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
728     CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in));
729   }
730   if (elem_rstr_in != elem_rstr_out) {
731     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
732       CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out));
733     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
734       CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out));
735     }
736   }
737   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
738   CeedCall(CeedVectorDestroy(&assembled_qf));
739   return CEED_ERROR_SUCCESS;
740 }
741 
742 /**
743   @brief Count number of entries for assembled CeedOperator
744 
745   @param[in]  op          CeedOperator to assemble
746   @param[out] num_entries Number of entries in assembled representation
747 
748   @return An error code: 0 - success, otherwise - failure
749 
750   @ref Utility
751 **/
752 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) {
753   bool                is_composite;
754   CeedInt             num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out;
755   CeedElemRestriction rstr_in, rstr_out;
756 
757   CeedCall(CeedOperatorIsComposite(op, &is_composite));
758   CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
759 
760   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out));
761   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in));
762   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in));
763   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in));
764   if (rstr_in != rstr_out) {
765     CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out));
766     CeedCheck(num_elem_in == num_elem_out, op->ceed, CEED_ERROR_UNSUPPORTED,
767               "Active input and output operator restrictions must have the same number of elements");
768     CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out));
769     CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out));
770   } else {
771     num_elem_out  = num_elem_in;
772     elem_size_out = elem_size_in;
773     num_comp_out  = num_comp_in;
774   }
775   *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in;
776   return CEED_ERROR_SUCCESS;
777 }
778 
779 /**
780   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
781 
782   @param[in]  op_fine      Fine grid operator
783   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
784   @param[in]  rstr_coarse  Coarse grid restriction
785   @param[in]  basis_coarse Coarse grid active vector basis
786   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
787   @param[out] op_coarse    Coarse grid operator
788   @param[out] op_prolong   Coarse to fine operator, or NULL
789   @param[out] op_restrict  Fine to coarse operator, or NULL
790 
791   @return An error code: 0 - success, otherwise - failure
792 
793   @ref Developer
794 **/
795 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
796                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
797   bool                is_composite;
798   Ceed                ceed;
799   CeedInt             num_comp;
800   CeedVector          mult_vec         = NULL;
801   CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL;
802 
803   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
804 
805   // Check for composite operator
806   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
807   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
808 
809   // Coarse Grid
810   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
811   // -- Clone input fields
812   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
813     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
814       rstr_fine = op_fine->input_fields[i]->elem_rstr;
815       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
816     } else {
817       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
818                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
819     }
820   }
821   // -- Clone output fields
822   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
823     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
824       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
825     } else {
826       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
827                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
828     }
829   }
830   // -- Clone QFunctionAssemblyData
831   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
832 
833   // Multiplicity vector
834   if (op_restrict || op_prolong) {
835     CeedVector          mult_e_vec;
836     CeedRestrictionType rstr_type;
837 
838     CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type));
839     CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED,
840               "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported");
841     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
842     CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine));
843     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
844     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
845     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
846     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
847     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
848     CeedCall(CeedVectorDestroy(&mult_e_vec));
849     CeedCall(CeedVectorReciprocal(mult_vec));
850   }
851 
852   // Clone name
853   bool   has_name = op_fine->name;
854   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
855   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
856 
857   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
858   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
859             "Prolongation or restriction operator creation requires coarse-to-fine basis");
860 
861   // Restriction/Prolongation Operators
862   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
863 
864   // Restriction
865   if (op_restrict) {
866     CeedInt             *num_comp_r_data;
867     CeedQFunctionContext ctx_r;
868     CeedQFunction        qf_restrict;
869 
870     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
871     CeedCall(CeedCalloc(1, &num_comp_r_data));
872     num_comp_r_data[0] = num_comp;
873     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
874     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
875     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
876     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
877     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
878     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
879     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
880     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
881 
882     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
883     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
884     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
885     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
886 
887     // Set name
888     char *restriction_name;
889 
890     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
891     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
892     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
893     CeedCall(CeedFree(&restriction_name));
894 
895     // Check
896     CeedCall(CeedOperatorCheckReady(*op_restrict));
897 
898     // Cleanup
899     CeedCall(CeedQFunctionDestroy(&qf_restrict));
900   }
901 
902   // Prolongation
903   if (op_prolong) {
904     CeedInt             *num_comp_p_data;
905     CeedQFunctionContext ctx_p;
906     CeedQFunction        qf_prolong;
907 
908     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
909     CeedCall(CeedCalloc(1, &num_comp_p_data));
910     num_comp_p_data[0] = num_comp;
911     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
912     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
913     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
914     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
915     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
916     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
917     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
918     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
919 
920     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
921     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
922     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
923     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
924 
925     // Set name
926     char *prolongation_name;
927 
928     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
929     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
930     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
931     CeedCall(CeedFree(&prolongation_name));
932 
933     // Check
934     CeedCall(CeedOperatorCheckReady(*op_prolong));
935 
936     // Cleanup
937     CeedCall(CeedQFunctionDestroy(&qf_prolong));
938   }
939 
940   // Check
941   CeedCall(CeedOperatorCheckReady(*op_coarse));
942 
943   // Cleanup
944   CeedCall(CeedVectorDestroy(&mult_vec));
945   CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine));
946   CeedCall(CeedBasisDestroy(&basis_c_to_f));
947   return CEED_ERROR_SUCCESS;
948 }
949 
950 /**
951   @brief Build 1D mass matrix and Laplacian with perturbation
952 
953   @param[in]  interp_1d   Interpolation matrix in one dimension
954   @param[in]  grad_1d     Gradient matrix in one dimension
955   @param[in]  q_weight_1d Quadrature weights in one dimension
956   @param[in]  P_1d        Number of basis nodes in one dimension
957   @param[in]  Q_1d        Number of quadrature points in one dimension
958   @param[in]  dim         Dimension of basis
959   @param[out] mass        Assembled mass matrix in one dimension
960   @param[out] laplace     Assembled perturbed Laplacian in one dimension
961 
962   @return An error code: 0 - success, otherwise - failure
963 
964   @ref Developer
965 **/
966 CeedPragmaOptimizeOff
967 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
968                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
969   for (CeedInt i = 0; i < P_1d; i++) {
970     for (CeedInt j = 0; j < P_1d; j++) {
971       CeedScalar sum = 0.0;
972       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
973       mass[i + j * P_1d] = sum;
974     }
975   }
976   // -- Laplacian
977   for (CeedInt i = 0; i < P_1d; i++) {
978     for (CeedInt j = 0; j < P_1d; j++) {
979       CeedScalar sum = 0.0;
980 
981       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
982       laplace[i + j * P_1d] = sum;
983     }
984   }
985   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
986   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
987   return CEED_ERROR_SUCCESS;
988 }
989 CeedPragmaOptimizeOn
990 
991 /// @}
992 
993 /// ----------------------------------------------------------------------------
994 /// CeedOperator Backend API
995 /// ----------------------------------------------------------------------------
996 /// @addtogroup CeedOperatorBackend
997 /// @{
998 
999 /**
1000   @brief Create point block restriction for active operator field
1001 
1002   @param[in]  rstr             Original CeedElemRestriction for active field
1003   @param[out] point_block_rstr Address of the variable where the newly created CeedElemRestriction will be stored
1004 
1005   @return An error code: 0 - success, otherwise - failure
1006 
1007   @ref Backend
1008 **/
1009 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) {
1010   Ceed           ceed;
1011   CeedInt        num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets;
1012   CeedSize       l_size;
1013   const CeedInt *offsets;
1014 
1015   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
1016   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
1017 
1018   // Expand offsets
1019   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
1020   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
1021   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
1022   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
1023   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
1024   shift = num_comp;
1025   if (comp_stride != 1) shift *= num_comp;
1026   CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets));
1027   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
1028     point_block_offsets[i] = offsets[i] * shift;
1029   }
1030 
1031   // Create new restriction
1032   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
1033                                      point_block_offsets, point_block_rstr));
1034 
1035   // Cleanup
1036   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
1037   return CEED_ERROR_SUCCESS;
1038 }
1039 
1040 /**
1041   @brief Create object holding CeedQFunction assembly data for CeedOperator
1042 
1043   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
1044   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
1045 
1046   @return An error code: 0 - success, otherwise - failure
1047 
1048   @ref Backend
1049 **/
1050 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
1051   CeedCall(CeedCalloc(1, data));
1052   (*data)->ref_count = 1;
1053   (*data)->ceed      = ceed;
1054   CeedCall(CeedReference(ceed));
1055   return CEED_ERROR_SUCCESS;
1056 }
1057 
1058 /**
1059   @brief Increment the reference counter for a CeedQFunctionAssemblyData
1060 
1061   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
1062 
1063   @return An error code: 0 - success, otherwise - failure
1064 
1065   @ref Backend
1066 **/
1067 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
1068   data->ref_count++;
1069   return CEED_ERROR_SUCCESS;
1070 }
1071 
1072 /**
1073   @brief Set re-use of CeedQFunctionAssemblyData
1074 
1075   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
1076   @param[in]     reuse_data Boolean flag indicating data re-use
1077 
1078   @return An error code: 0 - success, otherwise - failure
1079 
1080   @ref Backend
1081 **/
1082 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
1083   data->reuse_data        = reuse_data;
1084   data->needs_data_update = true;
1085   return CEED_ERROR_SUCCESS;
1086 }
1087 
1088 /**
1089   @brief Mark QFunctionAssemblyData as stale
1090 
1091   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
1092   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
1093 
1094   @return An error code: 0 - success, otherwise - failure
1095 
1096   @ref Backend
1097 **/
1098 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
1099   data->needs_data_update = needs_data_update;
1100   return CEED_ERROR_SUCCESS;
1101 }
1102 
1103 /**
1104   @brief Determine if QFunctionAssemblyData needs update
1105 
1106   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
1107   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1108 
1109   @return An error code: 0 - success, otherwise - failure
1110 
1111   @ref Backend
1112 **/
1113 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
1114   *is_update_needed = !data->reuse_data || data->needs_data_update;
1115   return CEED_ERROR_SUCCESS;
1116 }
1117 
1118 /**
1119   @brief Copy the pointer to a CeedQFunctionAssemblyData.
1120 
1121   Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
1122 
1123   Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
1124         CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
1125         CeedQFunctionAssemblyData.
1126 
1127   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
1128   @param[in,out] data_copy Variable to store copied reference
1129 
1130   @return An error code: 0 - success, otherwise - failure
1131 
1132   @ref Backend
1133 **/
1134 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1135   CeedCall(CeedQFunctionAssemblyDataReference(data));
1136   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1137   *data_copy = data;
1138   return CEED_ERROR_SUCCESS;
1139 }
1140 
1141 /**
1142   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1143 
1144   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1145   @param[out] is_setup Boolean flag for setup status
1146 
1147   @return An error code: 0 - success, otherwise - failure
1148 
1149   @ref Backend
1150 **/
1151 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1152   *is_setup = data->is_setup;
1153   return CEED_ERROR_SUCCESS;
1154 }
1155 
1156 /**
1157   @brief Set internal objects for CeedQFunctionAssemblyData
1158 
1159   @param[in,out] data CeedQFunctionAssemblyData to set objects
1160   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1161   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1162 
1163   @return An error code: 0 - success, otherwise - failure
1164 
1165   @ref Backend
1166 **/
1167 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1168   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1169   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1170 
1171   data->is_setup = true;
1172   return CEED_ERROR_SUCCESS;
1173 }
1174 
1175 /**
1176   @brief Get internal objects for CeedQFunctionAssemblyData
1177 
1178   @param[in,out] data CeedQFunctionAssemblyData to set objects
1179   @param[out]    vec  CeedVector to store assembled CeedQFunction at quadrature points
1180   @param[out]    rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1181 
1182   @return An error code: 0 - success, otherwise - failure
1183 
1184   @ref Backend
1185 **/
1186 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1187   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1188 
1189   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1190   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1191   return CEED_ERROR_SUCCESS;
1192 }
1193 
1194 /**
1195   @brief Destroy CeedQFunctionAssemblyData
1196 
1197   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1198 
1199   @return An error code: 0 - success, otherwise - failure
1200 
1201   @ref Backend
1202 **/
1203 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1204   if (!*data || --(*data)->ref_count > 0) {
1205     *data = NULL;
1206     return CEED_ERROR_SUCCESS;
1207   }
1208   CeedCall(CeedDestroy(&(*data)->ceed));
1209   CeedCall(CeedVectorDestroy(&(*data)->vec));
1210   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1211 
1212   CeedCall(CeedFree(data));
1213   return CEED_ERROR_SUCCESS;
1214 }
1215 
1216 /**
1217   @brief Get CeedOperatorAssemblyData
1218 
1219   @param[in]  op   CeedOperator to assemble
1220   @param[out] data CeedQFunctionAssemblyData
1221 
1222   @return An error code: 0 - success, otherwise - failure
1223 
1224   @ref Backend
1225 **/
1226 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1227   if (!op->op_assembled) {
1228     CeedOperatorAssemblyData data;
1229 
1230     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1231     op->op_assembled = data;
1232   }
1233   *data = op->op_assembled;
1234   return CEED_ERROR_SUCCESS;
1235 }
1236 
1237 /**
1238   @brief Create object holding CeedOperator assembly data.
1239 
1240   The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1241   An array with references to the corresponding active CeedElemRestrictions is also stored.
1242   For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1243   The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1244 CeedEvalMode.
1245   The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1246   Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1247 
1248   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1249   @param[in]  op   CeedOperator to be assembled
1250   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1251 
1252   @return An error code: 0 - success, otherwise - failure
1253 
1254   @ref Backend
1255 **/
1256 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1257   CeedInt             num_active_bases_in = 0, num_active_bases_out = 0, offset = 0;
1258   CeedInt             num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL;
1259   CeedSize          **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1260   CeedEvalMode      **eval_modes_in = NULL, **eval_modes_out = NULL;
1261   CeedQFunctionField *qf_fields;
1262   CeedQFunction       qf;
1263   CeedOperatorField  *op_fields;
1264   bool                is_composite;
1265 
1266   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1267   CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators.");
1268 
1269   // Allocate
1270   CeedCall(CeedCalloc(1, data));
1271   (*data)->ceed = ceed;
1272   CeedCall(CeedReference(ceed));
1273 
1274   // Build OperatorAssembly data
1275   CeedCall(CeedOperatorGetQFunction(op, &qf));
1276   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1277   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1278 
1279   // Determine active input basis
1280   for (CeedInt i = 0; i < num_input_fields; i++) {
1281     CeedVector vec;
1282 
1283     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1284     if (vec == CEED_VECTOR_ACTIVE) {
1285       CeedInt      index = -1, num_comp, q_comp;
1286       CeedEvalMode eval_mode;
1287       CeedBasis    basis_in = NULL;
1288 
1289       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1290       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1291       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1292       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1293       for (CeedInt i = 0; i < num_active_bases_in; i++) {
1294         if ((*data)->active_bases_in[i] == basis_in) index = i;
1295       }
1296       if (index == -1) {
1297         CeedElemRestriction elem_rstr_in;
1298 
1299         index = num_active_bases_in;
1300         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in));
1301         (*data)->active_bases_in[num_active_bases_in] = NULL;
1302         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in]));
1303         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in));
1304         (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL;
1305         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1306         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in]));
1307         CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in));
1308         num_eval_modes_in[index] = 0;
1309         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in));
1310         eval_modes_in[index] = NULL;
1311         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in));
1312         eval_mode_offsets_in[index] = NULL;
1313         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in));
1314         (*data)->assembled_bases_in[index] = NULL;
1315         num_active_bases_in++;
1316       }
1317       if (eval_mode != CEED_EVAL_WEIGHT) {
1318         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1319         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1320         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1321         for (CeedInt d = 0; d < q_comp; d++) {
1322           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1323           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1324           offset += num_comp;
1325         }
1326         num_eval_modes_in[index] += q_comp;
1327       }
1328     }
1329   }
1330 
1331   // Determine active output basis
1332   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1333   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1334   offset = 0;
1335   for (CeedInt i = 0; i < num_output_fields; i++) {
1336     CeedVector vec;
1337 
1338     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1339     if (vec == CEED_VECTOR_ACTIVE) {
1340       CeedInt      index = -1, num_comp, q_comp;
1341       CeedEvalMode eval_mode;
1342       CeedBasis    basis_out = NULL;
1343 
1344       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1345       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1346       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1347       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1348       for (CeedInt i = 0; i < num_active_bases_out; i++) {
1349         if ((*data)->active_bases_out[i] == basis_out) index = i;
1350       }
1351       if (index == -1) {
1352         CeedElemRestriction elem_rstr_out;
1353 
1354         index = num_active_bases_out;
1355         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out));
1356         (*data)->active_bases_out[num_active_bases_out] = NULL;
1357         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out]));
1358         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out));
1359         (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL;
1360         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1361         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out]));
1362         CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out));
1363         num_eval_modes_out[index] = 0;
1364         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out));
1365         eval_modes_out[index] = NULL;
1366         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out));
1367         eval_mode_offsets_out[index] = NULL;
1368         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out));
1369         (*data)->assembled_bases_out[index] = NULL;
1370         num_active_bases_out++;
1371       }
1372       if (eval_mode != CEED_EVAL_WEIGHT) {
1373         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1374         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1375         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1376         for (CeedInt d = 0; d < q_comp; d++) {
1377           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1378           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1379           offset += num_comp;
1380         }
1381         num_eval_modes_out[index] += q_comp;
1382       }
1383     }
1384   }
1385   (*data)->num_active_bases_in   = num_active_bases_in;
1386   (*data)->num_eval_modes_in     = num_eval_modes_in;
1387   (*data)->eval_modes_in         = eval_modes_in;
1388   (*data)->eval_mode_offsets_in  = eval_mode_offsets_in;
1389   (*data)->num_active_bases_out  = num_active_bases_out;
1390   (*data)->num_eval_modes_out    = num_eval_modes_out;
1391   (*data)->eval_modes_out        = eval_modes_out;
1392   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1393   (*data)->num_output_components = offset;
1394   return CEED_ERROR_SUCCESS;
1395 }
1396 
1397 /**
1398   @brief Get CeedOperator CeedEvalModes for assembly.
1399 
1400   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1401 
1402   @param[in]  data                  CeedOperatorAssemblyData
1403   @param[out] num_active_bases_in   Total number of active bases for input
1404   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1405                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1406   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1407   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1408   @param[out] num_active_bases_out  Total number of active bases for output
1409   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1410   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1411   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1412   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1413                                       including contributions of all active bases
1414 
1415   @return An error code: 0 - success, otherwise - failure
1416 
1417   @ref Backend
1418 **/
1419 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in,
1420                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out,
1421                                          CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out,
1422                                          CeedSize *num_output_components) {
1423   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1424   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1425   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1426   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1427   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1428   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1429   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1430   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1431   if (num_output_components) *num_output_components = data->num_output_components;
1432   return CEED_ERROR_SUCCESS;
1433 }
1434 
1435 /**
1436   @brief Get CeedOperator CeedBasis data for assembly.
1437 
1438   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1439 
1440   @param[in]  data                 CeedOperatorAssemblyData
1441   @param[out] num_active_bases_in  Number of active input bases, or NULL
1442   @param[out] active_bases_in      Pointer to hold active input CeedBasis, or NULL
1443   @param[out] assembled_bases_in   Pointer to hold assembled active input B, or NULL
1444   @param[out] num_active_bases_out Number of active output bases, or NULL
1445   @param[out] active_bases_out     Pointer to hold active output CeedBasis, or NULL
1446   @param[out] assembled_bases_out  Pointer to hold assembled active output B, or NULL
1447 
1448   @return An error code: 0 - success, otherwise - failure
1449 
1450   @ref Backend
1451 **/
1452 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in,
1453                                      const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out,
1454                                      const CeedScalar ***assembled_bases_out) {
1455   // Assemble B_in, B_out if needed
1456   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1457     CeedInt num_qpts;
1458 
1459     if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts));
1460     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts));
1461     for (CeedInt b = 0; b < data->num_active_bases_in; b++) {
1462       bool        has_eval_none = false;
1463       CeedInt     num_nodes;
1464       CeedScalar *B_in = NULL, *identity = NULL;
1465 
1466       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes));
1467       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1468 
1469       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1470         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1471       }
1472       if (has_eval_none) {
1473         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1474         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1475           identity[i * num_nodes + i] = 1.0;
1476         }
1477       }
1478 
1479       for (CeedInt q = 0; q < num_qpts; q++) {
1480         for (CeedInt n = 0; n < num_nodes; n++) {
1481           CeedInt      d_in              = 0, q_comp_in;
1482           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1483 
1484           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1485             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1486             const CeedScalar *B  = NULL;
1487 
1488             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B));
1489             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in));
1490             if (q_comp_in > 1) {
1491               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1492               else B = &B[(++d_in) * num_qpts * num_nodes];
1493             }
1494             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1495             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1496           }
1497         }
1498       }
1499       if (identity) CeedCall(CeedFree(&identity));
1500       data->assembled_bases_in[b] = B_in;
1501     }
1502   }
1503 
1504   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1505     CeedInt num_qpts;
1506 
1507     if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts));
1508     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts));
1509     for (CeedInt b = 0; b < data->num_active_bases_out; b++) {
1510       bool        has_eval_none = false;
1511       CeedInt     num_nodes;
1512       CeedScalar *B_out = NULL, *identity = NULL;
1513 
1514       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes));
1515       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1516 
1517       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1518         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1519       }
1520       if (has_eval_none) {
1521         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1522         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1523           identity[i * num_nodes + i] = 1.0;
1524         }
1525       }
1526 
1527       for (CeedInt q = 0; q < num_qpts; q++) {
1528         for (CeedInt n = 0; n < num_nodes; n++) {
1529           CeedInt      d_out              = 0, q_comp_out;
1530           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1531 
1532           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1533             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1534             const CeedScalar *B  = NULL;
1535 
1536             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B));
1537             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out));
1538             if (q_comp_out > 1) {
1539               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1540               else B = &B[(++d_out) * num_qpts * num_nodes];
1541             }
1542             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1543             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1544           }
1545         }
1546       }
1547       if (identity) CeedCall(CeedFree(&identity));
1548       data->assembled_bases_out[b] = B_out;
1549     }
1550   }
1551 
1552   // Pass out assembled data
1553   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1554   if (active_bases_in) *active_bases_in = data->active_bases_in;
1555   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1556   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1557   if (active_bases_out) *active_bases_out = data->active_bases_out;
1558   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1559   return CEED_ERROR_SUCCESS;
1560 }
1561 
1562 /**
1563   @brief Get CeedOperator CeedBasis data for assembly.
1564 
1565   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1566 
1567   @param[in]  data                      CeedOperatorAssemblyData
1568   @param[out] num_active_elem_rstrs_in  Number of active input element restrictions, or NULL
1569   @param[out] active_elem_rstrs_in      Pointer to hold active input CeedElemRestrictions, or NULL
1570   @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or NULL
1571   @param[out] active_elem_rstrs_out     Pointer to hold active output CeedElemRestrictions, or NULL
1572 
1573   @return An error code: 0 - success, otherwise - failure
1574 
1575   @ref Backend
1576 **/
1577 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in,
1578                                                 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out,
1579                                                 CeedElemRestriction **active_elem_rstrs_out) {
1580   if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in;
1581   if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in;
1582   if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out;
1583   if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out;
1584   return CEED_ERROR_SUCCESS;
1585 }
1586 
1587 /**
1588   @brief Destroy CeedOperatorAssemblyData
1589 
1590   @param[in,out] data CeedOperatorAssemblyData to destroy
1591 
1592   @return An error code: 0 - success, otherwise - failure
1593 
1594   @ref Backend
1595 **/
1596 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1597   if (!*data) {
1598     *data = NULL;
1599     return CEED_ERROR_SUCCESS;
1600   }
1601   CeedCall(CeedDestroy(&(*data)->ceed));
1602   for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) {
1603     CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b]));
1604     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b]));
1605     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1606     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1607     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1608   }
1609   for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) {
1610     CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b]));
1611     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b]));
1612     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1613     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1614     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1615   }
1616   CeedCall(CeedFree(&(*data)->active_bases_in));
1617   CeedCall(CeedFree(&(*data)->active_bases_out));
1618   CeedCall(CeedFree(&(*data)->active_elem_rstrs_in));
1619   CeedCall(CeedFree(&(*data)->active_elem_rstrs_out));
1620   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1621   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1622   CeedCall(CeedFree(&(*data)->eval_modes_in));
1623   CeedCall(CeedFree(&(*data)->eval_modes_out));
1624   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1625   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1626   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1627   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1628 
1629   CeedCall(CeedFree(data));
1630   return CEED_ERROR_SUCCESS;
1631 }
1632 
1633 /**
1634   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
1635 
1636   @param[in]  op          CeedOperator to retrieve fallback for
1637   @param[out] op_fallback Fallback CeedOperator
1638 
1639   @return An error code: 0 - success, otherwise - failure
1640 
1641   @ref Backend
1642 **/
1643 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
1644   // Create if needed
1645   if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op));
1646   if (op->op_fallback) {
1647     bool is_debug;
1648 
1649     CeedCall(CeedIsDebug(op->ceed, &is_debug));
1650     if (is_debug) {
1651       Ceed        ceed, ceed_fallback;
1652       const char *resource, *resource_fallback;
1653 
1654       CeedCall(CeedOperatorGetCeed(op, &ceed));
1655       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
1656       CeedCall(CeedGetResource(ceed, &resource));
1657       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
1658 
1659       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n");
1660       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
1661                 op->op_fallback);
1662     }
1663   }
1664   *op_fallback = op->op_fallback;
1665   return CEED_ERROR_SUCCESS;
1666 }
1667 
1668 /**
1669   @brief Get the parent CeedOperator for a fallback CeedOperator
1670 
1671   @param[in]  op     CeedOperator context
1672   @param[out] parent Variable to store parent CeedOperator context
1673 
1674   @return An error code: 0 - success, otherwise - failure
1675 
1676   @ref Backend
1677 **/
1678 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) {
1679   *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL;
1680   return CEED_ERROR_SUCCESS;
1681 }
1682 
1683 /**
1684   @brief Get the Ceed context of the parent CeedOperator for a fallback CeedOperator
1685 
1686   @param[in]  op     CeedOperator context
1687   @param[out] parent Variable to store parent Ceed context
1688 
1689   @return An error code: 0 - success, otherwise - failure
1690 
1691   @ref Backend
1692 **/
1693 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) {
1694   *parent = op->op_fallback_parent ? op->op_fallback_parent->ceed : op->ceed;
1695   return CEED_ERROR_SUCCESS;
1696 }
1697 
1698 /// @}
1699 
1700 /// ----------------------------------------------------------------------------
1701 /// CeedOperator Public API
1702 /// ----------------------------------------------------------------------------
1703 /// @addtogroup CeedOperatorUser
1704 /// @{
1705 
1706 /**
1707   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1708 
1709   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1710   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices
1711 representing the action of the CeedQFunction for a corresponding quadrature point on an element.
1712 
1713   Inputs and outputs are in the order provided by the user when adding CeedOperator fields.
1714   For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction
1715 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1716 
1717   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1718 
1719   @param[in]  op        CeedOperator to assemble CeedQFunction
1720   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1721   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1722   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1723 
1724   @return An error code: 0 - success, otherwise - failure
1725 
1726   @ref User
1727 **/
1728 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1729   CeedCall(CeedOperatorCheckReady(op));
1730 
1731   if (op->LinearAssembleQFunction) {
1732     // Backend version
1733     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1734   } else {
1735     // Operator fallback
1736     CeedOperator op_fallback;
1737 
1738     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1739     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1740     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1741   }
1742   return CEED_ERROR_SUCCESS;
1743 }
1744 
1745 /**
1746   @brief Assemble CeedQFunction and store result internally.
1747 
1748   Return copied references of stored data to the caller.
1749   Caller is responsible for ownership and destruction of the copied references.
1750   See also @ref CeedOperatorLinearAssembleQFunction
1751 
1752   Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers.
1753         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
1754 
1755   @param[in]  op        CeedOperator to assemble CeedQFunction
1756   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1757   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1758   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1759 
1760   @return An error code: 0 - success, otherwise - failure
1761 
1762   @ref User
1763 **/
1764 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1765   int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL;
1766   CeedOperator op_assemble                                                                           = NULL;
1767   CeedOperator op_fallback_parent                                                                    = NULL;
1768 
1769   CeedCall(CeedOperatorCheckReady(op));
1770 
1771   // Determine if fallback parent or operator has implementation
1772   CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent));
1773   if (op_fallback_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) {
1774     // -- Backend version for op fallback parent is faster, if it exists
1775     LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate;
1776     op_assemble                   = op_fallback_parent;
1777   } else if (op->LinearAssembleQFunctionUpdate) {
1778     // -- Backend version for op
1779     LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate;
1780     op_assemble                   = op;
1781   }
1782 
1783   // Assemble QFunction
1784   if (LinearAssembleQFunctionUpdate) {
1785     // Backend or fallback parent version
1786     bool                qf_assembled_is_setup;
1787     CeedVector          assembled_vec  = NULL;
1788     CeedElemRestriction assembled_rstr = NULL;
1789 
1790     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1791     if (qf_assembled_is_setup) {
1792       bool update_needed;
1793 
1794       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1795       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1796       if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request));
1797     } else {
1798       CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request));
1799       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1800     }
1801     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1802 
1803     // Copy reference from internally held copy
1804     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1805     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1806     CeedCall(CeedVectorDestroy(&assembled_vec));
1807     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1808   } else {
1809     // Operator fallback
1810     CeedOperator op_fallback;
1811 
1812     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1813     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1814     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1815   }
1816   return CEED_ERROR_SUCCESS;
1817 }
1818 
1819 /**
1820   @brief Assemble the diagonal of a square linear CeedOperator
1821 
1822   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1823 
1824   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1825 
1826   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1827 
1828   @param[in]  op        CeedOperator to assemble CeedQFunction
1829   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1830   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1831 
1832   @return An error code: 0 - success, otherwise - failure
1833 
1834   @ref User
1835 **/
1836 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1837   bool     is_composite;
1838   CeedSize input_size = 0, output_size = 0;
1839 
1840   CeedCall(CeedOperatorCheckReady(op));
1841   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1842 
1843   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1844   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1845 
1846   // Early exit for empty operator
1847   if (!is_composite) {
1848     CeedInt num_elem = 0;
1849 
1850     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1851     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1852   }
1853 
1854   if (op->LinearAssembleDiagonal) {
1855     // Backend version
1856     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1857     return CEED_ERROR_SUCCESS;
1858   } else if (op->LinearAssembleAddDiagonal) {
1859     // Backend version with zeroing first
1860     CeedCall(CeedVectorSetValue(assembled, 0.0));
1861     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1862     return CEED_ERROR_SUCCESS;
1863   } else {
1864     // Operator fallback
1865     CeedOperator op_fallback;
1866 
1867     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1868     if (op_fallback) {
1869       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1870       return CEED_ERROR_SUCCESS;
1871     }
1872   }
1873   // Default interface implementation
1874   CeedCall(CeedVectorSetValue(assembled, 0.0));
1875   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1876   return CEED_ERROR_SUCCESS;
1877 }
1878 
1879 /**
1880   @brief Assemble the diagonal of a square linear CeedOperator
1881 
1882   This sums into a CeedVector the diagonal of a linear CeedOperator.
1883 
1884   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1885 
1886   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1887 
1888   @param[in]  op        CeedOperator to assemble CeedQFunction
1889   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1890   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1891 
1892   @return An error code: 0 - success, otherwise - failure
1893 
1894   @ref User
1895 **/
1896 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1897   bool     is_composite;
1898   CeedSize input_size = 0, output_size = 0;
1899 
1900   CeedCall(CeedOperatorCheckReady(op));
1901   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1902 
1903   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1904   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1905 
1906   // Early exit for empty operator
1907   if (!is_composite) {
1908     CeedInt num_elem = 0;
1909 
1910     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1911     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1912   }
1913 
1914   if (op->LinearAssembleAddDiagonal) {
1915     // Backend version
1916     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1917     return CEED_ERROR_SUCCESS;
1918   } else {
1919     // Operator fallback
1920     CeedOperator op_fallback;
1921 
1922     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1923     if (op_fallback) {
1924       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1925       return CEED_ERROR_SUCCESS;
1926     }
1927   }
1928   // Default interface implementation
1929   if (is_composite) {
1930     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1931   } else {
1932     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1933   }
1934   return CEED_ERROR_SUCCESS;
1935 }
1936 
1937 /**
1938    @brief Fully assemble the point-block diagonal pattern of a linear operator.
1939 
1940    Expected to be used in conjunction with CeedOperatorLinearAssemblePointBlockDiagonal().
1941 
1942    The assembly routines use coordinate format, with `num_entries` tuples of the form (i, j, value) which indicate that value should be added to the
1943 matrix in entry (i, j).
1944   Note that the (i, j) pairs are unique.
1945   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in
1946 the same ordering.
1947 
1948    This will generally be slow unless your operator is low-order.
1949 
1950    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1951 
1952    @param[in]  op          CeedOperator to assemble
1953    @param[out] num_entries Number of entries in coordinate nonzero pattern
1954    @param[out] rows        Row number for each entry
1955    @param[out] cols        Column number for each entry
1956 
1957    @ref User
1958 **/
1959 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1960   Ceed          ceed;
1961   bool          is_composite;
1962   CeedInt       num_active_components, num_sub_operators;
1963   CeedOperator *sub_operators;
1964 
1965   CeedCall(CeedOperatorGetCeed(op, &ceed));
1966   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1967 
1968   CeedSize input_size = 0, output_size = 0;
1969   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1970   CeedCheck(input_size == output_size, ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1971 
1972   if (is_composite) {
1973     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_operators));
1974     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1975   } else {
1976     sub_operators     = &op;
1977     num_sub_operators = 1;
1978   }
1979 
1980   // Verify operator can be assembled correctly
1981   {
1982     CeedOperatorAssemblyData data;
1983     CeedInt                  num_active_elem_rstrs, comp_stride;
1984     CeedElemRestriction     *active_elem_rstrs;
1985 
1986     // Get initial values to check against
1987     CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data));
1988     CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
1989     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride));
1990     CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components));
1991 
1992     // Verify that all active element restrictions have same component stride and number of components
1993     for (CeedInt k = 0; k < num_sub_operators; k++) {
1994       CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data));
1995       CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
1996       for (CeedInt i = 0; i < num_active_elem_rstrs; i++) {
1997         CeedInt comp_stride_sub, num_active_components_sub;
1998 
1999         CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub));
2000         CeedCheck(comp_stride == comp_stride_sub, ceed, CEED_ERROR_DIMENSION,
2001                   "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub);
2002         CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub));
2003         CeedCheck(num_active_components == num_active_components_sub, ceed, CEED_ERROR_INCOMPATIBLE,
2004                   "All suboperators must have the same number of output components");
2005       }
2006     }
2007   }
2008   *num_entries = input_size * num_active_components;
2009   CeedCall(CeedCalloc(*num_entries, rows));
2010   CeedCall(CeedCalloc(*num_entries, cols));
2011 
2012   for (CeedInt o = 0; o < num_sub_operators; o++) {
2013     CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr;
2014     CeedInt             comp_stride, num_elem, elem_size;
2015     const CeedInt      *offsets, *point_block_offsets;
2016 
2017     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr));
2018     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride));
2019     CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem));
2020     CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size));
2021     CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets));
2022 
2023     CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr));
2024     CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets));
2025 
2026     for (CeedSize i = 0; i < num_elem * elem_size; i++) {
2027       for (CeedInt c_out = 0; c_out < num_active_components; c_out++) {
2028         for (CeedInt c_in = 0; c_in < num_active_components; c_in++) {
2029           (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride;
2030           (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride;
2031         }
2032       }
2033     }
2034 
2035     CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets));
2036     CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets));
2037     CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr));
2038   }
2039   return CEED_ERROR_SUCCESS;
2040 }
2041 
2042 /**
2043   @brief Assemble the point block diagonal of a square linear CeedOperator
2044 
2045   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
2046 
2047   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
2048 
2049   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2050 
2051   @param[in]  op        CeedOperator to assemble CeedQFunction
2052   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
2053 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
2054 component in].
2055   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2056 
2057   @return An error code: 0 - success, otherwise - failure
2058 
2059   @ref User
2060 **/
2061 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2062   bool     is_composite;
2063   CeedSize input_size = 0, output_size = 0;
2064 
2065   CeedCall(CeedOperatorCheckReady(op));
2066   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2067 
2068   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2069   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
2070 
2071   // Early exit for empty operator
2072   if (!is_composite) {
2073     CeedInt num_elem = 0;
2074 
2075     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2076     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2077   }
2078 
2079   if (op->LinearAssemblePointBlockDiagonal) {
2080     // Backend version
2081     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
2082     return CEED_ERROR_SUCCESS;
2083   } else if (op->LinearAssembleAddPointBlockDiagonal) {
2084     // Backend version with zeroing first
2085     CeedCall(CeedVectorSetValue(assembled, 0.0));
2086     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2087     return CEED_ERROR_SUCCESS;
2088   } else {
2089     // Operator fallback
2090     CeedOperator op_fallback;
2091 
2092     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2093     if (op_fallback) {
2094       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
2095       return CEED_ERROR_SUCCESS;
2096     }
2097   }
2098   // Default interface implementation
2099   CeedCall(CeedVectorSetValue(assembled, 0.0));
2100   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2101   return CEED_ERROR_SUCCESS;
2102 }
2103 
2104 /**
2105   @brief Assemble the point block diagonal of a square linear CeedOperator
2106 
2107   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
2108 
2109   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
2110 
2111   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2112 
2113   @param[in]  op        CeedOperator to assemble CeedQFunction
2114   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
2115 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
2116 component in].
2117   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2118 
2119   @return An error code: 0 - success, otherwise - failure
2120 
2121   @ref User
2122 **/
2123 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2124   bool     is_composite;
2125   CeedSize input_size = 0, output_size = 0;
2126 
2127   CeedCall(CeedOperatorCheckReady(op));
2128   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2129 
2130   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2131   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
2132 
2133   // Early exit for empty operator
2134   if (!is_composite) {
2135     CeedInt num_elem = 0;
2136 
2137     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2138     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2139   }
2140 
2141   if (op->LinearAssembleAddPointBlockDiagonal) {
2142     // Backend version
2143     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
2144     return CEED_ERROR_SUCCESS;
2145   } else {
2146     // Operator fallback
2147     CeedOperator op_fallback;
2148 
2149     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2150     if (op_fallback) {
2151       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
2152       return CEED_ERROR_SUCCESS;
2153     }
2154   }
2155   // Default interface implementation
2156   if (is_composite) {
2157     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
2158   } else {
2159     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
2160   }
2161   return CEED_ERROR_SUCCESS;
2162 }
2163 
2164 /**
2165    @brief Fully assemble the nonzero pattern of a linear operator.
2166 
2167    Expected to be used in conjunction with CeedOperatorLinearAssemble().
2168 
2169    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
2170 matrix in entry (i, j).
2171   Note that the (i, j) pairs are not unique and may repeat.
2172   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
2173 
2174    This will generally be slow unless your operator is low-order.
2175 
2176    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2177 
2178    @param[in]  op          CeedOperator to assemble
2179    @param[out] num_entries Number of entries in coordinate nonzero pattern
2180    @param[out] rows        Row number for each entry
2181    @param[out] cols        Column number for each entry
2182 
2183    @ref User
2184 **/
2185 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2186   bool          is_composite;
2187   CeedInt       num_suboperators, offset = 0;
2188   CeedSize      single_entries;
2189   CeedOperator *sub_operators;
2190 
2191   CeedCall(CeedOperatorCheckReady(op));
2192   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2193 
2194   if (op->LinearAssembleSymbolic) {
2195     // Backend version
2196     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
2197     return CEED_ERROR_SUCCESS;
2198   } else {
2199     // Operator fallback
2200     CeedOperator op_fallback;
2201 
2202     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2203     if (op_fallback) {
2204       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
2205       return CEED_ERROR_SUCCESS;
2206     }
2207   }
2208 
2209   // Default interface implementation
2210 
2211   // Count entries and allocate rows, cols arrays
2212   *num_entries = 0;
2213   if (is_composite) {
2214     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2215     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2216     for (CeedInt k = 0; k < num_suboperators; ++k) {
2217       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2218       *num_entries += single_entries;
2219     }
2220   } else {
2221     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
2222     *num_entries += single_entries;
2223   }
2224   CeedCall(CeedCalloc(*num_entries, rows));
2225   CeedCall(CeedCalloc(*num_entries, cols));
2226 
2227   // Assemble nonzero locations
2228   if (is_composite) {
2229     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2230     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2231     for (CeedInt k = 0; k < num_suboperators; ++k) {
2232       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
2233       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2234       offset += single_entries;
2235     }
2236   } else {
2237     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
2238   }
2239   return CEED_ERROR_SUCCESS;
2240 }
2241 
2242 /**
2243    @brief Fully assemble the nonzero entries of a linear operator.
2244 
2245    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
2246 
2247    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
2248 matrix in entry (i, j).
2249   Note that the (i, j) pairs are not unique and may repeat.
2250   This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
2251 
2252    This will generally be slow unless your operator is low-order.
2253 
2254    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2255 
2256    @param[in]  op     CeedOperator to assemble
2257    @param[out] values Values to assemble into matrix
2258 
2259    @ref User
2260 **/
2261 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
2262   bool          is_composite;
2263   CeedInt       num_suboperators, offset = 0;
2264   CeedSize      single_entries = 0;
2265   CeedOperator *sub_operators;
2266 
2267   CeedCall(CeedOperatorCheckReady(op));
2268   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2269 
2270   // Early exit for empty operator
2271   if (!is_composite) {
2272     CeedInt num_elem = 0;
2273 
2274     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2275     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2276   }
2277 
2278   if (op->LinearAssemble) {
2279     // Backend version
2280     CeedCall(op->LinearAssemble(op, values));
2281     return CEED_ERROR_SUCCESS;
2282   } else {
2283     // Operator fallback
2284     CeedOperator op_fallback;
2285 
2286     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2287     if (op_fallback) {
2288       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2289       return CEED_ERROR_SUCCESS;
2290     }
2291   }
2292 
2293   // Default interface implementation
2294   CeedCall(CeedVectorSetValue(values, 0.0));
2295   if (is_composite) {
2296     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2297     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2298     for (CeedInt k = 0; k < num_suboperators; k++) {
2299       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2300       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2301       offset += single_entries;
2302     }
2303   } else {
2304     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2305   }
2306   return CEED_ERROR_SUCCESS;
2307 }
2308 
2309 /**
2310   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
2311 
2312   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2313 
2314   @param[in]  op               Composite CeedOperator
2315   @param[in]  num_skip_indices Number of suboperators to skip
2316   @param[in]  skip_indices     Array of indices of suboperators to skip
2317   @param[out] mult             Vector to store multiplicity (of size l_size)
2318 
2319   @return An error code: 0 - success, otherwise - failure
2320 
2321   @ref User
2322 **/
2323 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2324   Ceed                ceed;
2325   CeedInt             num_suboperators;
2326   CeedSize            l_vec_len;
2327   CeedScalar         *mult_array;
2328   CeedVector          ones_l_vec;
2329   CeedElemRestriction elem_rstr, mult_elem_rstr;
2330   CeedOperator       *sub_operators;
2331 
2332   CeedCall(CeedOperatorCheckReady(op));
2333 
2334   CeedCall(CeedOperatorGetCeed(op, &ceed));
2335 
2336   // Zero mult vector
2337   CeedCall(CeedVectorSetValue(mult, 0.0));
2338 
2339   // Get suboperators
2340   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2341   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2342   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2343 
2344   // Work vector
2345   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2346   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2347   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2348   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2349 
2350   // Compute multiplicity across suboperators
2351   for (CeedInt i = 0; i < num_suboperators; i++) {
2352     const CeedScalar *sub_mult_array;
2353     CeedVector        sub_mult_l_vec, ones_e_vec;
2354 
2355     // -- Check for suboperator to skip
2356     for (CeedInt j = 0; j < num_skip_indices; j++) {
2357       if (skip_indices[j] == i) continue;
2358     }
2359 
2360     // -- Sub operator multiplicity
2361     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2362     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr));
2363     CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2364     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2365     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2366     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2367     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2368     // ---- Flag every node present in the current suboperator
2369     for (CeedInt j = 0; j < l_vec_len; j++) {
2370       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2371     }
2372     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2373     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2374     CeedCall(CeedVectorDestroy(&ones_e_vec));
2375     CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr));
2376   }
2377   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2378   CeedCall(CeedVectorDestroy(&ones_l_vec));
2379   return CEED_ERROR_SUCCESS;
2380 }
2381 
2382 /**
2383   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2384 grid interpolation
2385 
2386   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2387 
2388   @param[in]  op_fine      Fine grid operator
2389   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2390   @param[in]  rstr_coarse  Coarse grid restriction
2391   @param[in]  basis_coarse Coarse grid active vector basis
2392   @param[out] op_coarse    Coarse grid operator
2393   @param[out] op_prolong   Coarse to fine operator, or NULL
2394   @param[out] op_restrict  Fine to coarse operator, or NULL
2395 
2396   @return An error code: 0 - success, otherwise - failure
2397 
2398   @ref User
2399 **/
2400 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2401                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2402   CeedBasis basis_c_to_f = NULL;
2403 
2404   CeedCall(CeedOperatorCheckReady(op_fine));
2405 
2406   // Build prolongation matrix, if required
2407   if (op_prolong || op_restrict) {
2408     CeedBasis basis_fine;
2409 
2410     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2411     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2412   }
2413 
2414   // Core code
2415   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2416   return CEED_ERROR_SUCCESS;
2417 }
2418 
2419 /**
2420   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2421 
2422   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2423 
2424   @param[in]  op_fine       Fine grid operator
2425   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2426   @param[in]  rstr_coarse   Coarse grid restriction
2427   @param[in]  basis_coarse  Coarse grid active vector basis
2428   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2429   @param[out] op_coarse     Coarse grid operator
2430   @param[out] op_prolong    Coarse to fine operator, or NULL
2431   @param[out] op_restrict   Fine to coarse operator, or NULL
2432 
2433   @return An error code: 0 - success, otherwise - failure
2434 
2435   @ref User
2436 **/
2437 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2438                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2439                                              CeedOperator *op_restrict) {
2440   Ceed      ceed;
2441   CeedInt   Q_f, Q_c;
2442   CeedBasis basis_fine, basis_c_to_f = NULL;
2443 
2444   CeedCall(CeedOperatorCheckReady(op_fine));
2445   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2446 
2447   // Check for compatible quadrature spaces
2448   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2449   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2450   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2451   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2452 
2453   // Create coarse to fine basis, if required
2454   if (op_prolong || op_restrict) {
2455     CeedInt     dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2456     CeedScalar *q_ref, *q_weight, *grad;
2457 
2458     // Check if interpolation matrix is provided
2459     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2460               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2461     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2462     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2463     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2464     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2465     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2466     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2467     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2468     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2469     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2470     CeedCall(CeedFree(&q_ref));
2471     CeedCall(CeedFree(&q_weight));
2472     CeedCall(CeedFree(&grad));
2473   }
2474 
2475   // Core code
2476   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2477   return CEED_ERROR_SUCCESS;
2478 }
2479 
2480 /**
2481   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2482 
2483   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2484 
2485   @param[in]  op_fine       Fine grid operator
2486   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2487   @param[in]  rstr_coarse   Coarse grid restriction
2488   @param[in]  basis_coarse  Coarse grid active vector basis
2489   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2490   @param[out] op_coarse     Coarse grid operator
2491   @param[out] op_prolong    Coarse to fine operator, or NULL
2492   @param[out] op_restrict   Fine to coarse operator, or NULL
2493 
2494   @return An error code: 0 - success, otherwise - failure
2495 
2496   @ref User
2497 **/
2498 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2499                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2500                                        CeedOperator *op_restrict) {
2501   Ceed      ceed;
2502   CeedInt   Q_f, Q_c;
2503   CeedBasis basis_fine, basis_c_to_f = NULL;
2504 
2505   CeedCall(CeedOperatorCheckReady(op_fine));
2506   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2507 
2508   // Check for compatible quadrature spaces
2509   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2510   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2511   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2512   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2513 
2514   // Coarse to fine basis
2515   if (op_prolong || op_restrict) {
2516     CeedInt          dim, num_comp, num_nodes_c, num_nodes_f;
2517     CeedScalar      *q_ref, *q_weight, *grad;
2518     CeedElemTopology topo;
2519 
2520     // Check if interpolation matrix is provided
2521     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2522               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2523     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2524     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2525     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2526     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2527     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2528     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2529     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2530     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2531     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2532     CeedCall(CeedFree(&q_ref));
2533     CeedCall(CeedFree(&q_weight));
2534     CeedCall(CeedFree(&grad));
2535   }
2536 
2537   // Core code
2538   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2539   return CEED_ERROR_SUCCESS;
2540 }
2541 
2542 /**
2543   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2544 
2545   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2546   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2547   The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T
2548 \hat S V\f$.
2549   The CeedOperator must be linear and non-composite.
2550   The associated CeedQFunction must therefore also be linear.
2551 
2552   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2553 
2554   @param[in]  op      CeedOperator to create element inverses
2555   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2556   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2557 
2558   @return An error code: 0 - success, otherwise - failure
2559 
2560   @ref User
2561 **/
2562 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2563   Ceed                 ceed, ceed_parent;
2564   bool                 interp = false, grad = false, is_tensor_basis = true;
2565   CeedInt              num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2566   CeedSize             l_size = 1;
2567   CeedScalar          *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg;
2568   const CeedScalar    *interp_1d, *grad_1d, *q_weight_1d;
2569   CeedVector           q_data;
2570   CeedElemRestriction  rstr  = NULL, rstr_qd_i;
2571   CeedBasis            basis = NULL, fdm_basis;
2572   CeedQFunctionContext ctx_fdm;
2573   CeedQFunctionField  *qf_fields;
2574   CeedQFunction        qf, qf_fdm;
2575   CeedOperatorField   *op_fields;
2576 
2577   CeedCall(CeedOperatorCheckReady(op));
2578 
2579   if (op->CreateFDMElementInverse) {
2580     // Backend version
2581     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2582     return CEED_ERROR_SUCCESS;
2583   } else {
2584     // Operator fallback
2585     CeedOperator op_fallback;
2586 
2587     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2588     if (op_fallback) {
2589       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2590       return CEED_ERROR_SUCCESS;
2591     }
2592   }
2593 
2594   // Default interface implementation
2595   CeedCall(CeedOperatorGetCeed(op, &ceed));
2596   CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent));
2597   CeedCall(CeedOperatorGetQFunction(op, &qf));
2598 
2599   // Determine active input basis
2600   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2601   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2602   for (CeedInt i = 0; i < num_input_fields; i++) {
2603     CeedVector vec;
2604 
2605     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2606     if (vec == CEED_VECTOR_ACTIVE) {
2607       CeedEvalMode eval_mode;
2608 
2609       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2610       interp = interp || eval_mode == CEED_EVAL_INTERP;
2611       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2612       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2613       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2614     }
2615   }
2616   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2617   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2618   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2619   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2620   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2621   CeedCall(CeedBasisGetDimension(basis, &dim));
2622   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2623   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2624   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2625 
2626   // Build and diagonalize 1D Mass and Laplacian
2627   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2628   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2629   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2630   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2631   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2632   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2633   CeedCall(CeedCalloc(P_1d, &lambda));
2634   // -- Build matrices
2635   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2636   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2637   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2638   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2639 
2640   // -- Diagonalize
2641   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2642   CeedCall(CeedFree(&mass));
2643   CeedCall(CeedFree(&laplace));
2644   for (CeedInt i = 0; i < P_1d; i++) {
2645     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2646   }
2647   CeedCall(CeedFree(&x));
2648 
2649   {
2650     CeedInt             layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2651     CeedScalar          max_norm = 0;
2652     const CeedScalar   *assembled_array, *q_weight_array;
2653     CeedVector          assembled = NULL, q_weight;
2654     CeedElemRestriction rstr_qf   = NULL;
2655 
2656     // Assemble QFunction
2657     CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2658     CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2659     CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2660     CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2661 
2662     // Calculate element averages
2663     CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2664     CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2665     CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2666     CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2667     CeedCall(CeedCalloc(num_elem, &elem_avg));
2668     const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2669 
2670     for (CeedInt e = 0; e < num_elem; e++) {
2671       CeedInt count = 0;
2672 
2673       for (CeedInt q = 0; q < num_qpts; q++) {
2674         for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2675           if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2676             elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2677             count++;
2678           }
2679         }
2680       }
2681       if (count) {
2682         elem_avg[e] /= count;
2683       } else {
2684         elem_avg[e] = 1.0;
2685       }
2686     }
2687     CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2688     CeedCall(CeedVectorDestroy(&assembled));
2689     CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2690     CeedCall(CeedVectorDestroy(&q_weight));
2691   }
2692 
2693   // Build FDM diagonal
2694   {
2695     CeedScalar *q_data_array, *fdm_diagonal;
2696 
2697     CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2698     const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2699     for (CeedInt c = 0; c < num_comp; c++) {
2700       for (CeedInt n = 0; n < num_nodes; n++) {
2701         if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2702         if (grad) {
2703           for (CeedInt d = 0; d < dim; d++) {
2704             CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2705             fdm_diagonal[c * num_nodes + n] += lambda[i];
2706           }
2707         }
2708         if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2709       }
2710     }
2711     CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2712     CeedCall(CeedVectorSetValue(q_data, 0.0));
2713     CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2714     for (CeedInt e = 0; e < num_elem; e++) {
2715       for (CeedInt c = 0; c < num_comp; c++) {
2716         for (CeedInt n = 0; n < num_nodes; n++)
2717           q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2718       }
2719     }
2720     CeedCall(CeedFree(&elem_avg));
2721     CeedCall(CeedFree(&fdm_diagonal));
2722     CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2723   }
2724 
2725   // Setup FDM operator
2726   // -- Basis
2727   {
2728     CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2729 
2730     CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2731     CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2732     CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2733     CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2734     CeedCall(CeedFree(&fdm_interp));
2735     CeedCall(CeedFree(&grad_dummy));
2736     CeedCall(CeedFree(&q_ref_dummy));
2737     CeedCall(CeedFree(&q_weight_dummy));
2738     CeedCall(CeedFree(&lambda));
2739   }
2740 
2741   // -- Restriction
2742   {
2743     CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp};
2744     CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i));
2745   }
2746 
2747   // -- QFunction
2748   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2749   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2750   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2751   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2752   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2753 
2754   // -- QFunction context
2755   {
2756     CeedInt *num_comp_data;
2757 
2758     CeedCall(CeedCalloc(1, &num_comp_data));
2759     num_comp_data[0] = num_comp;
2760     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2761     CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2762   }
2763   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2764   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2765 
2766   // -- Operator
2767   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2768   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2769   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data));
2770   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2771 
2772   // Cleanup
2773   CeedCall(CeedVectorDestroy(&q_data));
2774   CeedCall(CeedBasisDestroy(&fdm_basis));
2775   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2776   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2777   return CEED_ERROR_SUCCESS;
2778 }
2779 
2780 /// @}
2781