xref: /libCEED/interface/ceed-preconditioning.c (revision 0459ebd300026985503495dab7b02c229b713bb9)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   char *source_path_with_name = NULL;
39 
40   // Check if NULL qf passed in
41   if (!qf) return CEED_ERROR_SUCCESS;
42 
43   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
44   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
45 
46   if (qf->source_path) {
47     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
48     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
49     memcpy(source_path_with_name, qf->source_path, path_len);
50     memcpy(&source_path_with_name[path_len], ":", 1);
51     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
52   } else {
53     CeedCall(CeedCalloc(1, &source_path_with_name));
54   }
55 
56   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
57   {
58     CeedQFunctionContext ctx;
59 
60     CeedCall(CeedQFunctionGetContext(qf, &ctx));
61     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
62   }
63   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
64     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
65   }
66   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
67     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
68   }
69   CeedCall(CeedFree(&source_path_with_name));
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   Ceed         ceed_fallback;
84   bool         is_composite;
85   CeedOperator op_fallback;
86 
87   // Check not already created
88   if (op->op_fallback) return CEED_ERROR_SUCCESS;
89 
90   // Fallback Ceed
91   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
92   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
93 
94   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
95   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
96 
97   // Clone Op
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114 
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
117     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
118     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
119     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
120       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
121                                     op->input_fields[i]->vec));
122     }
123     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
124       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
125                                     op->output_fields[i]->vec));
126     }
127     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
128     // Cleanup
129     CeedCall(CeedQFunctionDestroy(&qf_fallback));
130     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
131     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
132   }
133   CeedCall(CeedOperatorSetName(op_fallback, op->name));
134   CeedCall(CeedOperatorCheckReady(op_fallback));
135   // Note: No ref-counting here so we don't get caught in a reference loop.
136   //       The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback.
137   op->op_fallback                 = op_fallback;
138   op_fallback->op_fallback_parent = op;
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Select correct basis matrix pointer based on CeedEvalMode
144 
145   @param[in]  basis     CeedBasis from which to get the basis matrix
146   @param[in]  eval_mode Current basis evaluation mode
147   @param[in]  identity  Pointer to identity matrix
148   @param[out] basis_ptr Basis pointer to set
149 
150   @ref Developer
151 **/
152 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
153   switch (eval_mode) {
154     case CEED_EVAL_NONE:
155       *basis_ptr = identity;
156       break;
157     case CEED_EVAL_INTERP:
158       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
159       break;
160     case CEED_EVAL_GRAD:
161       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
162       break;
163     case CEED_EVAL_DIV:
164       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
165       break;
166     case CEED_EVAL_CURL:
167       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
168       break;
169     case CEED_EVAL_WEIGHT:
170       break;  // Caught by QF Assembly
171   }
172   assert(*basis_ptr != NULL);
173   return CEED_ERROR_SUCCESS;
174 }
175 
176 /**
177   @brief Core logic for assembling operator diagonal or point block diagonal
178 
179   @param[in]  op             CeedOperator to assemble point block diagonal
180   @param[in]  request        Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
181   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
182   @param[out] assembled      CeedVector to store assembled diagonal
183 
184   @return An error code: 0 - success, otherwise - failure
185 
186   @ref Developer
187 **/
188 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_point_block, CeedVector assembled) {
189   Ceed ceed;
190   bool is_composite;
191 
192   CeedCall(CeedOperatorGetCeed(op, &ceed));
193   CeedCall(CeedOperatorIsComposite(op, &is_composite));
194   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
195 
196   // Assemble QFunction
197   CeedInt             layout_qf[3];
198   const CeedScalar   *assembled_qf_array;
199   CeedVector          assembled_qf        = NULL;
200   CeedElemRestriction assembled_elem_rstr = NULL;
201 
202   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
203   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf));
204   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
205   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
206 
207   // Get assembly data
208   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
209   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
210   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
211   CeedBasis               *active_bases_in, *active_bases_out;
212   CeedElemRestriction     *active_elem_rstrs_in, *active_elem_rstrs_out;
213   CeedOperatorAssemblyData data;
214 
215   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
216   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
217                                                 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out,
218                                                 &num_output_components));
219   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL));
220   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out));
221 
222   // Loop over all active bases (find matching input/output pairs)
223   for (CeedInt b = 0; b < CeedIntMin(num_active_bases_in, num_active_bases_out); b++) {
224     CeedInt             b_in, b_out, num_elem, num_nodes, num_qpts, num_comp;
225     bool                has_eval_none = false;
226     CeedScalar         *elem_diag_array, *identity = NULL;
227     CeedVector          elem_diag;
228     CeedElemRestriction diag_elem_rstr;
229 
230     if (num_active_bases_in <= num_active_bases_out) {
231       b_in = b;
232       for (b_out = 0; b_out < num_active_bases_out; b_out++) {
233         if (active_bases_in[b_in] == active_bases_out[b_out]) {
234           break;
235         }
236       }
237       if (b_out == num_active_bases_out) {
238         continue;
239       }  // No matching output basis found
240     } else {
241       b_out = b;
242       for (b_in = 0; b_in < num_active_bases_in; b_in++) {
243         if (active_bases_in[b_in] == active_bases_out[b_out]) {
244           break;
245         }
246       }
247       if (b_in == num_active_bases_in) {
248         continue;
249       }  // No matching output basis found
250     }
251     CeedCheck(active_elem_rstrs_in[b_in] == active_elem_rstrs_out[b_out], ceed, CEED_ERROR_UNSUPPORTED,
252               "Cannot assemble operator diagonal with different input and output active element restrictions");
253 
254     // Assemble point block diagonal restriction, if needed
255     if (is_point_block) {
256       CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b_in], &diag_elem_rstr));
257     } else {
258       CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b_in], &diag_elem_rstr));
259     }
260 
261     // Create diagonal vector
262     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
263 
264     // Assemble element operator diagonals
265     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
266     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
267     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
268     CeedCall(CeedBasisGetNumNodes(active_bases_in[b_in], &num_nodes));
269     CeedCall(CeedBasisGetNumComponents(active_bases_in[b_in], &num_comp));
270     if (active_bases_in[b_in] == CEED_BASIS_NONE) num_qpts = num_nodes;
271     else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b_in], &num_qpts));
272 
273     // Construct identity matrix for basis if required
274     for (CeedInt i = 0; i < num_eval_modes_in[b_in]; i++) {
275       has_eval_none = has_eval_none || (eval_modes_in[b_in][i] == CEED_EVAL_NONE);
276     }
277     for (CeedInt i = 0; i < num_eval_modes_out[b_out]; i++) {
278       has_eval_none = has_eval_none || (eval_modes_out[b_out][i] == CEED_EVAL_NONE);
279     }
280     if (has_eval_none) {
281       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
282       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
283     }
284 
285     // Compute the diagonal of B^T D B
286     // Each element
287     for (CeedSize e = 0; e < num_elem; e++) {
288       // Each basis eval mode pair
289       CeedInt      d_out              = 0, q_comp_out;
290       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
291 
292       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b_out]; e_out++) {
293         CeedInt           d_in              = 0, q_comp_in;
294         const CeedScalar *B_t               = NULL;
295         CeedEvalMode      eval_mode_in_prev = CEED_EVAL_NONE;
296 
297         CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b_out], eval_modes_out[b_out][e_out], identity, &B_t));
298         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b_out], eval_modes_out[b_out][e_out], &q_comp_out));
299         if (q_comp_out > 1) {
300           if (e_out == 0 || eval_modes_out[b_out][e_out] != eval_mode_out_prev) d_out = 0;
301           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
302         }
303         eval_mode_out_prev = eval_modes_out[b_out][e_out];
304 
305         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b_in]; e_in++) {
306           const CeedScalar *B = NULL;
307 
308           CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b_in], eval_modes_in[b_in][e_in], identity, &B));
309           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b_in], eval_modes_in[b_in][e_in], &q_comp_in));
310           if (q_comp_in > 1) {
311             if (e_in == 0 || eval_modes_in[b_in][e_in] != eval_mode_in_prev) d_in = 0;
312             else B = &B[(++d_in) * num_qpts * num_nodes];
313           }
314           eval_mode_in_prev = eval_modes_in[b_in][e_in];
315 
316           // Each component
317           for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
318             // Each qpt/node pair
319             for (CeedInt q = 0; q < num_qpts; q++) {
320               if (is_point_block) {
321                 // Point Block Diagonal
322                 for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
323                   const CeedSize c_offset =
324                       (eval_mode_offsets_in[b_in][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
325                   const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
326 
327                   for (CeedInt n = 0; n < num_nodes; n++) {
328                     elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
329                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
330                   }
331                 }
332               } else {
333                 // Diagonal Only
334                 const CeedInt c_offset =
335                     (eval_mode_offsets_in[b_in][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
336                 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
337 
338                 for (CeedInt n = 0; n < num_nodes; n++) {
339                   elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
340                 }
341               }
342             }
343           }
344         }
345       }
346     }
347     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
348 
349     // Assemble local operator diagonal
350     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
351 
352     // Cleanup
353     CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
354     CeedCall(CeedVectorDestroy(&elem_diag));
355     CeedCall(CeedFree(&identity));
356   }
357   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
358   CeedCall(CeedVectorDestroy(&assembled_qf));
359   return CEED_ERROR_SUCCESS;
360 }
361 
362 /**
363   @brief Core logic for assembling composite operator diagonal
364 
365   @param[in]  op             CeedOperator to assemble point block diagonal
366   @param[in]  request        Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
367   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
368   @param[out] assembled      CeedVector to store assembled diagonal
369 
370   @return An error code: 0 - success, otherwise - failure
371 
372   @ref Developer
373 **/
374 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block,
375                                                                  CeedVector assembled) {
376   CeedInt       num_sub;
377   CeedOperator *suboperators;
378 
379   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
380   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
381   for (CeedInt i = 0; i < num_sub; i++) {
382     if (is_point_block) {
383       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
384     } else {
385       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
386     }
387   }
388   return CEED_ERROR_SUCCESS;
389 }
390 
391 /**
392   @brief Build nonzero pattern for non-composite operator
393 
394   Users should generally use CeedOperatorLinearAssembleSymbolic()
395 
396   @param[in]  op     CeedOperator to assemble nonzero pattern
397   @param[in]  offset Offset for number of entries
398   @param[out] rows   Row number for each entry
399   @param[out] cols   Column number for each entry
400 
401   @return An error code: 0 - success, otherwise - failure
402 
403   @ref Developer
404 **/
405 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
406   Ceed                ceed;
407   bool                is_composite;
408   CeedSize            num_nodes_in, num_nodes_out, count = 0;
409   CeedInt             num_elem_in, elem_size_in, num_comp_in, layout_er_in[3];
410   CeedInt             num_elem_out, elem_size_out, num_comp_out, layout_er_out[3], local_num_entries;
411   CeedScalar         *array;
412   const CeedScalar   *elem_dof_a_in, *elem_dof_a_out;
413   CeedVector          index_vec_in, index_vec_out, elem_dof_in, elem_dof_out;
414   CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out;
415 
416   CeedCall(CeedOperatorGetCeed(op, &ceed));
417   CeedCall(CeedOperatorIsComposite(op, &is_composite));
418   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
419 
420   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out));
421   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
422   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
423   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
424   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
425   CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, &layout_er_in));
426 
427   // Determine elem_dof relation for input
428   CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in));
429   CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array));
430   for (CeedInt i = 0; i < num_nodes_in; i++) array[i] = i;
431   CeedCall(CeedVectorRestoreArray(index_vec_in, &array));
432   CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in));
433   CeedCall(CeedVectorSetValue(elem_dof_in, 0.0));
434   CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in));
435   CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE));
436   CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in));
437   CeedCall(CeedVectorDestroy(&index_vec_in));
438   CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in));
439 
440   if (elem_rstr_in != elem_rstr_out) {
441     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
442     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
443               "Active input and output operator restrictions must have the same number of elements");
444     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
445     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
446     CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, &layout_er_out));
447 
448     // Determine elem_dof relation for output
449     CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out));
450     CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array));
451     for (CeedInt i = 0; i < num_nodes_out; i++) array[i] = i;
452     CeedCall(CeedVectorRestoreArray(index_vec_out, &array));
453     CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out));
454     CeedCall(CeedVectorSetValue(elem_dof_out, 0.0));
455     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out));
456     CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE));
457     CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out));
458     CeedCall(CeedVectorDestroy(&index_vec_out));
459     CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out));
460   } else {
461     num_elem_out     = num_elem_in;
462     elem_size_out    = elem_size_in;
463     num_comp_out     = num_comp_in;
464     layout_er_out[0] = layout_er_in[0];
465     layout_er_out[1] = layout_er_in[1];
466     layout_er_out[2] = layout_er_in[2];
467     elem_dof_a_out   = elem_dof_a_in;
468   }
469   local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
470 
471   // Determine i, j locations for element matrices
472   for (CeedInt e = 0; e < num_elem_in; e++) {
473     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
474       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
475         for (CeedInt i = 0; i < elem_size_out; i++) {
476           for (CeedInt j = 0; j < elem_size_in; j++) {
477             const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2];
478             const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2];
479             const CeedInt row                = elem_dof_a_out[elem_dof_index_row];
480             const CeedInt col                = elem_dof_a_in[elem_dof_index_col];
481 
482             rows[offset + count] = row;
483             cols[offset + count] = col;
484             count++;
485           }
486         }
487       }
488     }
489   }
490   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
491   CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in));
492   CeedCall(CeedVectorDestroy(&elem_dof_in));
493   if (elem_rstr_in != elem_rstr_out) {
494     CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out));
495     CeedCall(CeedVectorDestroy(&elem_dof_out));
496   }
497   return CEED_ERROR_SUCCESS;
498 }
499 
500 /**
501   @brief Assemble nonzero entries for non-composite operator
502 
503   Users should generally use CeedOperatorLinearAssemble()
504 
505   @param[in]  op     CeedOperator to assemble
506   @param[in]  offset Offset for number of entries
507   @param[out] values Values to assemble into matrix
508 
509   @return An error code: 0 - success, otherwise - failure
510 
511   @ref Developer
512 **/
513 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
514   Ceed ceed;
515   bool is_composite;
516 
517   CeedCall(CeedOperatorGetCeed(op, &ceed));
518   CeedCall(CeedOperatorIsComposite(op, &is_composite));
519   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
520 
521   // Early exit for empty operator
522   {
523     CeedInt num_elem = 0;
524 
525     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
526     if (num_elem == 0) return CEED_ERROR_SUCCESS;
527   }
528 
529   if (op->LinearAssembleSingle) {
530     // Backend version
531     CeedCall(op->LinearAssembleSingle(op, offset, values));
532     return CEED_ERROR_SUCCESS;
533   } else {
534     // Operator fallback
535     CeedOperator op_fallback;
536 
537     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
538     if (op_fallback) {
539       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
540       return CEED_ERROR_SUCCESS;
541     }
542   }
543 
544   // Assemble QFunction
545   CeedInt             layout_qf[3];
546   const CeedScalar   *assembled_qf_array;
547   CeedVector          assembled_qf        = NULL;
548   CeedElemRestriction assembled_elem_rstr = NULL;
549 
550   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE));
551   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf));
552   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
553   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
554 
555   // Get assembly data
556   CeedInt                  num_elem_in, elem_size_in, num_comp_in, num_qpts_in;
557   CeedInt                  num_elem_out, elem_size_out, num_comp_out, num_qpts_out, local_num_entries;
558   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
559   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
560   CeedBasis               *active_bases_in, *active_bases_out, basis_in, basis_out;
561   const CeedScalar       **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out;
562   CeedElemRestriction      elem_rstr_in, elem_rstr_out;
563   CeedRestrictionType      elem_rstr_type_in, elem_rstr_type_out;
564   const bool              *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL;
565   const CeedInt8          *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL;
566   CeedOperatorAssemblyData data;
567 
568   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
569   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out,
570                                                 &num_eval_modes_out, &eval_modes_out, NULL, NULL));
571 
572   CeedCheck(num_active_bases_in == num_active_bases_out && num_active_bases_in == 1, ceed, CEED_ERROR_UNSUPPORTED,
573             "Cannot assemble operator with multiple active bases");
574   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator without inputs/outputs");
575 
576   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out));
577   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
578   basis_in  = active_bases_in[0];
579   basis_out = active_bases_out[0];
580   B_mat_in  = B_mats_in[0];
581   B_mat_out = B_mats_out[0];
582 
583   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
584   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
585   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
586   if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in;
587   else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in));
588 
589   CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in));
590   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
591     CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in));
592   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
593     CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in));
594   }
595 
596   if (elem_rstr_in != elem_rstr_out) {
597     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
598     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
599               "Active input and output operator restrictions must have the same number of elements");
600     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
601     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
602     if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out;
603     else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out));
604     CeedCheck(num_qpts_in == num_qpts_out, ceed, CEED_ERROR_UNSUPPORTED,
605               "Active input and output bases must have the same number of quadrature points");
606 
607     CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out));
608     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
609       CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out));
610     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
611       CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out));
612     }
613   } else {
614     num_elem_out  = num_elem_in;
615     elem_size_out = elem_size_in;
616     num_comp_out  = num_comp_in;
617     num_qpts_out  = num_qpts_in;
618 
619     elem_rstr_orients_out      = elem_rstr_orients_in;
620     elem_rstr_curl_orients_out = elem_rstr_curl_orients_in;
621   }
622   local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
623 
624   // Loop over elements and put in data structure
625   // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
626   CeedSize           count = 0;
627   CeedTensorContract contract;
628   CeedScalar        *vals, *BTD_mat = NULL, *elem_mat = NULL, *elem_mat_b = NULL;
629 
630   CeedCall(CeedTensorContractCreate(ceed, &contract));
631   CeedCall(CeedCalloc(elem_size_out * num_qpts_in * num_eval_modes_in[0], &BTD_mat));
632   CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat));
633   if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b));
634 
635   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
636   for (CeedSize e = 0; e < num_elem_in; e++) {
637     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
638       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
639         // Compute B^T*D
640         for (CeedSize n = 0; n < elem_size_out; n++) {
641           for (CeedSize q = 0; q < num_qpts_in; q++) {
642             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
643               const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in;
644               CeedScalar     sum       = 0.0;
645 
646               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
647                 const CeedSize b_out_index     = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n;
648                 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out;
649                 const CeedSize qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
650 
651                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
652               }
653               BTD_mat[btd_index] = sum;
654             }
655           }
656         }
657 
658         // Form element matrix itself (for each block component)
659         CeedCall(CeedTensorContractApply(contract, 1, num_qpts_in * num_eval_modes_in[0], elem_size_in, elem_size_out, BTD_mat, CEED_NOTRANSPOSE,
660                                          false, B_mat_in, elem_mat));
661 
662         // Transform the element matrix if required
663         if (elem_rstr_orients_out) {
664           const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out];
665 
666           for (CeedInt i = 0; i < elem_size_out; i++) {
667             const double orient = elem_orients[i] ? -1.0 : 1.0;
668 
669             for (CeedInt j = 0; j < elem_size_in; j++) {
670               elem_mat[i * elem_size_in + j] *= orient;
671             }
672           }
673         } else if (elem_rstr_curl_orients_out) {
674           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out];
675 
676           // T^T*(B^T*D*B)
677           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
678           for (CeedInt i = 0; i < elem_size_out; i++) {
679             for (CeedInt j = 0; j < elem_size_in; j++) {
680               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] +
681                                                (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) +
682                                                (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0);
683             }
684           }
685         }
686         if (elem_rstr_orients_in) {
687           const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in];
688 
689           for (CeedInt i = 0; i < elem_size_out; i++) {
690             for (CeedInt j = 0; j < elem_size_in; j++) {
691               elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0;
692             }
693           }
694         } else if (elem_rstr_curl_orients_in) {
695           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in];
696 
697           // (B^T*D*B)*T
698           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
699           for (CeedInt i = 0; i < elem_size_out; i++) {
700             for (CeedInt j = 0; j < elem_size_in; j++) {
701               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] +
702                                                (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) +
703                                                (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0);
704             }
705           }
706         }
707 
708         // Put element matrix in coordinate data structure
709         for (CeedInt i = 0; i < elem_size_out; i++) {
710           for (CeedInt j = 0; j < elem_size_in; j++) {
711             vals[offset + count] = elem_mat[i * elem_size_in + j];
712             count++;
713           }
714         }
715       }
716     }
717   }
718   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries");
719   CeedCall(CeedVectorRestoreArray(values, &vals));
720 
721   // Cleanup
722   CeedCall(CeedTensorContractDestroy(&contract));
723   CeedCall(CeedFree(&BTD_mat));
724   CeedCall(CeedFree(&elem_mat));
725   CeedCall(CeedFree(&elem_mat_b));
726   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
727     CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in));
728   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
729     CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in));
730   }
731   if (elem_rstr_in != elem_rstr_out) {
732     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
733       CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out));
734     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
735       CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out));
736     }
737   }
738   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
739   CeedCall(CeedVectorDestroy(&assembled_qf));
740   return CEED_ERROR_SUCCESS;
741 }
742 
743 /**
744   @brief Count number of entries for assembled CeedOperator
745 
746   @param[in]  op          CeedOperator to assemble
747   @param[out] num_entries Number of entries in assembled representation
748 
749   @return An error code: 0 - success, otherwise - failure
750 
751   @ref Utility
752 **/
753 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) {
754   bool                is_composite;
755   CeedInt             num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out;
756   CeedElemRestriction rstr_in, rstr_out;
757 
758   CeedCall(CeedOperatorIsComposite(op, &is_composite));
759   CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
760 
761   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out));
762   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in));
763   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in));
764   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in));
765   if (rstr_in != rstr_out) {
766     CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out));
767     CeedCheck(num_elem_in == num_elem_out, op->ceed, CEED_ERROR_UNSUPPORTED,
768               "Active input and output operator restrictions must have the same number of elements");
769     CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out));
770     CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out));
771   } else {
772     num_elem_out  = num_elem_in;
773     elem_size_out = elem_size_in;
774     num_comp_out  = num_comp_in;
775   }
776   *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in;
777   return CEED_ERROR_SUCCESS;
778 }
779 
780 /**
781   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
782 
783   @param[in]  op_fine      Fine grid operator
784   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
785   @param[in]  rstr_coarse  Coarse grid restriction
786   @param[in]  basis_coarse Coarse grid active vector basis
787   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
788   @param[out] op_coarse    Coarse grid operator
789   @param[out] op_prolong   Coarse to fine operator, or NULL
790   @param[out] op_restrict  Fine to coarse operator, or NULL
791 
792   @return An error code: 0 - success, otherwise - failure
793 
794   @ref Developer
795 **/
796 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
797                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
798   bool                is_composite;
799   Ceed                ceed;
800   CeedInt             num_comp;
801   CeedVector          mult_vec         = NULL;
802   CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL;
803 
804   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
805 
806   // Check for composite operator
807   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
808   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
809 
810   // Coarse Grid
811   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
812   // -- Clone input fields
813   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
814     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
815       rstr_fine = op_fine->input_fields[i]->elem_rstr;
816       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
817     } else {
818       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
819                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
820     }
821   }
822   // -- Clone output fields
823   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
824     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
825       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
826     } else {
827       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
828                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
829     }
830   }
831   // -- Clone QFunctionAssemblyData
832   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
833 
834   // Multiplicity vector
835   if (op_restrict || op_prolong) {
836     CeedVector          mult_e_vec;
837     CeedRestrictionType rstr_type;
838 
839     CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type));
840     CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED,
841               "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported");
842     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
843     CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine));
844     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
845     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
846     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
847     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
848     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
849     CeedCall(CeedVectorDestroy(&mult_e_vec));
850     CeedCall(CeedVectorReciprocal(mult_vec));
851   }
852 
853   // Clone name
854   bool   has_name = op_fine->name;
855   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
856   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
857 
858   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
859   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
860             "Prolongation or restriction operator creation requires coarse-to-fine basis");
861 
862   // Restriction/Prolongation Operators
863   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
864 
865   // Restriction
866   if (op_restrict) {
867     CeedInt             *num_comp_r_data;
868     CeedQFunctionContext ctx_r;
869     CeedQFunction        qf_restrict;
870 
871     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
872     CeedCall(CeedCalloc(1, &num_comp_r_data));
873     num_comp_r_data[0] = num_comp;
874     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
875     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
876     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
877     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
878     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
879     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
880     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
881     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
882 
883     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
884     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
885     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
886     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
887 
888     // Set name
889     char *restriction_name;
890 
891     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
892     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
893     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
894     CeedCall(CeedFree(&restriction_name));
895 
896     // Check
897     CeedCall(CeedOperatorCheckReady(*op_restrict));
898 
899     // Cleanup
900     CeedCall(CeedQFunctionDestroy(&qf_restrict));
901   }
902 
903   // Prolongation
904   if (op_prolong) {
905     CeedInt             *num_comp_p_data;
906     CeedQFunctionContext ctx_p;
907     CeedQFunction        qf_prolong;
908 
909     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
910     CeedCall(CeedCalloc(1, &num_comp_p_data));
911     num_comp_p_data[0] = num_comp;
912     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
913     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
914     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
915     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
916     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
917     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
918     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
919     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
920 
921     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
922     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
923     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
924     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
925 
926     // Set name
927     char *prolongation_name;
928 
929     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
930     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
931     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
932     CeedCall(CeedFree(&prolongation_name));
933 
934     // Check
935     CeedCall(CeedOperatorCheckReady(*op_prolong));
936 
937     // Cleanup
938     CeedCall(CeedQFunctionDestroy(&qf_prolong));
939   }
940 
941   // Check
942   CeedCall(CeedOperatorCheckReady(*op_coarse));
943 
944   // Cleanup
945   CeedCall(CeedVectorDestroy(&mult_vec));
946   CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine));
947   CeedCall(CeedBasisDestroy(&basis_c_to_f));
948   return CEED_ERROR_SUCCESS;
949 }
950 
951 /**
952   @brief Build 1D mass matrix and Laplacian with perturbation
953 
954   @param[in]  interp_1d   Interpolation matrix in one dimension
955   @param[in]  grad_1d     Gradient matrix in one dimension
956   @param[in]  q_weight_1d Quadrature weights in one dimension
957   @param[in]  P_1d        Number of basis nodes in one dimension
958   @param[in]  Q_1d        Number of quadrature points in one dimension
959   @param[in]  dim         Dimension of basis
960   @param[out] mass        Assembled mass matrix in one dimension
961   @param[out] laplace     Assembled perturbed Laplacian in one dimension
962 
963   @return An error code: 0 - success, otherwise - failure
964 
965   @ref Developer
966 **/
967 CeedPragmaOptimizeOff
968 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
969                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
970   for (CeedInt i = 0; i < P_1d; i++) {
971     for (CeedInt j = 0; j < P_1d; j++) {
972       CeedScalar sum = 0.0;
973       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
974       mass[i + j * P_1d] = sum;
975     }
976   }
977   // -- Laplacian
978   for (CeedInt i = 0; i < P_1d; i++) {
979     for (CeedInt j = 0; j < P_1d; j++) {
980       CeedScalar sum = 0.0;
981 
982       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
983       laplace[i + j * P_1d] = sum;
984     }
985   }
986   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
987   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
988   return CEED_ERROR_SUCCESS;
989 }
990 CeedPragmaOptimizeOn
991 
992 /// @}
993 
994 /// ----------------------------------------------------------------------------
995 /// CeedOperator Backend API
996 /// ----------------------------------------------------------------------------
997 /// @addtogroup CeedOperatorBackend
998 /// @{
999 
1000 /**
1001   @brief Create point block restriction for active operator field
1002 
1003   @param[in]  rstr             Original CeedElemRestriction for active field
1004   @param[out] point_block_rstr Address of the variable where the newly created CeedElemRestriction will be stored
1005 
1006   @return An error code: 0 - success, otherwise - failure
1007 
1008   @ref Backend
1009 **/
1010 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) {
1011   Ceed           ceed;
1012   CeedInt        num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets;
1013   CeedSize       l_size;
1014   const CeedInt *offsets;
1015 
1016   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
1017   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
1018 
1019   // Expand offsets
1020   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
1021   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
1022   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
1023   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
1024   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
1025   shift = num_comp;
1026   if (comp_stride != 1) shift *= num_comp;
1027   CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets));
1028   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
1029     point_block_offsets[i] = offsets[i] * shift;
1030   }
1031 
1032   // Create new restriction
1033   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
1034                                      point_block_offsets, point_block_rstr));
1035 
1036   // Cleanup
1037   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
1038   return CEED_ERROR_SUCCESS;
1039 }
1040 
1041 /**
1042   @brief Create object holding CeedQFunction assembly data for CeedOperator
1043 
1044   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
1045   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
1046 
1047   @return An error code: 0 - success, otherwise - failure
1048 
1049   @ref Backend
1050 **/
1051 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
1052   CeedCall(CeedCalloc(1, data));
1053   (*data)->ref_count = 1;
1054   (*data)->ceed      = ceed;
1055   CeedCall(CeedReference(ceed));
1056   return CEED_ERROR_SUCCESS;
1057 }
1058 
1059 /**
1060   @brief Increment the reference counter for a CeedQFunctionAssemblyData
1061 
1062   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
1063 
1064   @return An error code: 0 - success, otherwise - failure
1065 
1066   @ref Backend
1067 **/
1068 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
1069   data->ref_count++;
1070   return CEED_ERROR_SUCCESS;
1071 }
1072 
1073 /**
1074   @brief Set re-use of CeedQFunctionAssemblyData
1075 
1076   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
1077   @param[in]     reuse_data Boolean flag indicating data re-use
1078 
1079   @return An error code: 0 - success, otherwise - failure
1080 
1081   @ref Backend
1082 **/
1083 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
1084   data->reuse_data        = reuse_data;
1085   data->needs_data_update = true;
1086   return CEED_ERROR_SUCCESS;
1087 }
1088 
1089 /**
1090   @brief Mark QFunctionAssemblyData as stale
1091 
1092   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
1093   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
1094 
1095   @return An error code: 0 - success, otherwise - failure
1096 
1097   @ref Backend
1098 **/
1099 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
1100   data->needs_data_update = needs_data_update;
1101   return CEED_ERROR_SUCCESS;
1102 }
1103 
1104 /**
1105   @brief Determine if QFunctionAssemblyData needs update
1106 
1107   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
1108   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1109 
1110   @return An error code: 0 - success, otherwise - failure
1111 
1112   @ref Backend
1113 **/
1114 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
1115   *is_update_needed = !data->reuse_data || data->needs_data_update;
1116   return CEED_ERROR_SUCCESS;
1117 }
1118 
1119 /**
1120   @brief Copy the pointer to a CeedQFunctionAssemblyData.
1121 
1122   Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
1123 
1124   Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
1125         CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
1126         CeedQFunctionAssemblyData.
1127 
1128   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
1129   @param[in,out] data_copy Variable to store copied reference
1130 
1131   @return An error code: 0 - success, otherwise - failure
1132 
1133   @ref Backend
1134 **/
1135 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1136   CeedCall(CeedQFunctionAssemblyDataReference(data));
1137   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1138   *data_copy = data;
1139   return CEED_ERROR_SUCCESS;
1140 }
1141 
1142 /**
1143   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1144 
1145   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1146   @param[out] is_setup Boolean flag for setup status
1147 
1148   @return An error code: 0 - success, otherwise - failure
1149 
1150   @ref Backend
1151 **/
1152 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1153   *is_setup = data->is_setup;
1154   return CEED_ERROR_SUCCESS;
1155 }
1156 
1157 /**
1158   @brief Set internal objects for CeedQFunctionAssemblyData
1159 
1160   @param[in,out] data CeedQFunctionAssemblyData to set objects
1161   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1162   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1163 
1164   @return An error code: 0 - success, otherwise - failure
1165 
1166   @ref Backend
1167 **/
1168 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1169   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1170   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1171 
1172   data->is_setup = true;
1173   return CEED_ERROR_SUCCESS;
1174 }
1175 
1176 /**
1177   @brief Get internal objects for CeedQFunctionAssemblyData
1178 
1179   @param[in,out] data CeedQFunctionAssemblyData to set objects
1180   @param[out]    vec  CeedVector to store assembled CeedQFunction at quadrature points
1181   @param[out]    rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1182 
1183   @return An error code: 0 - success, otherwise - failure
1184 
1185   @ref Backend
1186 **/
1187 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1188   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1189 
1190   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1191   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1192   return CEED_ERROR_SUCCESS;
1193 }
1194 
1195 /**
1196   @brief Destroy CeedQFunctionAssemblyData
1197 
1198   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1199 
1200   @return An error code: 0 - success, otherwise - failure
1201 
1202   @ref Backend
1203 **/
1204 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1205   if (!*data || --(*data)->ref_count > 0) {
1206     *data = NULL;
1207     return CEED_ERROR_SUCCESS;
1208   }
1209   CeedCall(CeedDestroy(&(*data)->ceed));
1210   CeedCall(CeedVectorDestroy(&(*data)->vec));
1211   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1212 
1213   CeedCall(CeedFree(data));
1214   return CEED_ERROR_SUCCESS;
1215 }
1216 
1217 /**
1218   @brief Get CeedOperatorAssemblyData
1219 
1220   @param[in]  op   CeedOperator to assemble
1221   @param[out] data CeedQFunctionAssemblyData
1222 
1223   @return An error code: 0 - success, otherwise - failure
1224 
1225   @ref Backend
1226 **/
1227 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1228   if (!op->op_assembled) {
1229     CeedOperatorAssemblyData data;
1230 
1231     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1232     op->op_assembled = data;
1233   }
1234   *data = op->op_assembled;
1235   return CEED_ERROR_SUCCESS;
1236 }
1237 
1238 /**
1239   @brief Create object holding CeedOperator assembly data.
1240 
1241   The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1242   An array with references to the corresponding active CeedElemRestrictions is also stored.
1243   For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1244   The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1245 CeedEvalMode.
1246   The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1247   Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1248 
1249   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1250   @param[in]  op   CeedOperator to be assembled
1251   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1252 
1253   @return An error code: 0 - success, otherwise - failure
1254 
1255   @ref Backend
1256 **/
1257 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1258   CeedInt             num_active_bases_in = 0, num_active_bases_out = 0, offset = 0;
1259   CeedInt             num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL;
1260   CeedSize          **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1261   CeedEvalMode      **eval_modes_in = NULL, **eval_modes_out = NULL;
1262   CeedQFunctionField *qf_fields;
1263   CeedQFunction       qf;
1264   CeedOperatorField  *op_fields;
1265   bool                is_composite;
1266 
1267   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1268   CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators.");
1269 
1270   // Allocate
1271   CeedCall(CeedCalloc(1, data));
1272   (*data)->ceed = ceed;
1273   CeedCall(CeedReference(ceed));
1274 
1275   // Build OperatorAssembly data
1276   CeedCall(CeedOperatorGetQFunction(op, &qf));
1277   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1278   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1279 
1280   // Determine active input basis
1281   for (CeedInt i = 0; i < num_input_fields; i++) {
1282     CeedVector vec;
1283 
1284     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1285     if (vec == CEED_VECTOR_ACTIVE) {
1286       CeedInt      index = -1, num_comp, q_comp;
1287       CeedEvalMode eval_mode;
1288       CeedBasis    basis_in = NULL;
1289 
1290       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1291       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1292       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1293       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1294       for (CeedInt i = 0; i < num_active_bases_in; i++) {
1295         if ((*data)->active_bases_in[i] == basis_in) index = i;
1296       }
1297       if (index == -1) {
1298         CeedElemRestriction elem_rstr_in;
1299 
1300         index = num_active_bases_in;
1301         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in));
1302         (*data)->active_bases_in[num_active_bases_in] = NULL;
1303         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in]));
1304         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in));
1305         (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL;
1306         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1307         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in]));
1308         CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in));
1309         num_eval_modes_in[index] = 0;
1310         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in));
1311         eval_modes_in[index] = NULL;
1312         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in));
1313         eval_mode_offsets_in[index] = NULL;
1314         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in));
1315         (*data)->assembled_bases_in[index] = NULL;
1316         num_active_bases_in++;
1317       }
1318       if (eval_mode != CEED_EVAL_WEIGHT) {
1319         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1320         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1321         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1322         for (CeedInt d = 0; d < q_comp; d++) {
1323           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1324           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1325           offset += num_comp;
1326         }
1327         num_eval_modes_in[index] += q_comp;
1328       }
1329     }
1330   }
1331 
1332   // Determine active output basis
1333   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1334   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1335   offset = 0;
1336   for (CeedInt i = 0; i < num_output_fields; i++) {
1337     CeedVector vec;
1338 
1339     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1340     if (vec == CEED_VECTOR_ACTIVE) {
1341       CeedInt      index = -1, num_comp, q_comp;
1342       CeedEvalMode eval_mode;
1343       CeedBasis    basis_out = NULL;
1344 
1345       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1346       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1347       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1348       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1349       for (CeedInt i = 0; i < num_active_bases_out; i++) {
1350         if ((*data)->active_bases_out[i] == basis_out) index = i;
1351       }
1352       if (index == -1) {
1353         CeedElemRestriction elem_rstr_out;
1354 
1355         index = num_active_bases_out;
1356         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out));
1357         (*data)->active_bases_out[num_active_bases_out] = NULL;
1358         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out]));
1359         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out));
1360         (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL;
1361         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1362         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out]));
1363         CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out));
1364         num_eval_modes_out[index] = 0;
1365         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out));
1366         eval_modes_out[index] = NULL;
1367         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out));
1368         eval_mode_offsets_out[index] = NULL;
1369         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out));
1370         (*data)->assembled_bases_out[index] = NULL;
1371         num_active_bases_out++;
1372       }
1373       if (eval_mode != CEED_EVAL_WEIGHT) {
1374         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1375         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1376         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1377         for (CeedInt d = 0; d < q_comp; d++) {
1378           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1379           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1380           offset += num_comp;
1381         }
1382         num_eval_modes_out[index] += q_comp;
1383       }
1384     }
1385   }
1386   (*data)->num_active_bases_in   = num_active_bases_in;
1387   (*data)->num_eval_modes_in     = num_eval_modes_in;
1388   (*data)->eval_modes_in         = eval_modes_in;
1389   (*data)->eval_mode_offsets_in  = eval_mode_offsets_in;
1390   (*data)->num_active_bases_out  = num_active_bases_out;
1391   (*data)->num_eval_modes_out    = num_eval_modes_out;
1392   (*data)->eval_modes_out        = eval_modes_out;
1393   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1394   (*data)->num_output_components = offset;
1395   return CEED_ERROR_SUCCESS;
1396 }
1397 
1398 /**
1399   @brief Get CeedOperator CeedEvalModes for assembly.
1400 
1401   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1402 
1403   @param[in]  data                  CeedOperatorAssemblyData
1404   @param[out] num_active_bases_in   Total number of active bases for input
1405   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1406                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1407   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1408   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1409   @param[out] num_active_bases_out  Total number of active bases for output
1410   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1411   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1412   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1413   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1414                                       including contributions of all active bases
1415 
1416   @return An error code: 0 - success, otherwise - failure
1417 
1418   @ref Backend
1419 **/
1420 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in,
1421                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out,
1422                                          CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out,
1423                                          CeedSize *num_output_components) {
1424   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1425   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1426   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1427   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1428   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1429   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1430   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1431   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1432   if (num_output_components) *num_output_components = data->num_output_components;
1433   return CEED_ERROR_SUCCESS;
1434 }
1435 
1436 /**
1437   @brief Get CeedOperator CeedBasis data for assembly.
1438 
1439   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1440 
1441   @param[in]  data                 CeedOperatorAssemblyData
1442   @param[out] num_active_bases_in  Number of active input bases, or NULL
1443   @param[out] active_bases_in      Pointer to hold active input CeedBasis, or NULL
1444   @param[out] assembled_bases_in   Pointer to hold assembled active input B, or NULL
1445   @param[out] num_active_bases_out Number of active output bases, or NULL
1446   @param[out] active_bases_out     Pointer to hold active output CeedBasis, or NULL
1447   @param[out] assembled_bases_out  Pointer to hold assembled active output B, or NULL
1448 
1449   @return An error code: 0 - success, otherwise - failure
1450 
1451   @ref Backend
1452 **/
1453 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in,
1454                                      const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out,
1455                                      const CeedScalar ***assembled_bases_out) {
1456   // Assemble B_in, B_out if needed
1457   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1458     CeedInt num_qpts;
1459 
1460     if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts));
1461     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts));
1462     for (CeedInt b = 0; b < data->num_active_bases_in; b++) {
1463       bool        has_eval_none = false;
1464       CeedInt     num_nodes;
1465       CeedScalar *B_in = NULL, *identity = NULL;
1466 
1467       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes));
1468       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1469 
1470       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1471         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1472       }
1473       if (has_eval_none) {
1474         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1475         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1476           identity[i * num_nodes + i] = 1.0;
1477         }
1478       }
1479 
1480       for (CeedInt q = 0; q < num_qpts; q++) {
1481         for (CeedInt n = 0; n < num_nodes; n++) {
1482           CeedInt      d_in              = 0, q_comp_in;
1483           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1484 
1485           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1486             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1487             const CeedScalar *B  = NULL;
1488 
1489             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B));
1490             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in));
1491             if (q_comp_in > 1) {
1492               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1493               else B = &B[(++d_in) * num_qpts * num_nodes];
1494             }
1495             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1496             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1497           }
1498         }
1499       }
1500       if (identity) CeedCall(CeedFree(&identity));
1501       data->assembled_bases_in[b] = B_in;
1502     }
1503   }
1504 
1505   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1506     CeedInt num_qpts;
1507 
1508     if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts));
1509     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts));
1510     for (CeedInt b = 0; b < data->num_active_bases_out; b++) {
1511       bool        has_eval_none = false;
1512       CeedInt     num_nodes;
1513       CeedScalar *B_out = NULL, *identity = NULL;
1514 
1515       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes));
1516       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1517 
1518       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1519         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1520       }
1521       if (has_eval_none) {
1522         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1523         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1524           identity[i * num_nodes + i] = 1.0;
1525         }
1526       }
1527 
1528       for (CeedInt q = 0; q < num_qpts; q++) {
1529         for (CeedInt n = 0; n < num_nodes; n++) {
1530           CeedInt      d_out              = 0, q_comp_out;
1531           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1532 
1533           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1534             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1535             const CeedScalar *B  = NULL;
1536 
1537             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B));
1538             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out));
1539             if (q_comp_out > 1) {
1540               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1541               else B = &B[(++d_out) * num_qpts * num_nodes];
1542             }
1543             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1544             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1545           }
1546         }
1547       }
1548       if (identity) CeedCall(CeedFree(&identity));
1549       data->assembled_bases_out[b] = B_out;
1550     }
1551   }
1552 
1553   // Pass out assembled data
1554   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1555   if (active_bases_in) *active_bases_in = data->active_bases_in;
1556   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1557   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1558   if (active_bases_out) *active_bases_out = data->active_bases_out;
1559   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1560   return CEED_ERROR_SUCCESS;
1561 }
1562 
1563 /**
1564   @brief Get CeedOperator CeedBasis data for assembly.
1565 
1566   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1567 
1568   @param[in]  data                      CeedOperatorAssemblyData
1569   @param[out] num_active_elem_rstrs_in  Number of active input element restrictions, or NULL
1570   @param[out] active_elem_rstrs_in      Pointer to hold active input CeedElemRestrictions, or NULL
1571   @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or NULL
1572   @param[out] active_elem_rstrs_out     Pointer to hold active output CeedElemRestrictions, or NULL
1573 
1574   @return An error code: 0 - success, otherwise - failure
1575 
1576   @ref Backend
1577 **/
1578 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in,
1579                                                 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out,
1580                                                 CeedElemRestriction **active_elem_rstrs_out) {
1581   if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in;
1582   if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in;
1583   if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out;
1584   if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out;
1585   return CEED_ERROR_SUCCESS;
1586 }
1587 
1588 /**
1589   @brief Destroy CeedOperatorAssemblyData
1590 
1591   @param[in,out] data CeedOperatorAssemblyData to destroy
1592 
1593   @return An error code: 0 - success, otherwise - failure
1594 
1595   @ref Backend
1596 **/
1597 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1598   if (!*data) {
1599     *data = NULL;
1600     return CEED_ERROR_SUCCESS;
1601   }
1602   CeedCall(CeedDestroy(&(*data)->ceed));
1603   for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) {
1604     CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b]));
1605     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b]));
1606     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1607     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1608     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1609   }
1610   for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) {
1611     CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b]));
1612     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b]));
1613     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1614     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1615     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1616   }
1617   CeedCall(CeedFree(&(*data)->active_bases_in));
1618   CeedCall(CeedFree(&(*data)->active_bases_out));
1619   CeedCall(CeedFree(&(*data)->active_elem_rstrs_in));
1620   CeedCall(CeedFree(&(*data)->active_elem_rstrs_out));
1621   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1622   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1623   CeedCall(CeedFree(&(*data)->eval_modes_in));
1624   CeedCall(CeedFree(&(*data)->eval_modes_out));
1625   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1626   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1627   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1628   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1629 
1630   CeedCall(CeedFree(data));
1631   return CEED_ERROR_SUCCESS;
1632 }
1633 
1634 /**
1635   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
1636 
1637   @param[in]  op          CeedOperator to retrieve fallback for
1638   @param[out] op_fallback Fallback CeedOperator
1639 
1640   @return An error code: 0 - success, otherwise - failure
1641 
1642   @ref Backend
1643 **/
1644 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
1645   // Create if needed
1646   if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op));
1647   if (op->op_fallback) {
1648     bool is_debug;
1649 
1650     CeedCall(CeedIsDebug(op->ceed, &is_debug));
1651     if (is_debug) {
1652       Ceed        ceed, ceed_fallback;
1653       const char *resource, *resource_fallback;
1654 
1655       CeedCall(CeedOperatorGetCeed(op, &ceed));
1656       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
1657       CeedCall(CeedGetResource(ceed, &resource));
1658       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
1659 
1660       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n");
1661       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
1662                 op->op_fallback);
1663     }
1664   }
1665   *op_fallback = op->op_fallback;
1666   return CEED_ERROR_SUCCESS;
1667 }
1668 
1669 /**
1670   @brief Get the parent CeedOperator for a fallback CeedOperator
1671 
1672   @param[in]  op     CeedOperator context
1673   @param[out] parent Variable to store parent CeedOperator context
1674 
1675   @return An error code: 0 - success, otherwise - failure
1676 
1677   @ref Backend
1678 **/
1679 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) {
1680   *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL;
1681   return CEED_ERROR_SUCCESS;
1682 }
1683 
1684 /**
1685   @brief Get the Ceed context of the parent CeedOperator for a fallback CeedOperator
1686 
1687   @param[in]  op     CeedOperator context
1688   @param[out] parent Variable to store parent Ceed context
1689 
1690   @return An error code: 0 - success, otherwise - failure
1691 
1692   @ref Backend
1693 **/
1694 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) {
1695   *parent = op->op_fallback_parent ? op->op_fallback_parent->ceed : op->ceed;
1696   return CEED_ERROR_SUCCESS;
1697 }
1698 
1699 /// @}
1700 
1701 /// ----------------------------------------------------------------------------
1702 /// CeedOperator Public API
1703 /// ----------------------------------------------------------------------------
1704 /// @addtogroup CeedOperatorUser
1705 /// @{
1706 
1707 /**
1708   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1709 
1710   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1711   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices
1712 representing the action of the CeedQFunction for a corresponding quadrature point on an element.
1713 
1714   Inputs and outputs are in the order provided by the user when adding CeedOperator fields.
1715   For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction
1716 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1717 
1718   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1719 
1720   @param[in]  op        CeedOperator to assemble CeedQFunction
1721   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1722   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1723   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1724 
1725   @return An error code: 0 - success, otherwise - failure
1726 
1727   @ref User
1728 **/
1729 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1730   CeedCall(CeedOperatorCheckReady(op));
1731 
1732   if (op->LinearAssembleQFunction) {
1733     // Backend version
1734     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1735   } else {
1736     // Operator fallback
1737     CeedOperator op_fallback;
1738 
1739     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1740     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1741     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1742   }
1743   return CEED_ERROR_SUCCESS;
1744 }
1745 
1746 /**
1747   @brief Assemble CeedQFunction and store result internally.
1748 
1749   Return copied references of stored data to the caller.
1750   Caller is responsible for ownership and destruction of the copied references.
1751   See also @ref CeedOperatorLinearAssembleQFunction
1752 
1753   Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers.
1754         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
1755 
1756   @param[in]  op        CeedOperator to assemble CeedQFunction
1757   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1758   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1759   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1760 
1761   @return An error code: 0 - success, otherwise - failure
1762 
1763   @ref User
1764 **/
1765 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1766   int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL;
1767   CeedOperator op_assemble                                                                           = NULL;
1768   CeedOperator op_fallback_parent                                                                    = NULL;
1769 
1770   CeedCall(CeedOperatorCheckReady(op));
1771 
1772   // Determine if fallback parent or operator has implementation
1773   CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent));
1774   if (op_fallback_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) {
1775     // -- Backend version for op fallback parent is faster, if it exists
1776     LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate;
1777     op_assemble                   = op_fallback_parent;
1778   } else if (op->LinearAssembleQFunctionUpdate) {
1779     // -- Backend version for op
1780     LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate;
1781     op_assemble                   = op;
1782   }
1783 
1784   // Assemble QFunction
1785   if (LinearAssembleQFunctionUpdate) {
1786     // Backend or fallback parent version
1787     bool                qf_assembled_is_setup;
1788     CeedVector          assembled_vec  = NULL;
1789     CeedElemRestriction assembled_rstr = NULL;
1790 
1791     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1792     if (qf_assembled_is_setup) {
1793       bool update_needed;
1794 
1795       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1796       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1797       if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request));
1798     } else {
1799       CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request));
1800       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1801     }
1802     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1803 
1804     // Copy reference from internally held copy
1805     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1806     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1807     CeedCall(CeedVectorDestroy(&assembled_vec));
1808     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1809   } else {
1810     // Operator fallback
1811     CeedOperator op_fallback;
1812 
1813     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1814     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1815     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1816   }
1817   return CEED_ERROR_SUCCESS;
1818 }
1819 
1820 /**
1821   @brief Assemble the diagonal of a square linear CeedOperator
1822 
1823   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1824 
1825   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1826 
1827   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1828 
1829   @param[in]  op        CeedOperator to assemble CeedQFunction
1830   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1831   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1832 
1833   @return An error code: 0 - success, otherwise - failure
1834 
1835   @ref User
1836 **/
1837 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1838   bool     is_composite;
1839   CeedSize input_size = 0, output_size = 0;
1840 
1841   CeedCall(CeedOperatorCheckReady(op));
1842   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1843 
1844   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1845   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1846 
1847   // Early exit for empty operator
1848   if (!is_composite) {
1849     CeedInt num_elem = 0;
1850 
1851     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1852     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1853   }
1854 
1855   if (op->LinearAssembleDiagonal) {
1856     // Backend version
1857     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1858     return CEED_ERROR_SUCCESS;
1859   } else if (op->LinearAssembleAddDiagonal) {
1860     // Backend version with zeroing first
1861     CeedCall(CeedVectorSetValue(assembled, 0.0));
1862     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1863     return CEED_ERROR_SUCCESS;
1864   } else {
1865     // Operator fallback
1866     CeedOperator op_fallback;
1867 
1868     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1869     if (op_fallback) {
1870       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1871       return CEED_ERROR_SUCCESS;
1872     }
1873   }
1874   // Default interface implementation
1875   CeedCall(CeedVectorSetValue(assembled, 0.0));
1876   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1877   return CEED_ERROR_SUCCESS;
1878 }
1879 
1880 /**
1881   @brief Assemble the diagonal of a square linear CeedOperator
1882 
1883   This sums into a CeedVector the diagonal of a linear CeedOperator.
1884 
1885   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1886 
1887   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1888 
1889   @param[in]  op        CeedOperator to assemble CeedQFunction
1890   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1891   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1892 
1893   @return An error code: 0 - success, otherwise - failure
1894 
1895   @ref User
1896 **/
1897 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1898   bool     is_composite;
1899   CeedSize input_size = 0, output_size = 0;
1900 
1901   CeedCall(CeedOperatorCheckReady(op));
1902   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1903 
1904   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1905   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1906 
1907   // Early exit for empty operator
1908   if (!is_composite) {
1909     CeedInt num_elem = 0;
1910 
1911     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1912     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1913   }
1914 
1915   if (op->LinearAssembleAddDiagonal) {
1916     // Backend version
1917     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1918     return CEED_ERROR_SUCCESS;
1919   } else {
1920     // Operator fallback
1921     CeedOperator op_fallback;
1922 
1923     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1924     if (op_fallback) {
1925       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1926       return CEED_ERROR_SUCCESS;
1927     }
1928   }
1929   // Default interface implementation
1930   if (is_composite) {
1931     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1932   } else {
1933     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1934   }
1935   return CEED_ERROR_SUCCESS;
1936 }
1937 
1938 /**
1939    @brief Fully assemble the point-block diagonal pattern of a linear operator.
1940 
1941    Expected to be used in conjunction with CeedOperatorLinearAssemblePointBlockDiagonal().
1942 
1943    The assembly routines use coordinate format, with `num_entries` tuples of the form (i, j, value) which indicate that value should be added to the
1944 matrix in entry (i, j).
1945   Note that the (i, j) pairs are unique.
1946   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in
1947 the same ordering.
1948 
1949    This will generally be slow unless your operator is low-order.
1950 
1951    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1952 
1953    @param[in]  op          CeedOperator to assemble
1954    @param[out] num_entries Number of entries in coordinate nonzero pattern
1955    @param[out] rows        Row number for each entry
1956    @param[out] cols        Column number for each entry
1957 
1958    @ref User
1959 **/
1960 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1961   Ceed          ceed;
1962   bool          is_composite;
1963   CeedInt       num_active_components, num_sub_operators;
1964   CeedOperator *sub_operators;
1965 
1966   CeedCall(CeedOperatorGetCeed(op, &ceed));
1967   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1968 
1969   CeedSize input_size = 0, output_size = 0;
1970   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1971   CeedCheck(input_size == output_size, ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1972 
1973   if (is_composite) {
1974     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_operators));
1975     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1976   } else {
1977     sub_operators     = &op;
1978     num_sub_operators = 1;
1979   }
1980 
1981   // Verify operator can be assembled correctly
1982   {
1983     CeedOperatorAssemblyData data;
1984     CeedInt                  num_active_elem_rstrs, comp_stride;
1985     CeedElemRestriction     *active_elem_rstrs;
1986 
1987     // Get initial values to check against
1988     CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data));
1989     CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
1990     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride));
1991     CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components));
1992 
1993     // Verify that all active element restrictions have same component stride and number of components
1994     for (CeedInt k = 0; k < num_sub_operators; k++) {
1995       CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data));
1996       CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
1997       for (CeedInt i = 0; i < num_active_elem_rstrs; i++) {
1998         CeedInt comp_stride_sub, num_active_components_sub;
1999 
2000         CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub));
2001         CeedCheck(comp_stride == comp_stride_sub, ceed, CEED_ERROR_DIMENSION,
2002                   "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub);
2003         CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub));
2004         CeedCheck(num_active_components == num_active_components_sub, ceed, CEED_ERROR_INCOMPATIBLE,
2005                   "All suboperators must have the same number of output components");
2006       }
2007     }
2008   }
2009   *num_entries = input_size * num_active_components;
2010   CeedCall(CeedCalloc(*num_entries, rows));
2011   CeedCall(CeedCalloc(*num_entries, cols));
2012 
2013   for (CeedInt o = 0; o < num_sub_operators; o++) {
2014     CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr;
2015     CeedInt             comp_stride, num_elem, elem_size;
2016     const CeedInt      *offsets, *point_block_offsets;
2017 
2018     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr));
2019     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride));
2020     CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem));
2021     CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size));
2022     CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets));
2023 
2024     CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr));
2025     CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets));
2026 
2027     for (CeedSize i = 0; i < num_elem * elem_size; i++) {
2028       for (CeedInt c_out = 0; c_out < num_active_components; c_out++) {
2029         for (CeedInt c_in = 0; c_in < num_active_components; c_in++) {
2030           (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride;
2031           (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride;
2032         }
2033       }
2034     }
2035 
2036     CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets));
2037     CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets));
2038     CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr));
2039   }
2040   return CEED_ERROR_SUCCESS;
2041 }
2042 
2043 /**
2044   @brief Assemble the point block diagonal of a square linear CeedOperator
2045 
2046   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
2047 
2048   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
2049 
2050   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2051 
2052   @param[in]  op        CeedOperator to assemble CeedQFunction
2053   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
2054 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
2055 component in].
2056   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2057 
2058   @return An error code: 0 - success, otherwise - failure
2059 
2060   @ref User
2061 **/
2062 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2063   bool     is_composite;
2064   CeedSize input_size = 0, output_size = 0;
2065 
2066   CeedCall(CeedOperatorCheckReady(op));
2067   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2068 
2069   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2070   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
2071 
2072   // Early exit for empty operator
2073   if (!is_composite) {
2074     CeedInt num_elem = 0;
2075 
2076     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2077     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2078   }
2079 
2080   if (op->LinearAssemblePointBlockDiagonal) {
2081     // Backend version
2082     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
2083     return CEED_ERROR_SUCCESS;
2084   } else if (op->LinearAssembleAddPointBlockDiagonal) {
2085     // Backend version with zeroing first
2086     CeedCall(CeedVectorSetValue(assembled, 0.0));
2087     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2088     return CEED_ERROR_SUCCESS;
2089   } else {
2090     // Operator fallback
2091     CeedOperator op_fallback;
2092 
2093     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2094     if (op_fallback) {
2095       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
2096       return CEED_ERROR_SUCCESS;
2097     }
2098   }
2099   // Default interface implementation
2100   CeedCall(CeedVectorSetValue(assembled, 0.0));
2101   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2102   return CEED_ERROR_SUCCESS;
2103 }
2104 
2105 /**
2106   @brief Assemble the point block diagonal of a square linear CeedOperator
2107 
2108   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
2109 
2110   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
2111 
2112   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2113 
2114   @param[in]  op        CeedOperator to assemble CeedQFunction
2115   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
2116 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
2117 component in].
2118   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2119 
2120   @return An error code: 0 - success, otherwise - failure
2121 
2122   @ref User
2123 **/
2124 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2125   bool     is_composite;
2126   CeedSize input_size = 0, output_size = 0;
2127 
2128   CeedCall(CeedOperatorCheckReady(op));
2129   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2130 
2131   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2132   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
2133 
2134   // Early exit for empty operator
2135   if (!is_composite) {
2136     CeedInt num_elem = 0;
2137 
2138     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2139     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2140   }
2141 
2142   if (op->LinearAssembleAddPointBlockDiagonal) {
2143     // Backend version
2144     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
2145     return CEED_ERROR_SUCCESS;
2146   } else {
2147     // Operator fallback
2148     CeedOperator op_fallback;
2149 
2150     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2151     if (op_fallback) {
2152       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
2153       return CEED_ERROR_SUCCESS;
2154     }
2155   }
2156   // Default interface implementation
2157   if (is_composite) {
2158     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
2159   } else {
2160     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
2161   }
2162   return CEED_ERROR_SUCCESS;
2163 }
2164 
2165 /**
2166    @brief Fully assemble the nonzero pattern of a linear operator.
2167 
2168    Expected to be used in conjunction with CeedOperatorLinearAssemble().
2169 
2170    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
2171 matrix in entry (i, j).
2172   Note that the (i, j) pairs are not unique and may repeat.
2173   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
2174 
2175    This will generally be slow unless your operator is low-order.
2176 
2177    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2178 
2179    @param[in]  op          CeedOperator to assemble
2180    @param[out] num_entries Number of entries in coordinate nonzero pattern
2181    @param[out] rows        Row number for each entry
2182    @param[out] cols        Column number for each entry
2183 
2184    @ref User
2185 **/
2186 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2187   bool          is_composite;
2188   CeedInt       num_suboperators, offset = 0;
2189   CeedSize      single_entries;
2190   CeedOperator *sub_operators;
2191 
2192   CeedCall(CeedOperatorCheckReady(op));
2193   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2194 
2195   if (op->LinearAssembleSymbolic) {
2196     // Backend version
2197     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
2198     return CEED_ERROR_SUCCESS;
2199   } else {
2200     // Operator fallback
2201     CeedOperator op_fallback;
2202 
2203     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2204     if (op_fallback) {
2205       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
2206       return CEED_ERROR_SUCCESS;
2207     }
2208   }
2209 
2210   // Default interface implementation
2211 
2212   // Count entries and allocate rows, cols arrays
2213   *num_entries = 0;
2214   if (is_composite) {
2215     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2216     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2217     for (CeedInt k = 0; k < num_suboperators; ++k) {
2218       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2219       *num_entries += single_entries;
2220     }
2221   } else {
2222     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
2223     *num_entries += single_entries;
2224   }
2225   CeedCall(CeedCalloc(*num_entries, rows));
2226   CeedCall(CeedCalloc(*num_entries, cols));
2227 
2228   // Assemble nonzero locations
2229   if (is_composite) {
2230     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2231     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2232     for (CeedInt k = 0; k < num_suboperators; ++k) {
2233       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
2234       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2235       offset += single_entries;
2236     }
2237   } else {
2238     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
2239   }
2240   return CEED_ERROR_SUCCESS;
2241 }
2242 
2243 /**
2244    @brief Fully assemble the nonzero entries of a linear operator.
2245 
2246    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
2247 
2248    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
2249 matrix in entry (i, j).
2250   Note that the (i, j) pairs are not unique and may repeat.
2251   This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
2252 
2253    This will generally be slow unless your operator is low-order.
2254 
2255    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2256 
2257    @param[in]  op     CeedOperator to assemble
2258    @param[out] values Values to assemble into matrix
2259 
2260    @ref User
2261 **/
2262 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
2263   bool          is_composite;
2264   CeedInt       num_suboperators, offset = 0;
2265   CeedSize      single_entries = 0;
2266   CeedOperator *sub_operators;
2267 
2268   CeedCall(CeedOperatorCheckReady(op));
2269   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2270 
2271   // Early exit for empty operator
2272   if (!is_composite) {
2273     CeedInt num_elem = 0;
2274 
2275     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2276     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2277   }
2278 
2279   if (op->LinearAssemble) {
2280     // Backend version
2281     CeedCall(op->LinearAssemble(op, values));
2282     return CEED_ERROR_SUCCESS;
2283   } else {
2284     // Operator fallback
2285     CeedOperator op_fallback;
2286 
2287     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2288     if (op_fallback) {
2289       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2290       return CEED_ERROR_SUCCESS;
2291     }
2292   }
2293 
2294   // Default interface implementation
2295   CeedCall(CeedVectorSetValue(values, 0.0));
2296   if (is_composite) {
2297     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2298     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2299     for (CeedInt k = 0; k < num_suboperators; k++) {
2300       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2301       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2302       offset += single_entries;
2303     }
2304   } else {
2305     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2306   }
2307   return CEED_ERROR_SUCCESS;
2308 }
2309 
2310 /**
2311   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
2312 
2313   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2314 
2315   @param[in]  op               Composite CeedOperator
2316   @param[in]  num_skip_indices Number of suboperators to skip
2317   @param[in]  skip_indices     Array of indices of suboperators to skip
2318   @param[out] mult             Vector to store multiplicity (of size l_size)
2319 
2320   @return An error code: 0 - success, otherwise - failure
2321 
2322   @ref User
2323 **/
2324 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2325   Ceed                ceed;
2326   CeedInt             num_suboperators;
2327   CeedSize            l_vec_len;
2328   CeedScalar         *mult_array;
2329   CeedVector          ones_l_vec;
2330   CeedElemRestriction elem_rstr, mult_elem_rstr;
2331   CeedOperator       *sub_operators;
2332 
2333   CeedCall(CeedOperatorCheckReady(op));
2334 
2335   CeedCall(CeedOperatorGetCeed(op, &ceed));
2336 
2337   // Zero mult vector
2338   CeedCall(CeedVectorSetValue(mult, 0.0));
2339 
2340   // Get suboperators
2341   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2342   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2343   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2344 
2345   // Work vector
2346   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2347   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2348   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2349   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2350 
2351   // Compute multiplicity across suboperators
2352   for (CeedInt i = 0; i < num_suboperators; i++) {
2353     const CeedScalar *sub_mult_array;
2354     CeedVector        sub_mult_l_vec, ones_e_vec;
2355 
2356     // -- Check for suboperator to skip
2357     for (CeedInt j = 0; j < num_skip_indices; j++) {
2358       if (skip_indices[j] == i) continue;
2359     }
2360 
2361     // -- Sub operator multiplicity
2362     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2363     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr));
2364     CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2365     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2366     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2367     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2368     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2369     // ---- Flag every node present in the current suboperator
2370     for (CeedInt j = 0; j < l_vec_len; j++) {
2371       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2372     }
2373     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2374     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2375     CeedCall(CeedVectorDestroy(&ones_e_vec));
2376     CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr));
2377   }
2378   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2379   CeedCall(CeedVectorDestroy(&ones_l_vec));
2380   return CEED_ERROR_SUCCESS;
2381 }
2382 
2383 /**
2384   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2385 grid interpolation
2386 
2387   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2388 
2389   @param[in]  op_fine      Fine grid operator
2390   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2391   @param[in]  rstr_coarse  Coarse grid restriction
2392   @param[in]  basis_coarse Coarse grid active vector basis
2393   @param[out] op_coarse    Coarse grid operator
2394   @param[out] op_prolong   Coarse to fine operator, or NULL
2395   @param[out] op_restrict  Fine to coarse operator, or NULL
2396 
2397   @return An error code: 0 - success, otherwise - failure
2398 
2399   @ref User
2400 **/
2401 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2402                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2403   CeedBasis basis_c_to_f = NULL;
2404 
2405   CeedCall(CeedOperatorCheckReady(op_fine));
2406 
2407   // Build prolongation matrix, if required
2408   if (op_prolong || op_restrict) {
2409     CeedBasis basis_fine;
2410 
2411     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2412     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2413   }
2414 
2415   // Core code
2416   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2417   return CEED_ERROR_SUCCESS;
2418 }
2419 
2420 /**
2421   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2422 
2423   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2424 
2425   @param[in]  op_fine       Fine grid operator
2426   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2427   @param[in]  rstr_coarse   Coarse grid restriction
2428   @param[in]  basis_coarse  Coarse grid active vector basis
2429   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2430   @param[out] op_coarse     Coarse grid operator
2431   @param[out] op_prolong    Coarse to fine operator, or NULL
2432   @param[out] op_restrict   Fine to coarse operator, or NULL
2433 
2434   @return An error code: 0 - success, otherwise - failure
2435 
2436   @ref User
2437 **/
2438 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2439                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2440                                              CeedOperator *op_restrict) {
2441   Ceed      ceed;
2442   CeedInt   Q_f, Q_c;
2443   CeedBasis basis_fine, basis_c_to_f = NULL;
2444 
2445   CeedCall(CeedOperatorCheckReady(op_fine));
2446   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2447 
2448   // Check for compatible quadrature spaces
2449   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2450   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2451   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2452   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2453 
2454   // Create coarse to fine basis, if required
2455   if (op_prolong || op_restrict) {
2456     CeedInt     dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2457     CeedScalar *q_ref, *q_weight, *grad;
2458 
2459     // Check if interpolation matrix is provided
2460     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2461               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2462     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2463     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2464     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2465     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2466     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2467     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2468     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2469     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2470     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2471     CeedCall(CeedFree(&q_ref));
2472     CeedCall(CeedFree(&q_weight));
2473     CeedCall(CeedFree(&grad));
2474   }
2475 
2476   // Core code
2477   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2478   return CEED_ERROR_SUCCESS;
2479 }
2480 
2481 /**
2482   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2483 
2484   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2485 
2486   @param[in]  op_fine       Fine grid operator
2487   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2488   @param[in]  rstr_coarse   Coarse grid restriction
2489   @param[in]  basis_coarse  Coarse grid active vector basis
2490   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2491   @param[out] op_coarse     Coarse grid operator
2492   @param[out] op_prolong    Coarse to fine operator, or NULL
2493   @param[out] op_restrict   Fine to coarse operator, or NULL
2494 
2495   @return An error code: 0 - success, otherwise - failure
2496 
2497   @ref User
2498 **/
2499 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2500                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2501                                        CeedOperator *op_restrict) {
2502   Ceed      ceed;
2503   CeedInt   Q_f, Q_c;
2504   CeedBasis basis_fine, basis_c_to_f = NULL;
2505 
2506   CeedCall(CeedOperatorCheckReady(op_fine));
2507   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2508 
2509   // Check for compatible quadrature spaces
2510   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2511   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2512   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2513   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2514 
2515   // Coarse to fine basis
2516   if (op_prolong || op_restrict) {
2517     CeedInt          dim, num_comp, num_nodes_c, num_nodes_f;
2518     CeedScalar      *q_ref, *q_weight, *grad;
2519     CeedElemTopology topo;
2520 
2521     // Check if interpolation matrix is provided
2522     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2523               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2524     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2525     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2526     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2527     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2528     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2529     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2530     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2531     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2532     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2533     CeedCall(CeedFree(&q_ref));
2534     CeedCall(CeedFree(&q_weight));
2535     CeedCall(CeedFree(&grad));
2536   }
2537 
2538   // Core code
2539   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2540   return CEED_ERROR_SUCCESS;
2541 }
2542 
2543 /**
2544   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2545 
2546   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2547   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2548   The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T
2549 \hat S V\f$.
2550   The CeedOperator must be linear and non-composite.
2551   The associated CeedQFunction must therefore also be linear.
2552 
2553   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2554 
2555   @param[in]  op      CeedOperator to create element inverses
2556   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2557   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2558 
2559   @return An error code: 0 - success, otherwise - failure
2560 
2561   @ref User
2562 **/
2563 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2564   Ceed                 ceed, ceed_parent;
2565   bool                 interp = false, grad = false, is_tensor_basis = true;
2566   CeedInt              num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2567   CeedSize             l_size = 1;
2568   CeedScalar          *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg;
2569   const CeedScalar    *interp_1d, *grad_1d, *q_weight_1d;
2570   CeedVector           q_data;
2571   CeedElemRestriction  rstr  = NULL, rstr_qd_i;
2572   CeedBasis            basis = NULL, fdm_basis;
2573   CeedQFunctionContext ctx_fdm;
2574   CeedQFunctionField  *qf_fields;
2575   CeedQFunction        qf, qf_fdm;
2576   CeedOperatorField   *op_fields;
2577 
2578   CeedCall(CeedOperatorCheckReady(op));
2579 
2580   if (op->CreateFDMElementInverse) {
2581     // Backend version
2582     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2583     return CEED_ERROR_SUCCESS;
2584   } else {
2585     // Operator fallback
2586     CeedOperator op_fallback;
2587 
2588     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2589     if (op_fallback) {
2590       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2591       return CEED_ERROR_SUCCESS;
2592     }
2593   }
2594 
2595   // Default interface implementation
2596   CeedCall(CeedOperatorGetCeed(op, &ceed));
2597   CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent));
2598   CeedCall(CeedOperatorGetQFunction(op, &qf));
2599 
2600   // Determine active input basis
2601   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2602   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2603   for (CeedInt i = 0; i < num_input_fields; i++) {
2604     CeedVector vec;
2605 
2606     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2607     if (vec == CEED_VECTOR_ACTIVE) {
2608       CeedEvalMode eval_mode;
2609 
2610       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2611       interp = interp || eval_mode == CEED_EVAL_INTERP;
2612       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2613       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2614       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2615     }
2616   }
2617   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2618   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2619   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2620   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2621   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2622   CeedCall(CeedBasisGetDimension(basis, &dim));
2623   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2624   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2625   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2626 
2627   // Build and diagonalize 1D Mass and Laplacian
2628   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2629   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2630   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2631   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2632   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2633   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2634   CeedCall(CeedCalloc(P_1d, &lambda));
2635   // -- Build matrices
2636   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2637   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2638   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2639   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2640 
2641   // -- Diagonalize
2642   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2643   CeedCall(CeedFree(&mass));
2644   CeedCall(CeedFree(&laplace));
2645   for (CeedInt i = 0; i < P_1d; i++) {
2646     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2647   }
2648   CeedCall(CeedFree(&x));
2649 
2650   {
2651     CeedInt             layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2652     CeedScalar          max_norm = 0;
2653     const CeedScalar   *assembled_array, *q_weight_array;
2654     CeedVector          assembled = NULL, q_weight;
2655     CeedElemRestriction rstr_qf   = NULL;
2656 
2657     // Assemble QFunction
2658     CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2659     CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2660     CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2661     CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2662 
2663     // Calculate element averages
2664     CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2665     CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2666     CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2667     CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2668     CeedCall(CeedCalloc(num_elem, &elem_avg));
2669     const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2670 
2671     for (CeedInt e = 0; e < num_elem; e++) {
2672       CeedInt count = 0;
2673 
2674       for (CeedInt q = 0; q < num_qpts; q++) {
2675         for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2676           if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2677             elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2678             count++;
2679           }
2680         }
2681       }
2682       if (count) {
2683         elem_avg[e] /= count;
2684       } else {
2685         elem_avg[e] = 1.0;
2686       }
2687     }
2688     CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2689     CeedCall(CeedVectorDestroy(&assembled));
2690     CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2691     CeedCall(CeedVectorDestroy(&q_weight));
2692   }
2693 
2694   // Build FDM diagonal
2695   {
2696     CeedScalar *q_data_array, *fdm_diagonal;
2697 
2698     CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2699     const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2700     for (CeedInt c = 0; c < num_comp; c++) {
2701       for (CeedInt n = 0; n < num_nodes; n++) {
2702         if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2703         if (grad) {
2704           for (CeedInt d = 0; d < dim; d++) {
2705             CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2706             fdm_diagonal[c * num_nodes + n] += lambda[i];
2707           }
2708         }
2709         if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2710       }
2711     }
2712     CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2713     CeedCall(CeedVectorSetValue(q_data, 0.0));
2714     CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2715     for (CeedInt e = 0; e < num_elem; e++) {
2716       for (CeedInt c = 0; c < num_comp; c++) {
2717         for (CeedInt n = 0; n < num_nodes; n++)
2718           q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2719       }
2720     }
2721     CeedCall(CeedFree(&elem_avg));
2722     CeedCall(CeedFree(&fdm_diagonal));
2723     CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2724   }
2725 
2726   // Setup FDM operator
2727   // -- Basis
2728   {
2729     CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2730 
2731     CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2732     CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2733     CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2734     CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2735     CeedCall(CeedFree(&fdm_interp));
2736     CeedCall(CeedFree(&grad_dummy));
2737     CeedCall(CeedFree(&q_ref_dummy));
2738     CeedCall(CeedFree(&q_weight_dummy));
2739     CeedCall(CeedFree(&lambda));
2740   }
2741 
2742   // -- Restriction
2743   {
2744     CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp};
2745     CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i));
2746   }
2747 
2748   // -- QFunction
2749   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2750   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2751   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2752   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2753   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2754 
2755   // -- QFunction context
2756   {
2757     CeedInt *num_comp_data;
2758 
2759     CeedCall(CeedCalloc(1, &num_comp_data));
2760     num_comp_data[0] = num_comp;
2761     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2762     CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2763   }
2764   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2765   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2766 
2767   // -- Operator
2768   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2769   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2770   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data));
2771   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2772 
2773   // Cleanup
2774   CeedCall(CeedVectorDestroy(&q_data));
2775   CeedCall(CeedBasisDestroy(&fdm_basis));
2776   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2777   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2778   return CEED_ERROR_SUCCESS;
2779 }
2780 
2781 /// @}
2782