xref: /libCEED/interface/ceed-preconditioning.c (revision 5cd6c1fb67d52eb6a42b887bb79c183682dd86ca)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   char *source_path_with_name = NULL;
39 
40   // Check if NULL qf passed in
41   if (!qf) return CEED_ERROR_SUCCESS;
42 
43   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
44   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
45 
46   if (qf->source_path) {
47     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
48     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
49     memcpy(source_path_with_name, qf->source_path, path_len);
50     memcpy(&source_path_with_name[path_len], ":", 1);
51     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
52   } else {
53     CeedCall(CeedCalloc(1, &source_path_with_name));
54   }
55 
56   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
57   {
58     CeedQFunctionContext ctx;
59 
60     CeedCall(CeedQFunctionGetContext(qf, &ctx));
61     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
62   }
63   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
64     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
65   }
66   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
67     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
68   }
69   CeedCall(CeedFree(&source_path_with_name));
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   Ceed         ceed_fallback;
84   bool         is_composite;
85   CeedOperator op_fallback;
86 
87   // Check not already created
88   if (op->op_fallback) return CEED_ERROR_SUCCESS;
89 
90   // Fallback Ceed
91   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
92   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
93 
94   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
95   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
96 
97   // Clone Op
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114 
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
117     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
118     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
119     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
120       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
121                                     op->input_fields[i]->vec));
122     }
123     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
124       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
125                                     op->output_fields[i]->vec));
126     }
127     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
128     // Cleanup
129     CeedCall(CeedQFunctionDestroy(&qf_fallback));
130     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
131     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
132   }
133   CeedCall(CeedOperatorSetName(op_fallback, op->name));
134   CeedCall(CeedOperatorCheckReady(op_fallback));
135   // Note: No ref-counting here so we don't get caught in a reference loop.
136   //       The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback.
137   op->op_fallback                 = op_fallback;
138   op_fallback->op_fallback_parent = op;
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Select correct basis matrix pointer based on CeedEvalMode
144 
145   @param[in]  basis     CeedBasis from which to get the basis matrix
146   @param[in]  eval_mode Current basis evaluation mode
147   @param[in]  identity  Pointer to identity matrix
148   @param[out] basis_ptr Basis pointer to set
149 
150   @ref Developer
151 **/
152 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
153   switch (eval_mode) {
154     case CEED_EVAL_NONE:
155       *basis_ptr = identity;
156       break;
157     case CEED_EVAL_INTERP:
158       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
159       break;
160     case CEED_EVAL_GRAD:
161       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
162       break;
163     case CEED_EVAL_DIV:
164       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
165       break;
166     case CEED_EVAL_CURL:
167       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
168       break;
169     case CEED_EVAL_WEIGHT:
170       break;  // Caught by QF Assembly
171   }
172   assert(*basis_ptr != NULL);
173   return CEED_ERROR_SUCCESS;
174 }
175 
176 /**
177   @brief Core logic for assembling operator diagonal or point block diagonal
178 
179   @param[in]  op             CeedOperator to assemble point block diagonal
180   @param[in]  request        Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
181   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
182   @param[out] assembled      CeedVector to store assembled diagonal
183 
184   @return An error code: 0 - success, otherwise - failure
185 
186   @ref Developer
187 **/
188 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_point_block, CeedVector assembled) {
189   Ceed ceed;
190   bool is_composite;
191 
192   CeedCall(CeedOperatorGetCeed(op, &ceed));
193   CeedCall(CeedOperatorIsComposite(op, &is_composite));
194   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
195 
196   // Assemble QFunction
197   CeedInt             layout_qf[3];
198   const CeedScalar   *assembled_qf_array;
199   CeedVector          assembled_qf        = NULL;
200   CeedElemRestriction assembled_elem_rstr = NULL;
201 
202   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
203   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf));
204   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
205   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
206 
207   // Get assembly data
208   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
209   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
210   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
211   CeedBasis               *active_bases_in, *active_bases_out;
212   CeedElemRestriction     *active_elem_rstrs_in, *active_elem_rstrs_out;
213   CeedOperatorAssemblyData data;
214 
215   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
216   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
217                                                 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out,
218                                                 &num_output_components));
219   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL));
220   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out));
221 
222   // Loop over all active bases (find matching input/output pairs)
223   for (CeedInt b = 0; b < CeedIntMin(num_active_bases_in, num_active_bases_out); b++) {
224     CeedInt             b_in, b_out, num_elem, num_nodes, num_qpts, num_comp;
225     bool                has_eval_none = false;
226     CeedScalar         *elem_diag_array, *identity = NULL;
227     CeedVector          elem_diag;
228     CeedElemRestriction diag_elem_rstr;
229 
230     if (num_active_bases_in <= num_active_bases_out) {
231       b_in = b;
232       for (b_out = 0; b_out < num_active_bases_out; b_out++) {
233         if (active_bases_in[b_in] == active_bases_out[b_out]) {
234           break;
235         }
236       }
237       if (b_out == num_active_bases_out) {
238         continue;
239       }  // No matching output basis found
240     } else {
241       b_out = b;
242       for (b_in = 0; b_in < num_active_bases_in; b_in++) {
243         if (active_bases_in[b_in] == active_bases_out[b_out]) {
244           break;
245         }
246       }
247       if (b_in == num_active_bases_in) {
248         continue;
249       }  // No matching output basis found
250     }
251     CeedCheck(active_elem_rstrs_in[b_in] == active_elem_rstrs_out[b_out], ceed, CEED_ERROR_UNSUPPORTED,
252               "Cannot assemble operator diagonal with different input and output active element restrictions");
253 
254     // Assemble point block diagonal restriction, if needed
255     if (is_point_block) {
256       CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b_in], &diag_elem_rstr));
257     } else {
258       CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b_in], &diag_elem_rstr));
259     }
260 
261     // Create diagonal vector
262     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
263 
264     // Assemble element operator diagonals
265     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
266     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
267     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
268     CeedCall(CeedBasisGetNumNodes(active_bases_in[b_in], &num_nodes));
269     CeedCall(CeedBasisGetNumComponents(active_bases_in[b_in], &num_comp));
270     if (active_bases_in[b_in] == CEED_BASIS_NONE) num_qpts = num_nodes;
271     else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b_in], &num_qpts));
272 
273     // Construct identity matrix for basis if required
274     for (CeedInt i = 0; i < num_eval_modes_in[b_in]; i++) {
275       has_eval_none = has_eval_none || (eval_modes_in[b_in][i] == CEED_EVAL_NONE);
276     }
277     for (CeedInt i = 0; i < num_eval_modes_out[b_out]; i++) {
278       has_eval_none = has_eval_none || (eval_modes_out[b_out][i] == CEED_EVAL_NONE);
279     }
280     if (has_eval_none) {
281       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
282       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
283     }
284 
285     // Compute the diagonal of B^T D B
286     // Each element
287     for (CeedSize e = 0; e < num_elem; e++) {
288       // Each basis eval mode pair
289       CeedInt      d_out              = 0, q_comp_out;
290       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
291 
292       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b_out]; e_out++) {
293         CeedInt           d_in              = 0, q_comp_in;
294         const CeedScalar *B_t               = NULL;
295         CeedEvalMode      eval_mode_in_prev = CEED_EVAL_NONE;
296 
297         CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b_out], eval_modes_out[b_out][e_out], identity, &B_t));
298         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b_out], eval_modes_out[b_out][e_out], &q_comp_out));
299         if (q_comp_out > 1) {
300           if (e_out == 0 || eval_modes_out[b_out][e_out] != eval_mode_out_prev) d_out = 0;
301           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
302         }
303         eval_mode_out_prev = eval_modes_out[b_out][e_out];
304 
305         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b_in]; e_in++) {
306           const CeedScalar *B = NULL;
307 
308           CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b_in], eval_modes_in[b_in][e_in], identity, &B));
309           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b_in], eval_modes_in[b_in][e_in], &q_comp_in));
310           if (q_comp_in > 1) {
311             if (e_in == 0 || eval_modes_in[b_in][e_in] != eval_mode_in_prev) d_in = 0;
312             else B = &B[(++d_in) * num_qpts * num_nodes];
313           }
314           eval_mode_in_prev = eval_modes_in[b_in][e_in];
315 
316           // Each component
317           for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
318             // Each qpt/node pair
319             for (CeedInt q = 0; q < num_qpts; q++) {
320               if (is_point_block) {
321                 // Point Block Diagonal
322                 for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
323                   const CeedSize c_offset =
324                       (eval_mode_offsets_in[b_in][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
325                   const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
326 
327                   for (CeedInt n = 0; n < num_nodes; n++) {
328                     elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
329                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
330                   }
331                 }
332               } else {
333                 // Diagonal Only
334                 const CeedInt c_offset =
335                     (eval_mode_offsets_in[b_in][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
336                 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
337 
338                 for (CeedInt n = 0; n < num_nodes; n++) {
339                   elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
340                 }
341               }
342             }
343           }
344         }
345       }
346     }
347     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
348 
349     // Assemble local operator diagonal
350     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
351 
352     // Cleanup
353     CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
354     CeedCall(CeedVectorDestroy(&elem_diag));
355     CeedCall(CeedFree(&identity));
356   }
357   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
358   CeedCall(CeedVectorDestroy(&assembled_qf));
359   return CEED_ERROR_SUCCESS;
360 }
361 
362 /**
363   @brief Core logic for assembling composite operator diagonal
364 
365   @param[in]  op             CeedOperator to assemble point block diagonal
366   @param[in]  request        Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
367   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
368   @param[out] assembled      CeedVector to store assembled diagonal
369 
370   @return An error code: 0 - success, otherwise - failure
371 
372   @ref Developer
373 **/
374 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block,
375                                                                  CeedVector assembled) {
376   CeedInt       num_sub;
377   CeedOperator *suboperators;
378 
379   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
380   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
381   for (CeedInt i = 0; i < num_sub; i++) {
382     if (is_point_block) {
383       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
384     } else {
385       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
386     }
387   }
388   return CEED_ERROR_SUCCESS;
389 }
390 
391 /**
392   @brief Build nonzero pattern for non-composite operator
393 
394   Users should generally use CeedOperatorLinearAssembleSymbolic()
395 
396   @param[in]  op     CeedOperator to assemble nonzero pattern
397   @param[in]  offset Offset for number of entries
398   @param[out] rows   Row number for each entry
399   @param[out] cols   Column number for each entry
400 
401   @return An error code: 0 - success, otherwise - failure
402 
403   @ref Developer
404 **/
405 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
406   Ceed                ceed;
407   bool                is_composite;
408   CeedSize            num_nodes_in, num_nodes_out, count = 0;
409   CeedInt             num_elem_in, elem_size_in, num_comp_in, layout_er_in[3];
410   CeedInt             num_elem_out, elem_size_out, num_comp_out, layout_er_out[3], local_num_entries;
411   CeedScalar         *array;
412   const CeedScalar   *elem_dof_a_in, *elem_dof_a_out;
413   CeedVector          index_vec_in, index_vec_out, elem_dof_in, elem_dof_out;
414   CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out;
415 
416   CeedCall(CeedOperatorGetCeed(op, &ceed));
417   CeedCall(CeedOperatorIsComposite(op, &is_composite));
418   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
419 
420   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out));
421   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
422   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
423   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
424   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
425   CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, &layout_er_in));
426 
427   // Determine elem_dof relation for input
428   CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in));
429   CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array));
430   for (CeedInt i = 0; i < num_nodes_in; i++) array[i] = i;
431   CeedCall(CeedVectorRestoreArray(index_vec_in, &array));
432   CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in));
433   CeedCall(CeedVectorSetValue(elem_dof_in, 0.0));
434   CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in));
435   CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE));
436   CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in));
437   CeedCall(CeedVectorDestroy(&index_vec_in));
438   CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in));
439 
440   if (elem_rstr_in != elem_rstr_out) {
441     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
442     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
443               "Active input and output operator restrictions must have the same number of elements");
444     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
445     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
446     CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, &layout_er_out));
447 
448     // Determine elem_dof relation for output
449     CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out));
450     CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array));
451     for (CeedInt i = 0; i < num_nodes_out; i++) array[i] = i;
452     CeedCall(CeedVectorRestoreArray(index_vec_out, &array));
453     CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out));
454     CeedCall(CeedVectorSetValue(elem_dof_out, 0.0));
455     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out));
456     CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE));
457     CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out));
458     CeedCall(CeedVectorDestroy(&index_vec_out));
459     CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out));
460   } else {
461     num_elem_out     = num_elem_in;
462     elem_size_out    = elem_size_in;
463     num_comp_out     = num_comp_in;
464     layout_er_out[0] = layout_er_in[0];
465     layout_er_out[1] = layout_er_in[1];
466     layout_er_out[2] = layout_er_in[2];
467     elem_dof_a_out   = elem_dof_a_in;
468   }
469   local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
470 
471   // Determine i, j locations for element matrices
472   for (CeedInt e = 0; e < num_elem_in; e++) {
473     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
474       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
475         for (CeedInt i = 0; i < elem_size_out; i++) {
476           for (CeedInt j = 0; j < elem_size_in; j++) {
477             const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2];
478             const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2];
479             const CeedInt row                = elem_dof_a_out[elem_dof_index_row];
480             const CeedInt col                = elem_dof_a_in[elem_dof_index_col];
481 
482             rows[offset + count] = row;
483             cols[offset + count] = col;
484             count++;
485           }
486         }
487       }
488     }
489   }
490   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
491   CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in));
492   CeedCall(CeedVectorDestroy(&elem_dof_in));
493   if (elem_rstr_in != elem_rstr_out) {
494     CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out));
495     CeedCall(CeedVectorDestroy(&elem_dof_out));
496   }
497   return CEED_ERROR_SUCCESS;
498 }
499 
500 /**
501   @brief Assemble nonzero entries for non-composite operator
502 
503   Users should generally use CeedOperatorLinearAssemble()
504 
505   @param[in]  op     CeedOperator to assemble
506   @param[in]  offset Offset for number of entries
507   @param[out] values Values to assemble into matrix
508 
509   @return An error code: 0 - success, otherwise - failure
510 
511   @ref Developer
512 **/
513 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
514   Ceed ceed;
515   bool is_composite;
516 
517   CeedCall(CeedOperatorGetCeed(op, &ceed));
518   CeedCall(CeedOperatorIsComposite(op, &is_composite));
519   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
520 
521   // Early exit for empty operator
522   {
523     CeedInt num_elem = 0;
524 
525     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
526     if (num_elem == 0) return CEED_ERROR_SUCCESS;
527   }
528 
529   if (op->LinearAssembleSingle) {
530     // Backend version
531     CeedCall(op->LinearAssembleSingle(op, offset, values));
532     return CEED_ERROR_SUCCESS;
533   } else {
534     // Operator fallback
535     CeedOperator op_fallback;
536 
537     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
538     if (op_fallback) {
539       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
540       return CEED_ERROR_SUCCESS;
541     }
542   }
543 
544   // Assemble QFunction
545   CeedInt             layout_qf[3];
546   const CeedScalar   *assembled_qf_array;
547   CeedVector          assembled_qf        = NULL;
548   CeedElemRestriction assembled_elem_rstr = NULL;
549 
550   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE));
551   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf));
552   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
553   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
554 
555   // Get assembly data
556   CeedInt                  num_elem_in, elem_size_in, num_comp_in, num_qpts_in;
557   CeedInt                  num_elem_out, elem_size_out, num_comp_out, num_qpts_out, local_num_entries;
558   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
559   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
560   CeedBasis               *active_bases_in, *active_bases_out, basis_in, basis_out;
561   const CeedScalar       **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out;
562   CeedElemRestriction      elem_rstr_in, elem_rstr_out;
563   CeedRestrictionType      elem_rstr_type_in, elem_rstr_type_out;
564   const bool              *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL;
565   const CeedInt8          *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL;
566   CeedOperatorAssemblyData data;
567 
568   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
569   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out,
570                                                 &num_eval_modes_out, &eval_modes_out, NULL, NULL));
571 
572   CeedCheck(num_active_bases_in == num_active_bases_out && num_active_bases_in == 1, ceed, CEED_ERROR_UNSUPPORTED,
573             "Cannot assemble operator with multiple active bases");
574   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator without inputs/outputs");
575 
576   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out));
577   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
578   basis_in  = active_bases_in[0];
579   basis_out = active_bases_out[0];
580   B_mat_in  = B_mats_in[0];
581   B_mat_out = B_mats_out[0];
582 
583   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
584   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
585   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
586   if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in;
587   else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in));
588 
589   CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in));
590   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
591     CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in));
592   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
593     CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in));
594   }
595 
596   if (elem_rstr_in != elem_rstr_out) {
597     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
598     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
599               "Active input and output operator restrictions must have the same number of elements");
600     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
601     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
602     if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out;
603     else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out));
604     CeedCheck(num_qpts_in == num_qpts_out, ceed, CEED_ERROR_UNSUPPORTED,
605               "Active input and output bases must have the same number of quadrature points");
606 
607     CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out));
608     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
609       CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out));
610     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
611       CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out));
612     }
613   } else {
614     num_elem_out  = num_elem_in;
615     elem_size_out = elem_size_in;
616     num_comp_out  = num_comp_in;
617     num_qpts_out  = num_qpts_in;
618 
619     elem_rstr_orients_out      = elem_rstr_orients_in;
620     elem_rstr_curl_orients_out = elem_rstr_curl_orients_in;
621   }
622   local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
623 
624   // Loop over elements and put in data structure
625   // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
626   CeedTensorContract contract;
627   CeedSize           count = 0;
628   CeedScalar        *vals, *BTD_mat = NULL, *elem_mat = NULL, *elem_mat_b = NULL;
629 
630   CeedCall(CeedBasisGetTensorContract(basis_in, &contract));
631   CeedCall(CeedCalloc(elem_size_out * num_qpts_in * num_eval_modes_in[0], &BTD_mat));
632   CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat));
633   if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b));
634 
635   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
636   for (CeedSize e = 0; e < num_elem_in; e++) {
637     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
638       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
639         // Compute B^T*D
640         for (CeedSize n = 0; n < elem_size_out; n++) {
641           for (CeedSize q = 0; q < num_qpts_in; q++) {
642             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
643               const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in;
644               CeedScalar     sum       = 0.0;
645 
646               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
647                 const CeedSize b_out_index     = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n;
648                 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out;
649                 const CeedSize qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
650 
651                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
652               }
653               BTD_mat[btd_index] = sum;
654             }
655           }
656         }
657 
658         // Form element matrix itself (for each block component)
659         if (contract) {
660           CeedCall(CeedTensorContractApply(contract, 1, num_qpts_in * num_eval_modes_in[0], elem_size_in, elem_size_out, BTD_mat, CEED_NOTRANSPOSE,
661                                            false, B_mat_in, elem_mat));
662         } else {
663           CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size_out, elem_size_in, num_qpts_in * num_eval_modes_in[0]));
664         }
665 
666         // Transform the element matrix if required
667         if (elem_rstr_orients_out) {
668           const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out];
669 
670           for (CeedInt i = 0; i < elem_size_out; i++) {
671             const double orient = elem_orients[i] ? -1.0 : 1.0;
672 
673             for (CeedInt j = 0; j < elem_size_in; j++) {
674               elem_mat[i * elem_size_in + j] *= orient;
675             }
676           }
677         } else if (elem_rstr_curl_orients_out) {
678           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out];
679 
680           // T^T*(B^T*D*B)
681           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
682           for (CeedInt i = 0; i < elem_size_out; i++) {
683             for (CeedInt j = 0; j < elem_size_in; j++) {
684               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] +
685                                                (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) +
686                                                (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0);
687             }
688           }
689         }
690         if (elem_rstr_orients_in) {
691           const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in];
692 
693           for (CeedInt i = 0; i < elem_size_out; i++) {
694             for (CeedInt j = 0; j < elem_size_in; j++) {
695               elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0;
696             }
697           }
698         } else if (elem_rstr_curl_orients_in) {
699           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in];
700 
701           // (B^T*D*B)*T
702           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
703           for (CeedInt i = 0; i < elem_size_out; i++) {
704             for (CeedInt j = 0; j < elem_size_in; j++) {
705               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] +
706                                                (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) +
707                                                (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0);
708             }
709           }
710         }
711 
712         // Put element matrix in coordinate data structure
713         for (CeedInt i = 0; i < elem_size_out; i++) {
714           for (CeedInt j = 0; j < elem_size_in; j++) {
715             vals[offset + count] = elem_mat[i * elem_size_in + j];
716             count++;
717           }
718         }
719       }
720     }
721   }
722   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries");
723   CeedCall(CeedVectorRestoreArray(values, &vals));
724 
725   // Cleanup
726   CeedCall(CeedFree(&BTD_mat));
727   CeedCall(CeedFree(&elem_mat));
728   CeedCall(CeedFree(&elem_mat_b));
729   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
730     CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in));
731   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
732     CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in));
733   }
734   if (elem_rstr_in != elem_rstr_out) {
735     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
736       CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out));
737     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
738       CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out));
739     }
740   }
741   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
742   CeedCall(CeedVectorDestroy(&assembled_qf));
743   return CEED_ERROR_SUCCESS;
744 }
745 
746 /**
747   @brief Count number of entries for assembled CeedOperator
748 
749   @param[in]  op          CeedOperator to assemble
750   @param[out] num_entries Number of entries in assembled representation
751 
752   @return An error code: 0 - success, otherwise - failure
753 
754   @ref Utility
755 **/
756 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) {
757   bool                is_composite;
758   CeedInt             num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out;
759   CeedElemRestriction rstr_in, rstr_out;
760 
761   CeedCall(CeedOperatorIsComposite(op, &is_composite));
762   CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
763 
764   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out));
765   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in));
766   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in));
767   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in));
768   if (rstr_in != rstr_out) {
769     CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out));
770     CeedCheck(num_elem_in == num_elem_out, op->ceed, CEED_ERROR_UNSUPPORTED,
771               "Active input and output operator restrictions must have the same number of elements");
772     CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out));
773     CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out));
774   } else {
775     num_elem_out  = num_elem_in;
776     elem_size_out = elem_size_in;
777     num_comp_out  = num_comp_in;
778   }
779   *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in;
780   return CEED_ERROR_SUCCESS;
781 }
782 
783 /**
784   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
785 
786   @param[in]  op_fine      Fine grid operator
787   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
788   @param[in]  rstr_coarse  Coarse grid restriction
789   @param[in]  basis_coarse Coarse grid active vector basis
790   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
791   @param[out] op_coarse    Coarse grid operator
792   @param[out] op_prolong   Coarse to fine operator, or NULL
793   @param[out] op_restrict  Fine to coarse operator, or NULL
794 
795   @return An error code: 0 - success, otherwise - failure
796 
797   @ref Developer
798 **/
799 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
800                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
801   bool                is_composite;
802   Ceed                ceed;
803   CeedInt             num_comp;
804   CeedVector          mult_vec         = NULL;
805   CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL;
806 
807   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
808 
809   // Check for composite operator
810   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
811   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
812 
813   // Coarse Grid
814   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
815   // -- Clone input fields
816   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
817     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
818       rstr_fine = op_fine->input_fields[i]->elem_rstr;
819       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
820     } else {
821       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
822                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
823     }
824   }
825   // -- Clone output fields
826   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
827     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
828       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
829     } else {
830       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
831                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
832     }
833   }
834   // -- Clone QFunctionAssemblyData
835   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
836 
837   // Multiplicity vector
838   if (op_restrict || op_prolong) {
839     CeedVector          mult_e_vec;
840     CeedRestrictionType rstr_type;
841 
842     CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type));
843     CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED,
844               "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported");
845     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
846     CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine));
847     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
848     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
849     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
850     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
851     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
852     CeedCall(CeedVectorDestroy(&mult_e_vec));
853     CeedCall(CeedVectorReciprocal(mult_vec));
854   }
855 
856   // Clone name
857   bool   has_name = op_fine->name;
858   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
859   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
860 
861   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
862   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
863             "Prolongation or restriction operator creation requires coarse-to-fine basis");
864 
865   // Restriction/Prolongation Operators
866   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
867 
868   // Restriction
869   if (op_restrict) {
870     CeedInt             *num_comp_r_data;
871     CeedQFunctionContext ctx_r;
872     CeedQFunction        qf_restrict;
873 
874     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
875     CeedCall(CeedCalloc(1, &num_comp_r_data));
876     num_comp_r_data[0] = num_comp;
877     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
878     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
879     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
880     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
881     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
882     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
883     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
884     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
885 
886     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
887     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
888     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
889     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
890 
891     // Set name
892     char *restriction_name;
893 
894     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
895     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
896     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
897     CeedCall(CeedFree(&restriction_name));
898 
899     // Check
900     CeedCall(CeedOperatorCheckReady(*op_restrict));
901 
902     // Cleanup
903     CeedCall(CeedQFunctionDestroy(&qf_restrict));
904   }
905 
906   // Prolongation
907   if (op_prolong) {
908     CeedInt             *num_comp_p_data;
909     CeedQFunctionContext ctx_p;
910     CeedQFunction        qf_prolong;
911 
912     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
913     CeedCall(CeedCalloc(1, &num_comp_p_data));
914     num_comp_p_data[0] = num_comp;
915     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
916     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
917     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
918     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
919     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
920     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
921     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
922     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
923 
924     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
925     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
926     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
927     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
928 
929     // Set name
930     char *prolongation_name;
931 
932     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
933     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
934     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
935     CeedCall(CeedFree(&prolongation_name));
936 
937     // Check
938     CeedCall(CeedOperatorCheckReady(*op_prolong));
939 
940     // Cleanup
941     CeedCall(CeedQFunctionDestroy(&qf_prolong));
942   }
943 
944   // Check
945   CeedCall(CeedOperatorCheckReady(*op_coarse));
946 
947   // Cleanup
948   CeedCall(CeedVectorDestroy(&mult_vec));
949   CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine));
950   CeedCall(CeedBasisDestroy(&basis_c_to_f));
951   return CEED_ERROR_SUCCESS;
952 }
953 
954 /**
955   @brief Build 1D mass matrix and Laplacian with perturbation
956 
957   @param[in]  interp_1d   Interpolation matrix in one dimension
958   @param[in]  grad_1d     Gradient matrix in one dimension
959   @param[in]  q_weight_1d Quadrature weights in one dimension
960   @param[in]  P_1d        Number of basis nodes in one dimension
961   @param[in]  Q_1d        Number of quadrature points in one dimension
962   @param[in]  dim         Dimension of basis
963   @param[out] mass        Assembled mass matrix in one dimension
964   @param[out] laplace     Assembled perturbed Laplacian in one dimension
965 
966   @return An error code: 0 - success, otherwise - failure
967 
968   @ref Developer
969 **/
970 CeedPragmaOptimizeOff
971 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
972                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
973   for (CeedInt i = 0; i < P_1d; i++) {
974     for (CeedInt j = 0; j < P_1d; j++) {
975       CeedScalar sum = 0.0;
976       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
977       mass[i + j * P_1d] = sum;
978     }
979   }
980   // -- Laplacian
981   for (CeedInt i = 0; i < P_1d; i++) {
982     for (CeedInt j = 0; j < P_1d; j++) {
983       CeedScalar sum = 0.0;
984 
985       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
986       laplace[i + j * P_1d] = sum;
987     }
988   }
989   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
990   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
991   return CEED_ERROR_SUCCESS;
992 }
993 CeedPragmaOptimizeOn
994 
995 /// @}
996 
997 /// ----------------------------------------------------------------------------
998 /// CeedOperator Backend API
999 /// ----------------------------------------------------------------------------
1000 /// @addtogroup CeedOperatorBackend
1001 /// @{
1002 
1003 /**
1004   @brief Create point block restriction for active operator field
1005 
1006   @param[in]  rstr             Original CeedElemRestriction for active field
1007   @param[out] point_block_rstr Address of the variable where the newly created CeedElemRestriction will be stored
1008 
1009   @return An error code: 0 - success, otherwise - failure
1010 
1011   @ref Backend
1012 **/
1013 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) {
1014   Ceed           ceed;
1015   CeedInt        num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets;
1016   CeedSize       l_size;
1017   const CeedInt *offsets;
1018 
1019   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
1020   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
1021 
1022   // Expand offsets
1023   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
1024   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
1025   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
1026   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
1027   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
1028   shift = num_comp;
1029   if (comp_stride != 1) shift *= num_comp;
1030   CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets));
1031   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
1032     point_block_offsets[i] = offsets[i] * shift;
1033   }
1034 
1035   // Create new restriction
1036   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
1037                                      point_block_offsets, point_block_rstr));
1038 
1039   // Cleanup
1040   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
1041   return CEED_ERROR_SUCCESS;
1042 }
1043 
1044 /**
1045   @brief Create object holding CeedQFunction assembly data for CeedOperator
1046 
1047   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
1048   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
1049 
1050   @return An error code: 0 - success, otherwise - failure
1051 
1052   @ref Backend
1053 **/
1054 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
1055   CeedCall(CeedCalloc(1, data));
1056   (*data)->ref_count = 1;
1057   (*data)->ceed      = ceed;
1058   CeedCall(CeedReference(ceed));
1059   return CEED_ERROR_SUCCESS;
1060 }
1061 
1062 /**
1063   @brief Increment the reference counter for a CeedQFunctionAssemblyData
1064 
1065   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
1066 
1067   @return An error code: 0 - success, otherwise - failure
1068 
1069   @ref Backend
1070 **/
1071 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
1072   data->ref_count++;
1073   return CEED_ERROR_SUCCESS;
1074 }
1075 
1076 /**
1077   @brief Set re-use of CeedQFunctionAssemblyData
1078 
1079   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
1080   @param[in]     reuse_data Boolean flag indicating data re-use
1081 
1082   @return An error code: 0 - success, otherwise - failure
1083 
1084   @ref Backend
1085 **/
1086 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
1087   data->reuse_data        = reuse_data;
1088   data->needs_data_update = true;
1089   return CEED_ERROR_SUCCESS;
1090 }
1091 
1092 /**
1093   @brief Mark QFunctionAssemblyData as stale
1094 
1095   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
1096   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
1097 
1098   @return An error code: 0 - success, otherwise - failure
1099 
1100   @ref Backend
1101 **/
1102 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
1103   data->needs_data_update = needs_data_update;
1104   return CEED_ERROR_SUCCESS;
1105 }
1106 
1107 /**
1108   @brief Determine if QFunctionAssemblyData needs update
1109 
1110   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
1111   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1112 
1113   @return An error code: 0 - success, otherwise - failure
1114 
1115   @ref Backend
1116 **/
1117 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
1118   *is_update_needed = !data->reuse_data || data->needs_data_update;
1119   return CEED_ERROR_SUCCESS;
1120 }
1121 
1122 /**
1123   @brief Copy the pointer to a CeedQFunctionAssemblyData.
1124 
1125   Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
1126 
1127   Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
1128         CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
1129         CeedQFunctionAssemblyData.
1130 
1131   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
1132   @param[in,out] data_copy Variable to store copied reference
1133 
1134   @return An error code: 0 - success, otherwise - failure
1135 
1136   @ref Backend
1137 **/
1138 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1139   CeedCall(CeedQFunctionAssemblyDataReference(data));
1140   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1141   *data_copy = data;
1142   return CEED_ERROR_SUCCESS;
1143 }
1144 
1145 /**
1146   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1147 
1148   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1149   @param[out] is_setup Boolean flag for setup status
1150 
1151   @return An error code: 0 - success, otherwise - failure
1152 
1153   @ref Backend
1154 **/
1155 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1156   *is_setup = data->is_setup;
1157   return CEED_ERROR_SUCCESS;
1158 }
1159 
1160 /**
1161   @brief Set internal objects for CeedQFunctionAssemblyData
1162 
1163   @param[in,out] data CeedQFunctionAssemblyData to set objects
1164   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1165   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1166 
1167   @return An error code: 0 - success, otherwise - failure
1168 
1169   @ref Backend
1170 **/
1171 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1172   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1173   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1174 
1175   data->is_setup = true;
1176   return CEED_ERROR_SUCCESS;
1177 }
1178 
1179 /**
1180   @brief Get internal objects for CeedQFunctionAssemblyData
1181 
1182   @param[in,out] data CeedQFunctionAssemblyData to set objects
1183   @param[out]    vec  CeedVector to store assembled CeedQFunction at quadrature points
1184   @param[out]    rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1185 
1186   @return An error code: 0 - success, otherwise - failure
1187 
1188   @ref Backend
1189 **/
1190 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1191   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1192 
1193   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1194   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1195   return CEED_ERROR_SUCCESS;
1196 }
1197 
1198 /**
1199   @brief Destroy CeedQFunctionAssemblyData
1200 
1201   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1202 
1203   @return An error code: 0 - success, otherwise - failure
1204 
1205   @ref Backend
1206 **/
1207 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1208   if (!*data || --(*data)->ref_count > 0) {
1209     *data = NULL;
1210     return CEED_ERROR_SUCCESS;
1211   }
1212   CeedCall(CeedDestroy(&(*data)->ceed));
1213   CeedCall(CeedVectorDestroy(&(*data)->vec));
1214   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1215 
1216   CeedCall(CeedFree(data));
1217   return CEED_ERROR_SUCCESS;
1218 }
1219 
1220 /**
1221   @brief Get CeedOperatorAssemblyData
1222 
1223   @param[in]  op   CeedOperator to assemble
1224   @param[out] data CeedQFunctionAssemblyData
1225 
1226   @return An error code: 0 - success, otherwise - failure
1227 
1228   @ref Backend
1229 **/
1230 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1231   if (!op->op_assembled) {
1232     CeedOperatorAssemblyData data;
1233 
1234     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1235     op->op_assembled = data;
1236   }
1237   *data = op->op_assembled;
1238   return CEED_ERROR_SUCCESS;
1239 }
1240 
1241 /**
1242   @brief Create object holding CeedOperator assembly data.
1243 
1244   The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1245   An array with references to the corresponding active CeedElemRestrictions is also stored.
1246   For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1247   The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1248 CeedEvalMode.
1249   The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1250   Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1251 
1252   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1253   @param[in]  op   CeedOperator to be assembled
1254   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1255 
1256   @return An error code: 0 - success, otherwise - failure
1257 
1258   @ref Backend
1259 **/
1260 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1261   CeedInt             num_active_bases_in = 0, num_active_bases_out = 0, offset = 0;
1262   CeedInt             num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL;
1263   CeedSize          **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1264   CeedEvalMode      **eval_modes_in = NULL, **eval_modes_out = NULL;
1265   CeedQFunctionField *qf_fields;
1266   CeedQFunction       qf;
1267   CeedOperatorField  *op_fields;
1268   bool                is_composite;
1269 
1270   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1271   CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators.");
1272 
1273   // Allocate
1274   CeedCall(CeedCalloc(1, data));
1275   (*data)->ceed = ceed;
1276   CeedCall(CeedReference(ceed));
1277 
1278   // Build OperatorAssembly data
1279   CeedCall(CeedOperatorGetQFunction(op, &qf));
1280   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1281   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1282 
1283   // Determine active input basis
1284   for (CeedInt i = 0; i < num_input_fields; i++) {
1285     CeedVector vec;
1286 
1287     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1288     if (vec == CEED_VECTOR_ACTIVE) {
1289       CeedInt      index = -1, num_comp, q_comp;
1290       CeedEvalMode eval_mode;
1291       CeedBasis    basis_in = NULL;
1292 
1293       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1294       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1295       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1296       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1297       for (CeedInt i = 0; i < num_active_bases_in; i++) {
1298         if ((*data)->active_bases_in[i] == basis_in) index = i;
1299       }
1300       if (index == -1) {
1301         CeedElemRestriction elem_rstr_in;
1302 
1303         index = num_active_bases_in;
1304         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in));
1305         (*data)->active_bases_in[num_active_bases_in] = NULL;
1306         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in]));
1307         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in));
1308         (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL;
1309         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1310         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in]));
1311         CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in));
1312         num_eval_modes_in[index] = 0;
1313         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in));
1314         eval_modes_in[index] = NULL;
1315         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in));
1316         eval_mode_offsets_in[index] = NULL;
1317         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in));
1318         (*data)->assembled_bases_in[index] = NULL;
1319         num_active_bases_in++;
1320       }
1321       if (eval_mode != CEED_EVAL_WEIGHT) {
1322         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1323         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1324         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1325         for (CeedInt d = 0; d < q_comp; d++) {
1326           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1327           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1328           offset += num_comp;
1329         }
1330         num_eval_modes_in[index] += q_comp;
1331       }
1332     }
1333   }
1334 
1335   // Determine active output basis
1336   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1337   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1338   offset = 0;
1339   for (CeedInt i = 0; i < num_output_fields; i++) {
1340     CeedVector vec;
1341 
1342     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1343     if (vec == CEED_VECTOR_ACTIVE) {
1344       CeedInt      index = -1, num_comp, q_comp;
1345       CeedEvalMode eval_mode;
1346       CeedBasis    basis_out = NULL;
1347 
1348       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1349       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1350       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1351       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1352       for (CeedInt i = 0; i < num_active_bases_out; i++) {
1353         if ((*data)->active_bases_out[i] == basis_out) index = i;
1354       }
1355       if (index == -1) {
1356         CeedElemRestriction elem_rstr_out;
1357 
1358         index = num_active_bases_out;
1359         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out));
1360         (*data)->active_bases_out[num_active_bases_out] = NULL;
1361         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out]));
1362         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out));
1363         (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL;
1364         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1365         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out]));
1366         CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out));
1367         num_eval_modes_out[index] = 0;
1368         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out));
1369         eval_modes_out[index] = NULL;
1370         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out));
1371         eval_mode_offsets_out[index] = NULL;
1372         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out));
1373         (*data)->assembled_bases_out[index] = NULL;
1374         num_active_bases_out++;
1375       }
1376       if (eval_mode != CEED_EVAL_WEIGHT) {
1377         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1378         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1379         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1380         for (CeedInt d = 0; d < q_comp; d++) {
1381           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1382           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1383           offset += num_comp;
1384         }
1385         num_eval_modes_out[index] += q_comp;
1386       }
1387     }
1388   }
1389   (*data)->num_active_bases_in   = num_active_bases_in;
1390   (*data)->num_eval_modes_in     = num_eval_modes_in;
1391   (*data)->eval_modes_in         = eval_modes_in;
1392   (*data)->eval_mode_offsets_in  = eval_mode_offsets_in;
1393   (*data)->num_active_bases_out  = num_active_bases_out;
1394   (*data)->num_eval_modes_out    = num_eval_modes_out;
1395   (*data)->eval_modes_out        = eval_modes_out;
1396   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1397   (*data)->num_output_components = offset;
1398   return CEED_ERROR_SUCCESS;
1399 }
1400 
1401 /**
1402   @brief Get CeedOperator CeedEvalModes for assembly.
1403 
1404   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1405 
1406   @param[in]  data                  CeedOperatorAssemblyData
1407   @param[out] num_active_bases_in   Total number of active bases for input
1408   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1409                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1410   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1411   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1412   @param[out] num_active_bases_out  Total number of active bases for output
1413   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1414   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1415   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1416   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1417                                       including contributions of all active bases
1418 
1419   @return An error code: 0 - success, otherwise - failure
1420 
1421   @ref Backend
1422 **/
1423 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in,
1424                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out,
1425                                          CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out,
1426                                          CeedSize *num_output_components) {
1427   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1428   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1429   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1430   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1431   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1432   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1433   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1434   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1435   if (num_output_components) *num_output_components = data->num_output_components;
1436   return CEED_ERROR_SUCCESS;
1437 }
1438 
1439 /**
1440   @brief Get CeedOperator CeedBasis data for assembly.
1441 
1442   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1443 
1444   @param[in]  data                 CeedOperatorAssemblyData
1445   @param[out] num_active_bases_in  Number of active input bases, or NULL
1446   @param[out] active_bases_in      Pointer to hold active input CeedBasis, or NULL
1447   @param[out] assembled_bases_in   Pointer to hold assembled active input B, or NULL
1448   @param[out] num_active_bases_out Number of active output bases, or NULL
1449   @param[out] active_bases_out     Pointer to hold active output CeedBasis, or NULL
1450   @param[out] assembled_bases_out  Pointer to hold assembled active output B, or NULL
1451 
1452   @return An error code: 0 - success, otherwise - failure
1453 
1454   @ref Backend
1455 **/
1456 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in,
1457                                      const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out,
1458                                      const CeedScalar ***assembled_bases_out) {
1459   // Assemble B_in, B_out if needed
1460   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1461     CeedInt num_qpts;
1462 
1463     if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts));
1464     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts));
1465     for (CeedInt b = 0; b < data->num_active_bases_in; b++) {
1466       bool        has_eval_none = false;
1467       CeedInt     num_nodes;
1468       CeedScalar *B_in = NULL, *identity = NULL;
1469 
1470       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes));
1471       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1472 
1473       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1474         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1475       }
1476       if (has_eval_none) {
1477         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1478         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1479           identity[i * num_nodes + i] = 1.0;
1480         }
1481       }
1482 
1483       for (CeedInt q = 0; q < num_qpts; q++) {
1484         for (CeedInt n = 0; n < num_nodes; n++) {
1485           CeedInt      d_in              = 0, q_comp_in;
1486           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1487 
1488           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1489             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1490             const CeedScalar *B  = NULL;
1491 
1492             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B));
1493             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in));
1494             if (q_comp_in > 1) {
1495               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1496               else B = &B[(++d_in) * num_qpts * num_nodes];
1497             }
1498             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1499             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1500           }
1501         }
1502       }
1503       if (identity) CeedCall(CeedFree(&identity));
1504       data->assembled_bases_in[b] = B_in;
1505     }
1506   }
1507 
1508   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1509     CeedInt num_qpts;
1510 
1511     if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts));
1512     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts));
1513     for (CeedInt b = 0; b < data->num_active_bases_out; b++) {
1514       bool        has_eval_none = false;
1515       CeedInt     num_nodes;
1516       CeedScalar *B_out = NULL, *identity = NULL;
1517 
1518       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes));
1519       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1520 
1521       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1522         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1523       }
1524       if (has_eval_none) {
1525         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1526         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1527           identity[i * num_nodes + i] = 1.0;
1528         }
1529       }
1530 
1531       for (CeedInt q = 0; q < num_qpts; q++) {
1532         for (CeedInt n = 0; n < num_nodes; n++) {
1533           CeedInt      d_out              = 0, q_comp_out;
1534           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1535 
1536           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1537             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1538             const CeedScalar *B  = NULL;
1539 
1540             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B));
1541             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out));
1542             if (q_comp_out > 1) {
1543               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1544               else B = &B[(++d_out) * num_qpts * num_nodes];
1545             }
1546             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1547             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1548           }
1549         }
1550       }
1551       if (identity) CeedCall(CeedFree(&identity));
1552       data->assembled_bases_out[b] = B_out;
1553     }
1554   }
1555 
1556   // Pass out assembled data
1557   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1558   if (active_bases_in) *active_bases_in = data->active_bases_in;
1559   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1560   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1561   if (active_bases_out) *active_bases_out = data->active_bases_out;
1562   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1563   return CEED_ERROR_SUCCESS;
1564 }
1565 
1566 /**
1567   @brief Get CeedOperator CeedBasis data for assembly.
1568 
1569   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1570 
1571   @param[in]  data                      CeedOperatorAssemblyData
1572   @param[out] num_active_elem_rstrs_in  Number of active input element restrictions, or NULL
1573   @param[out] active_elem_rstrs_in      Pointer to hold active input CeedElemRestrictions, or NULL
1574   @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or NULL
1575   @param[out] active_elem_rstrs_out     Pointer to hold active output CeedElemRestrictions, or NULL
1576 
1577   @return An error code: 0 - success, otherwise - failure
1578 
1579   @ref Backend
1580 **/
1581 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in,
1582                                                 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out,
1583                                                 CeedElemRestriction **active_elem_rstrs_out) {
1584   if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in;
1585   if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in;
1586   if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out;
1587   if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out;
1588   return CEED_ERROR_SUCCESS;
1589 }
1590 
1591 /**
1592   @brief Destroy CeedOperatorAssemblyData
1593 
1594   @param[in,out] data CeedOperatorAssemblyData to destroy
1595 
1596   @return An error code: 0 - success, otherwise - failure
1597 
1598   @ref Backend
1599 **/
1600 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1601   if (!*data) {
1602     *data = NULL;
1603     return CEED_ERROR_SUCCESS;
1604   }
1605   CeedCall(CeedDestroy(&(*data)->ceed));
1606   for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) {
1607     CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b]));
1608     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b]));
1609     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1610     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1611     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1612   }
1613   for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) {
1614     CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b]));
1615     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b]));
1616     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1617     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1618     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1619   }
1620   CeedCall(CeedFree(&(*data)->active_bases_in));
1621   CeedCall(CeedFree(&(*data)->active_bases_out));
1622   CeedCall(CeedFree(&(*data)->active_elem_rstrs_in));
1623   CeedCall(CeedFree(&(*data)->active_elem_rstrs_out));
1624   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1625   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1626   CeedCall(CeedFree(&(*data)->eval_modes_in));
1627   CeedCall(CeedFree(&(*data)->eval_modes_out));
1628   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1629   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1630   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1631   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1632 
1633   CeedCall(CeedFree(data));
1634   return CEED_ERROR_SUCCESS;
1635 }
1636 
1637 /**
1638   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
1639 
1640   @param[in]  op          CeedOperator to retrieve fallback for
1641   @param[out] op_fallback Fallback CeedOperator
1642 
1643   @return An error code: 0 - success, otherwise - failure
1644 
1645   @ref Backend
1646 **/
1647 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
1648   // Create if needed
1649   if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op));
1650   if (op->op_fallback) {
1651     bool is_debug;
1652 
1653     CeedCall(CeedIsDebug(op->ceed, &is_debug));
1654     if (is_debug) {
1655       Ceed        ceed, ceed_fallback;
1656       const char *resource, *resource_fallback;
1657 
1658       CeedCall(CeedOperatorGetCeed(op, &ceed));
1659       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
1660       CeedCall(CeedGetResource(ceed, &resource));
1661       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
1662 
1663       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n");
1664       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
1665                 op->op_fallback);
1666     }
1667   }
1668   *op_fallback = op->op_fallback;
1669   return CEED_ERROR_SUCCESS;
1670 }
1671 
1672 /**
1673   @brief Get the parent CeedOperator for a fallback CeedOperator
1674 
1675   @param[in]  op     CeedOperator context
1676   @param[out] parent Variable to store parent CeedOperator context
1677 
1678   @return An error code: 0 - success, otherwise - failure
1679 
1680   @ref Backend
1681 **/
1682 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) {
1683   *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL;
1684   return CEED_ERROR_SUCCESS;
1685 }
1686 
1687 /**
1688   @brief Get the Ceed context of the parent CeedOperator for a fallback CeedOperator
1689 
1690   @param[in]  op     CeedOperator context
1691   @param[out] parent Variable to store parent Ceed context
1692 
1693   @return An error code: 0 - success, otherwise - failure
1694 
1695   @ref Backend
1696 **/
1697 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) {
1698   *parent = op->op_fallback_parent ? op->op_fallback_parent->ceed : op->ceed;
1699   return CEED_ERROR_SUCCESS;
1700 }
1701 
1702 /// @}
1703 
1704 /// ----------------------------------------------------------------------------
1705 /// CeedOperator Public API
1706 /// ----------------------------------------------------------------------------
1707 /// @addtogroup CeedOperatorUser
1708 /// @{
1709 
1710 /**
1711   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1712 
1713   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1714   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices
1715 representing the action of the CeedQFunction for a corresponding quadrature point on an element.
1716 
1717   Inputs and outputs are in the order provided by the user when adding CeedOperator fields.
1718   For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction
1719 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1720 
1721   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1722 
1723   @param[in]  op        CeedOperator to assemble CeedQFunction
1724   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1725   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1726   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1727 
1728   @return An error code: 0 - success, otherwise - failure
1729 
1730   @ref User
1731 **/
1732 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1733   CeedCall(CeedOperatorCheckReady(op));
1734 
1735   if (op->LinearAssembleQFunction) {
1736     // Backend version
1737     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1738   } else {
1739     // Operator fallback
1740     CeedOperator op_fallback;
1741 
1742     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1743     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1744     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1745   }
1746   return CEED_ERROR_SUCCESS;
1747 }
1748 
1749 /**
1750   @brief Assemble CeedQFunction and store result internally.
1751 
1752   Return copied references of stored data to the caller.
1753   Caller is responsible for ownership and destruction of the copied references.
1754   See also @ref CeedOperatorLinearAssembleQFunction
1755 
1756   Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers.
1757         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
1758 
1759   @param[in]  op        CeedOperator to assemble CeedQFunction
1760   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1761   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1762   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1763 
1764   @return An error code: 0 - success, otherwise - failure
1765 
1766   @ref User
1767 **/
1768 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1769   int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL;
1770   CeedOperator op_assemble                                                                           = NULL;
1771   CeedOperator op_fallback_parent                                                                    = NULL;
1772 
1773   CeedCall(CeedOperatorCheckReady(op));
1774 
1775   // Determine if fallback parent or operator has implementation
1776   CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent));
1777   if (op_fallback_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) {
1778     // -- Backend version for op fallback parent is faster, if it exists
1779     LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate;
1780     op_assemble                   = op_fallback_parent;
1781   } else if (op->LinearAssembleQFunctionUpdate) {
1782     // -- Backend version for op
1783     LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate;
1784     op_assemble                   = op;
1785   }
1786 
1787   // Assemble QFunction
1788   if (LinearAssembleQFunctionUpdate) {
1789     // Backend or fallback parent version
1790     bool                qf_assembled_is_setup;
1791     CeedVector          assembled_vec  = NULL;
1792     CeedElemRestriction assembled_rstr = NULL;
1793 
1794     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1795     if (qf_assembled_is_setup) {
1796       bool update_needed;
1797 
1798       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1799       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1800       if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request));
1801     } else {
1802       CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request));
1803       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1804     }
1805     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1806 
1807     // Copy reference from internally held copy
1808     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1809     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1810     CeedCall(CeedVectorDestroy(&assembled_vec));
1811     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1812   } else {
1813     // Operator fallback
1814     CeedOperator op_fallback;
1815 
1816     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1817     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1818     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1819   }
1820   return CEED_ERROR_SUCCESS;
1821 }
1822 
1823 /**
1824   @brief Assemble the diagonal of a square linear CeedOperator
1825 
1826   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1827 
1828   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1829 
1830   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1831 
1832   @param[in]  op        CeedOperator to assemble CeedQFunction
1833   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1834   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1835 
1836   @return An error code: 0 - success, otherwise - failure
1837 
1838   @ref User
1839 **/
1840 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1841   bool     is_composite;
1842   CeedSize input_size = 0, output_size = 0;
1843 
1844   CeedCall(CeedOperatorCheckReady(op));
1845   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1846 
1847   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1848   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1849 
1850   // Early exit for empty operator
1851   if (!is_composite) {
1852     CeedInt num_elem = 0;
1853 
1854     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1855     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1856   }
1857 
1858   if (op->LinearAssembleDiagonal) {
1859     // Backend version
1860     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1861     return CEED_ERROR_SUCCESS;
1862   } else if (op->LinearAssembleAddDiagonal) {
1863     // Backend version with zeroing first
1864     CeedCall(CeedVectorSetValue(assembled, 0.0));
1865     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1866     return CEED_ERROR_SUCCESS;
1867   } else {
1868     // Operator fallback
1869     CeedOperator op_fallback;
1870 
1871     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1872     if (op_fallback) {
1873       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1874       return CEED_ERROR_SUCCESS;
1875     }
1876   }
1877   // Default interface implementation
1878   CeedCall(CeedVectorSetValue(assembled, 0.0));
1879   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1880   return CEED_ERROR_SUCCESS;
1881 }
1882 
1883 /**
1884   @brief Assemble the diagonal of a square linear CeedOperator
1885 
1886   This sums into a CeedVector the diagonal of a linear CeedOperator.
1887 
1888   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1889 
1890   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1891 
1892   @param[in]  op        CeedOperator to assemble CeedQFunction
1893   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1894   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1895 
1896   @return An error code: 0 - success, otherwise - failure
1897 
1898   @ref User
1899 **/
1900 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1901   bool     is_composite;
1902   CeedSize input_size = 0, output_size = 0;
1903 
1904   CeedCall(CeedOperatorCheckReady(op));
1905   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1906 
1907   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1908   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1909 
1910   // Early exit for empty operator
1911   if (!is_composite) {
1912     CeedInt num_elem = 0;
1913 
1914     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1915     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1916   }
1917 
1918   if (op->LinearAssembleAddDiagonal) {
1919     // Backend version
1920     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1921     return CEED_ERROR_SUCCESS;
1922   } else {
1923     // Operator fallback
1924     CeedOperator op_fallback;
1925 
1926     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1927     if (op_fallback) {
1928       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1929       return CEED_ERROR_SUCCESS;
1930     }
1931   }
1932   // Default interface implementation
1933   if (is_composite) {
1934     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1935   } else {
1936     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1937   }
1938   return CEED_ERROR_SUCCESS;
1939 }
1940 
1941 /**
1942    @brief Fully assemble the point-block diagonal pattern of a linear operator.
1943 
1944    Expected to be used in conjunction with CeedOperatorLinearAssemblePointBlockDiagonal().
1945 
1946    The assembly routines use coordinate format, with `num_entries` tuples of the form (i, j, value) which indicate that value should be added to the
1947 matrix in entry (i, j).
1948   Note that the (i, j) pairs are unique.
1949   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in
1950 the same ordering.
1951 
1952    This will generally be slow unless your operator is low-order.
1953 
1954    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1955 
1956    @param[in]  op          CeedOperator to assemble
1957    @param[out] num_entries Number of entries in coordinate nonzero pattern
1958    @param[out] rows        Row number for each entry
1959    @param[out] cols        Column number for each entry
1960 
1961    @ref User
1962 **/
1963 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1964   Ceed          ceed;
1965   bool          is_composite;
1966   CeedInt       num_active_components, num_sub_operators;
1967   CeedOperator *sub_operators;
1968 
1969   CeedCall(CeedOperatorGetCeed(op, &ceed));
1970   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1971 
1972   CeedSize input_size = 0, output_size = 0;
1973   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1974   CeedCheck(input_size == output_size, ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1975 
1976   if (is_composite) {
1977     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_operators));
1978     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1979   } else {
1980     sub_operators     = &op;
1981     num_sub_operators = 1;
1982   }
1983 
1984   // Verify operator can be assembled correctly
1985   {
1986     CeedOperatorAssemblyData data;
1987     CeedInt                  num_active_elem_rstrs, comp_stride;
1988     CeedElemRestriction     *active_elem_rstrs;
1989 
1990     // Get initial values to check against
1991     CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data));
1992     CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
1993     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride));
1994     CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components));
1995 
1996     // Verify that all active element restrictions have same component stride and number of components
1997     for (CeedInt k = 0; k < num_sub_operators; k++) {
1998       CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data));
1999       CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
2000       for (CeedInt i = 0; i < num_active_elem_rstrs; i++) {
2001         CeedInt comp_stride_sub, num_active_components_sub;
2002 
2003         CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub));
2004         CeedCheck(comp_stride == comp_stride_sub, ceed, CEED_ERROR_DIMENSION,
2005                   "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub);
2006         CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub));
2007         CeedCheck(num_active_components == num_active_components_sub, ceed, CEED_ERROR_INCOMPATIBLE,
2008                   "All suboperators must have the same number of output components");
2009       }
2010     }
2011   }
2012   *num_entries = input_size * num_active_components;
2013   CeedCall(CeedCalloc(*num_entries, rows));
2014   CeedCall(CeedCalloc(*num_entries, cols));
2015 
2016   for (CeedInt o = 0; o < num_sub_operators; o++) {
2017     CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr;
2018     CeedInt             comp_stride, num_elem, elem_size;
2019     const CeedInt      *offsets, *point_block_offsets;
2020 
2021     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr));
2022     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride));
2023     CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem));
2024     CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size));
2025     CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets));
2026 
2027     CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr));
2028     CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets));
2029 
2030     for (CeedSize i = 0; i < num_elem * elem_size; i++) {
2031       for (CeedInt c_out = 0; c_out < num_active_components; c_out++) {
2032         for (CeedInt c_in = 0; c_in < num_active_components; c_in++) {
2033           (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride;
2034           (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride;
2035         }
2036       }
2037     }
2038 
2039     CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets));
2040     CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets));
2041     CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr));
2042   }
2043   return CEED_ERROR_SUCCESS;
2044 }
2045 
2046 /**
2047   @brief Assemble the point block diagonal of a square linear CeedOperator
2048 
2049   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
2050 
2051   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
2052 
2053   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2054 
2055   @param[in]  op        CeedOperator to assemble CeedQFunction
2056   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
2057 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
2058 component in].
2059   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2060 
2061   @return An error code: 0 - success, otherwise - failure
2062 
2063   @ref User
2064 **/
2065 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2066   bool     is_composite;
2067   CeedSize input_size = 0, output_size = 0;
2068 
2069   CeedCall(CeedOperatorCheckReady(op));
2070   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2071 
2072   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2073   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
2074 
2075   // Early exit for empty operator
2076   if (!is_composite) {
2077     CeedInt num_elem = 0;
2078 
2079     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2080     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2081   }
2082 
2083   if (op->LinearAssemblePointBlockDiagonal) {
2084     // Backend version
2085     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
2086     return CEED_ERROR_SUCCESS;
2087   } else if (op->LinearAssembleAddPointBlockDiagonal) {
2088     // Backend version with zeroing first
2089     CeedCall(CeedVectorSetValue(assembled, 0.0));
2090     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2091     return CEED_ERROR_SUCCESS;
2092   } else {
2093     // Operator fallback
2094     CeedOperator op_fallback;
2095 
2096     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2097     if (op_fallback) {
2098       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
2099       return CEED_ERROR_SUCCESS;
2100     }
2101   }
2102   // Default interface implementation
2103   CeedCall(CeedVectorSetValue(assembled, 0.0));
2104   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2105   return CEED_ERROR_SUCCESS;
2106 }
2107 
2108 /**
2109   @brief Assemble the point block diagonal of a square linear CeedOperator
2110 
2111   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
2112 
2113   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
2114 
2115   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2116 
2117   @param[in]  op        CeedOperator to assemble CeedQFunction
2118   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
2119 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
2120 component in].
2121   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2122 
2123   @return An error code: 0 - success, otherwise - failure
2124 
2125   @ref User
2126 **/
2127 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2128   bool     is_composite;
2129   CeedSize input_size = 0, output_size = 0;
2130 
2131   CeedCall(CeedOperatorCheckReady(op));
2132   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2133 
2134   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2135   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
2136 
2137   // Early exit for empty operator
2138   if (!is_composite) {
2139     CeedInt num_elem = 0;
2140 
2141     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2142     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2143   }
2144 
2145   if (op->LinearAssembleAddPointBlockDiagonal) {
2146     // Backend version
2147     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
2148     return CEED_ERROR_SUCCESS;
2149   } else {
2150     // Operator fallback
2151     CeedOperator op_fallback;
2152 
2153     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2154     if (op_fallback) {
2155       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
2156       return CEED_ERROR_SUCCESS;
2157     }
2158   }
2159   // Default interface implementation
2160   if (is_composite) {
2161     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
2162   } else {
2163     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
2164   }
2165   return CEED_ERROR_SUCCESS;
2166 }
2167 
2168 /**
2169    @brief Fully assemble the nonzero pattern of a linear operator.
2170 
2171    Expected to be used in conjunction with CeedOperatorLinearAssemble().
2172 
2173    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
2174 matrix in entry (i, j).
2175   Note that the (i, j) pairs are not unique and may repeat.
2176   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
2177 
2178    This will generally be slow unless your operator is low-order.
2179 
2180    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2181 
2182    @param[in]  op          CeedOperator to assemble
2183    @param[out] num_entries Number of entries in coordinate nonzero pattern
2184    @param[out] rows        Row number for each entry
2185    @param[out] cols        Column number for each entry
2186 
2187    @ref User
2188 **/
2189 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2190   bool          is_composite;
2191   CeedInt       num_suboperators, offset = 0;
2192   CeedSize      single_entries;
2193   CeedOperator *sub_operators;
2194 
2195   CeedCall(CeedOperatorCheckReady(op));
2196   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2197 
2198   if (op->LinearAssembleSymbolic) {
2199     // Backend version
2200     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
2201     return CEED_ERROR_SUCCESS;
2202   } else {
2203     // Operator fallback
2204     CeedOperator op_fallback;
2205 
2206     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2207     if (op_fallback) {
2208       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
2209       return CEED_ERROR_SUCCESS;
2210     }
2211   }
2212 
2213   // Default interface implementation
2214 
2215   // Count entries and allocate rows, cols arrays
2216   *num_entries = 0;
2217   if (is_composite) {
2218     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2219     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2220     for (CeedInt k = 0; k < num_suboperators; ++k) {
2221       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2222       *num_entries += single_entries;
2223     }
2224   } else {
2225     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
2226     *num_entries += single_entries;
2227   }
2228   CeedCall(CeedCalloc(*num_entries, rows));
2229   CeedCall(CeedCalloc(*num_entries, cols));
2230 
2231   // Assemble nonzero locations
2232   if (is_composite) {
2233     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2234     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2235     for (CeedInt k = 0; k < num_suboperators; ++k) {
2236       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
2237       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2238       offset += single_entries;
2239     }
2240   } else {
2241     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
2242   }
2243   return CEED_ERROR_SUCCESS;
2244 }
2245 
2246 /**
2247    @brief Fully assemble the nonzero entries of a linear operator.
2248 
2249    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
2250 
2251    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
2252 matrix in entry (i, j).
2253   Note that the (i, j) pairs are not unique and may repeat.
2254   This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
2255 
2256    This will generally be slow unless your operator is low-order.
2257 
2258    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2259 
2260    @param[in]  op     CeedOperator to assemble
2261    @param[out] values Values to assemble into matrix
2262 
2263    @ref User
2264 **/
2265 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
2266   bool          is_composite;
2267   CeedInt       num_suboperators, offset = 0;
2268   CeedSize      single_entries = 0;
2269   CeedOperator *sub_operators;
2270 
2271   CeedCall(CeedOperatorCheckReady(op));
2272   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2273 
2274   // Early exit for empty operator
2275   if (!is_composite) {
2276     CeedInt num_elem = 0;
2277 
2278     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2279     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2280   }
2281 
2282   if (op->LinearAssemble) {
2283     // Backend version
2284     CeedCall(op->LinearAssemble(op, values));
2285     return CEED_ERROR_SUCCESS;
2286   } else {
2287     // Operator fallback
2288     CeedOperator op_fallback;
2289 
2290     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2291     if (op_fallback) {
2292       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2293       return CEED_ERROR_SUCCESS;
2294     }
2295   }
2296 
2297   // Default interface implementation
2298   CeedCall(CeedVectorSetValue(values, 0.0));
2299   if (is_composite) {
2300     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2301     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2302     for (CeedInt k = 0; k < num_suboperators; k++) {
2303       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2304       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2305       offset += single_entries;
2306     }
2307   } else {
2308     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2309   }
2310   return CEED_ERROR_SUCCESS;
2311 }
2312 
2313 /**
2314   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
2315 
2316   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2317 
2318   @param[in]  op               Composite CeedOperator
2319   @param[in]  num_skip_indices Number of suboperators to skip
2320   @param[in]  skip_indices     Array of indices of suboperators to skip
2321   @param[out] mult             Vector to store multiplicity (of size l_size)
2322 
2323   @return An error code: 0 - success, otherwise - failure
2324 
2325   @ref User
2326 **/
2327 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2328   Ceed                ceed;
2329   CeedInt             num_suboperators;
2330   CeedSize            l_vec_len;
2331   CeedScalar         *mult_array;
2332   CeedVector          ones_l_vec;
2333   CeedElemRestriction elem_rstr, mult_elem_rstr;
2334   CeedOperator       *sub_operators;
2335 
2336   CeedCall(CeedOperatorCheckReady(op));
2337 
2338   CeedCall(CeedOperatorGetCeed(op, &ceed));
2339 
2340   // Zero mult vector
2341   CeedCall(CeedVectorSetValue(mult, 0.0));
2342 
2343   // Get suboperators
2344   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2345   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2346   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2347 
2348   // Work vector
2349   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2350   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2351   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2352   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2353 
2354   // Compute multiplicity across suboperators
2355   for (CeedInt i = 0; i < num_suboperators; i++) {
2356     const CeedScalar *sub_mult_array;
2357     CeedVector        sub_mult_l_vec, ones_e_vec;
2358 
2359     // -- Check for suboperator to skip
2360     for (CeedInt j = 0; j < num_skip_indices; j++) {
2361       if (skip_indices[j] == i) continue;
2362     }
2363 
2364     // -- Sub operator multiplicity
2365     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2366     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr));
2367     CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2368     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2369     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2370     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2371     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2372     // ---- Flag every node present in the current suboperator
2373     for (CeedInt j = 0; j < l_vec_len; j++) {
2374       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2375     }
2376     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2377     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2378     CeedCall(CeedVectorDestroy(&ones_e_vec));
2379     CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr));
2380   }
2381   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2382   CeedCall(CeedVectorDestroy(&ones_l_vec));
2383   return CEED_ERROR_SUCCESS;
2384 }
2385 
2386 /**
2387   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2388 grid interpolation
2389 
2390   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2391 
2392   @param[in]  op_fine      Fine grid operator
2393   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2394   @param[in]  rstr_coarse  Coarse grid restriction
2395   @param[in]  basis_coarse Coarse grid active vector basis
2396   @param[out] op_coarse    Coarse grid operator
2397   @param[out] op_prolong   Coarse to fine operator, or NULL
2398   @param[out] op_restrict  Fine to coarse operator, or NULL
2399 
2400   @return An error code: 0 - success, otherwise - failure
2401 
2402   @ref User
2403 **/
2404 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2405                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2406   CeedBasis basis_c_to_f = NULL;
2407 
2408   CeedCall(CeedOperatorCheckReady(op_fine));
2409 
2410   // Build prolongation matrix, if required
2411   if (op_prolong || op_restrict) {
2412     CeedBasis basis_fine;
2413 
2414     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2415     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2416   }
2417 
2418   // Core code
2419   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2420   return CEED_ERROR_SUCCESS;
2421 }
2422 
2423 /**
2424   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2425 
2426   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2427 
2428   @param[in]  op_fine       Fine grid operator
2429   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2430   @param[in]  rstr_coarse   Coarse grid restriction
2431   @param[in]  basis_coarse  Coarse grid active vector basis
2432   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2433   @param[out] op_coarse     Coarse grid operator
2434   @param[out] op_prolong    Coarse to fine operator, or NULL
2435   @param[out] op_restrict   Fine to coarse operator, or NULL
2436 
2437   @return An error code: 0 - success, otherwise - failure
2438 
2439   @ref User
2440 **/
2441 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2442                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2443                                              CeedOperator *op_restrict) {
2444   Ceed      ceed;
2445   CeedInt   Q_f, Q_c;
2446   CeedBasis basis_fine, basis_c_to_f = NULL;
2447 
2448   CeedCall(CeedOperatorCheckReady(op_fine));
2449   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2450 
2451   // Check for compatible quadrature spaces
2452   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2453   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2454   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2455   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2456 
2457   // Create coarse to fine basis, if required
2458   if (op_prolong || op_restrict) {
2459     CeedInt     dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2460     CeedScalar *q_ref, *q_weight, *grad;
2461 
2462     // Check if interpolation matrix is provided
2463     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2464               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2465     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2466     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2467     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2468     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2469     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2470     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2471     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2472     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2473     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2474     CeedCall(CeedFree(&q_ref));
2475     CeedCall(CeedFree(&q_weight));
2476     CeedCall(CeedFree(&grad));
2477   }
2478 
2479   // Core code
2480   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2481   return CEED_ERROR_SUCCESS;
2482 }
2483 
2484 /**
2485   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2486 
2487   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2488 
2489   @param[in]  op_fine       Fine grid operator
2490   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2491   @param[in]  rstr_coarse   Coarse grid restriction
2492   @param[in]  basis_coarse  Coarse grid active vector basis
2493   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2494   @param[out] op_coarse     Coarse grid operator
2495   @param[out] op_prolong    Coarse to fine operator, or NULL
2496   @param[out] op_restrict   Fine to coarse operator, or NULL
2497 
2498   @return An error code: 0 - success, otherwise - failure
2499 
2500   @ref User
2501 **/
2502 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2503                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2504                                        CeedOperator *op_restrict) {
2505   Ceed      ceed;
2506   CeedInt   Q_f, Q_c;
2507   CeedBasis basis_fine, basis_c_to_f = NULL;
2508 
2509   CeedCall(CeedOperatorCheckReady(op_fine));
2510   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2511 
2512   // Check for compatible quadrature spaces
2513   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2514   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2515   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2516   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2517 
2518   // Coarse to fine basis
2519   if (op_prolong || op_restrict) {
2520     CeedInt          dim, num_comp, num_nodes_c, num_nodes_f;
2521     CeedScalar      *q_ref, *q_weight, *grad;
2522     CeedElemTopology topo;
2523 
2524     // Check if interpolation matrix is provided
2525     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2526               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2527     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2528     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2529     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2530     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2531     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2532     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2533     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2534     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2535     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2536     CeedCall(CeedFree(&q_ref));
2537     CeedCall(CeedFree(&q_weight));
2538     CeedCall(CeedFree(&grad));
2539   }
2540 
2541   // Core code
2542   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2543   return CEED_ERROR_SUCCESS;
2544 }
2545 
2546 /**
2547   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2548 
2549   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2550   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2551   The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T
2552 \hat S V\f$.
2553   The CeedOperator must be linear and non-composite.
2554   The associated CeedQFunction must therefore also be linear.
2555 
2556   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2557 
2558   @param[in]  op      CeedOperator to create element inverses
2559   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2560   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2561 
2562   @return An error code: 0 - success, otherwise - failure
2563 
2564   @ref User
2565 **/
2566 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2567   Ceed                 ceed, ceed_parent;
2568   bool                 interp = false, grad = false, is_tensor_basis = true;
2569   CeedInt              num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2570   CeedSize             l_size = 1;
2571   CeedScalar          *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg;
2572   const CeedScalar    *interp_1d, *grad_1d, *q_weight_1d;
2573   CeedVector           q_data;
2574   CeedElemRestriction  rstr  = NULL, rstr_qd_i;
2575   CeedBasis            basis = NULL, fdm_basis;
2576   CeedQFunctionContext ctx_fdm;
2577   CeedQFunctionField  *qf_fields;
2578   CeedQFunction        qf, qf_fdm;
2579   CeedOperatorField   *op_fields;
2580 
2581   CeedCall(CeedOperatorCheckReady(op));
2582 
2583   if (op->CreateFDMElementInverse) {
2584     // Backend version
2585     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2586     return CEED_ERROR_SUCCESS;
2587   } else {
2588     // Operator fallback
2589     CeedOperator op_fallback;
2590 
2591     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2592     if (op_fallback) {
2593       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2594       return CEED_ERROR_SUCCESS;
2595     }
2596   }
2597 
2598   // Default interface implementation
2599   CeedCall(CeedOperatorGetCeed(op, &ceed));
2600   CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent));
2601   CeedCall(CeedOperatorGetQFunction(op, &qf));
2602 
2603   // Determine active input basis
2604   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2605   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2606   for (CeedInt i = 0; i < num_input_fields; i++) {
2607     CeedVector vec;
2608 
2609     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2610     if (vec == CEED_VECTOR_ACTIVE) {
2611       CeedEvalMode eval_mode;
2612 
2613       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2614       interp = interp || eval_mode == CEED_EVAL_INTERP;
2615       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2616       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2617       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2618     }
2619   }
2620   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2621   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2622   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2623   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2624   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2625   CeedCall(CeedBasisGetDimension(basis, &dim));
2626   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2627   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2628   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2629 
2630   // Build and diagonalize 1D Mass and Laplacian
2631   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2632   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2633   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2634   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2635   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2636   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2637   CeedCall(CeedCalloc(P_1d, &lambda));
2638   // -- Build matrices
2639   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2640   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2641   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2642   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2643 
2644   // -- Diagonalize
2645   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2646   CeedCall(CeedFree(&mass));
2647   CeedCall(CeedFree(&laplace));
2648   for (CeedInt i = 0; i < P_1d; i++) {
2649     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2650   }
2651   CeedCall(CeedFree(&x));
2652 
2653   {
2654     CeedInt             layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2655     CeedScalar          max_norm = 0;
2656     const CeedScalar   *assembled_array, *q_weight_array;
2657     CeedVector          assembled = NULL, q_weight;
2658     CeedElemRestriction rstr_qf   = NULL;
2659 
2660     // Assemble QFunction
2661     CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2662     CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2663     CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2664     CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2665 
2666     // Calculate element averages
2667     CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2668     CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2669     CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2670     CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2671     CeedCall(CeedCalloc(num_elem, &elem_avg));
2672     const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2673 
2674     for (CeedInt e = 0; e < num_elem; e++) {
2675       CeedInt count = 0;
2676 
2677       for (CeedInt q = 0; q < num_qpts; q++) {
2678         for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2679           if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2680             elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2681             count++;
2682           }
2683         }
2684       }
2685       if (count) {
2686         elem_avg[e] /= count;
2687       } else {
2688         elem_avg[e] = 1.0;
2689       }
2690     }
2691     CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2692     CeedCall(CeedVectorDestroy(&assembled));
2693     CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2694     CeedCall(CeedVectorDestroy(&q_weight));
2695   }
2696 
2697   // Build FDM diagonal
2698   {
2699     CeedScalar *q_data_array, *fdm_diagonal;
2700 
2701     CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2702     const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2703     for (CeedInt c = 0; c < num_comp; c++) {
2704       for (CeedInt n = 0; n < num_nodes; n++) {
2705         if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2706         if (grad) {
2707           for (CeedInt d = 0; d < dim; d++) {
2708             CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2709             fdm_diagonal[c * num_nodes + n] += lambda[i];
2710           }
2711         }
2712         if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2713       }
2714     }
2715     CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2716     CeedCall(CeedVectorSetValue(q_data, 0.0));
2717     CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2718     for (CeedInt e = 0; e < num_elem; e++) {
2719       for (CeedInt c = 0; c < num_comp; c++) {
2720         for (CeedInt n = 0; n < num_nodes; n++)
2721           q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2722       }
2723     }
2724     CeedCall(CeedFree(&elem_avg));
2725     CeedCall(CeedFree(&fdm_diagonal));
2726     CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2727   }
2728 
2729   // Setup FDM operator
2730   // -- Basis
2731   {
2732     CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2733 
2734     CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2735     CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2736     CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2737     CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2738     CeedCall(CeedFree(&fdm_interp));
2739     CeedCall(CeedFree(&grad_dummy));
2740     CeedCall(CeedFree(&q_ref_dummy));
2741     CeedCall(CeedFree(&q_weight_dummy));
2742     CeedCall(CeedFree(&lambda));
2743   }
2744 
2745   // -- Restriction
2746   {
2747     CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp};
2748     CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i));
2749   }
2750 
2751   // -- QFunction
2752   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2753   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2754   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2755   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2756   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2757 
2758   // -- QFunction context
2759   {
2760     CeedInt *num_comp_data;
2761 
2762     CeedCall(CeedCalloc(1, &num_comp_data));
2763     num_comp_data[0] = num_comp;
2764     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2765     CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2766   }
2767   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2768   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2769 
2770   // -- Operator
2771   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2772   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2773   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data));
2774   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2775 
2776   // Cleanup
2777   CeedCall(CeedVectorDestroy(&q_data));
2778   CeedCall(CeedBasisDestroy(&fdm_basis));
2779   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2780   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2781   return CEED_ERROR_SUCCESS;
2782 }
2783 
2784 /// @}
2785