xref: /libCEED/interface/ceed-preconditioning.c (revision 1ff07f3dbd21e457c383230b3fb668f16d9d24e8)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   char *source_path_with_name = NULL;
39 
40   // Check if NULL qf passed in
41   if (!qf) return CEED_ERROR_SUCCESS;
42 
43   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
44   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
45 
46   if (qf->source_path) {
47     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
48     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
49     memcpy(source_path_with_name, qf->source_path, path_len);
50     memcpy(&source_path_with_name[path_len], ":", 1);
51     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
52   } else {
53     CeedCall(CeedCalloc(1, &source_path_with_name));
54   }
55 
56   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
57   {
58     CeedQFunctionContext ctx;
59 
60     CeedCall(CeedQFunctionGetContext(qf, &ctx));
61     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
62   }
63   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
64     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
65   }
66   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
67     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
68   }
69   CeedCall(CeedFree(&source_path_with_name));
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   Ceed         ceed_fallback;
84   bool         is_composite;
85   CeedOperator op_fallback;
86 
87   // Check not already created
88   if (op->op_fallback) return CEED_ERROR_SUCCESS;
89 
90   // Fallback Ceed
91   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
92   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
93 
94   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
95   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
96 
97   // Clone Op
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114 
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
117     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
118     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
119     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
120       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
121                                     op->input_fields[i]->vec));
122     }
123     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
124       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
125                                     op->output_fields[i]->vec));
126     }
127     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
128     // Cleanup
129     CeedCall(CeedQFunctionDestroy(&qf_fallback));
130     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
131     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
132   }
133   CeedCall(CeedOperatorSetName(op_fallback, op->name));
134   CeedCall(CeedOperatorCheckReady(op_fallback));
135   // Note: No ref-counting here so we don't get caught in a reference loop.
136   //       The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback.
137   op->op_fallback                 = op_fallback;
138   op_fallback->op_fallback_parent = op;
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Select correct basis matrix pointer based on CeedEvalMode
144 
145   @param[in]  basis     CeedBasis from which to get the basis matrix
146   @param[in]  eval_mode Current basis evaluation mode
147   @param[in]  identity  Pointer to identity matrix
148   @param[out] basis_ptr Basis pointer to set
149 
150   @ref Developer
151 **/
152 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
153   switch (eval_mode) {
154     case CEED_EVAL_NONE:
155       *basis_ptr = identity;
156       break;
157     case CEED_EVAL_INTERP:
158       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
159       break;
160     case CEED_EVAL_GRAD:
161       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
162       break;
163     case CEED_EVAL_DIV:
164       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
165       break;
166     case CEED_EVAL_CURL:
167       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
168       break;
169     case CEED_EVAL_WEIGHT:
170       break;  // Caught by QF Assembly
171   }
172   assert(*basis_ptr != NULL);
173   return CEED_ERROR_SUCCESS;
174 }
175 
176 /**
177   @brief Core logic for assembling operator diagonal or point block diagonal
178 
179   @param[in]  op             CeedOperator to assemble point block diagonal
180   @param[in]  request        Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
181   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
182   @param[out] assembled      CeedVector to store assembled diagonal
183 
184   @return An error code: 0 - success, otherwise - failure
185 
186   @ref Developer
187 **/
188 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_point_block, CeedVector assembled) {
189   Ceed ceed;
190   bool is_composite;
191 
192   CeedCall(CeedOperatorGetCeed(op, &ceed));
193   CeedCall(CeedOperatorIsComposite(op, &is_composite));
194   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
195 
196   // Assemble QFunction
197   CeedInt             layout_qf[3];
198   const CeedScalar   *assembled_qf_array;
199   CeedVector          assembled_qf        = NULL;
200   CeedElemRestriction assembled_elem_rstr = NULL;
201 
202   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
203   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf));
204   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
205   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
206 
207   // Get assembly data
208   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
209   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
210   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
211   CeedBasis               *active_bases_in, *active_bases_out;
212   CeedElemRestriction     *active_elem_rstrs_in, *active_elem_rstrs_out;
213   CeedOperatorAssemblyData data;
214 
215   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
216   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
217                                                 &num_active_bases_out, &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out,
218                                                 &num_output_components));
219   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, NULL, NULL, &active_bases_out, NULL));
220   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs_in, NULL, &active_elem_rstrs_out));
221 
222   // Loop over all active bases (find matching input/output pairs)
223   for (CeedInt b = 0; b < CeedIntMin(num_active_bases_in, num_active_bases_out); b++) {
224     CeedInt             b_in, b_out, num_elem, num_nodes, num_qpts, num_comp;
225     bool                has_eval_none = false;
226     CeedScalar         *elem_diag_array, *identity = NULL;
227     CeedVector          elem_diag;
228     CeedElemRestriction diag_elem_rstr;
229 
230     if (num_active_bases_in <= num_active_bases_out) {
231       b_in = b;
232       for (b_out = 0; b_out < num_active_bases_out; b_out++) {
233         if (active_bases_in[b_in] == active_bases_out[b_out]) {
234           break;
235         }
236       }
237       if (b_out == num_active_bases_out) {
238         continue;
239       }  // No matching output basis found
240     } else {
241       b_out = b;
242       for (b_in = 0; b_in < num_active_bases_in; b_in++) {
243         if (active_bases_in[b_in] == active_bases_out[b_out]) {
244           break;
245         }
246       }
247       if (b_in == num_active_bases_in) {
248         continue;
249       }  // No matching output basis found
250     }
251     CeedCheck(active_elem_rstrs_in[b_in] == active_elem_rstrs_out[b_out], ceed, CEED_ERROR_UNSUPPORTED,
252               "Cannot assemble operator diagonal with different input and output active element restrictions");
253 
254     // Assemble point block diagonal restriction, if needed
255     if (is_point_block) {
256       CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs_in[b_in], &diag_elem_rstr));
257     } else {
258       CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs_in[b_in], &diag_elem_rstr));
259     }
260 
261     // Create diagonal vector
262     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
263 
264     // Assemble element operator diagonals
265     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
266     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
267     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
268     CeedCall(CeedBasisGetNumNodes(active_bases_in[b_in], &num_nodes));
269     CeedCall(CeedBasisGetNumComponents(active_bases_in[b_in], &num_comp));
270     if (active_bases_in[b_in] == CEED_BASIS_NONE) num_qpts = num_nodes;
271     else CeedCall(CeedBasisGetNumQuadraturePoints(active_bases_in[b_in], &num_qpts));
272 
273     // Construct identity matrix for basis if required
274     for (CeedInt i = 0; i < num_eval_modes_in[b_in]; i++) {
275       has_eval_none = has_eval_none || (eval_modes_in[b_in][i] == CEED_EVAL_NONE);
276     }
277     for (CeedInt i = 0; i < num_eval_modes_out[b_out]; i++) {
278       has_eval_none = has_eval_none || (eval_modes_out[b_out][i] == CEED_EVAL_NONE);
279     }
280     if (has_eval_none) {
281       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
282       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
283     }
284 
285     // Compute the diagonal of B^T D B
286     // Each element
287     for (CeedSize e = 0; e < num_elem; e++) {
288       // Each basis eval mode pair
289       CeedInt      d_out              = 0, q_comp_out;
290       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
291 
292       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b_out]; e_out++) {
293         CeedInt           d_in              = 0, q_comp_in;
294         const CeedScalar *B_t               = NULL;
295         CeedEvalMode      eval_mode_in_prev = CEED_EVAL_NONE;
296 
297         CeedCall(CeedOperatorGetBasisPointer(active_bases_out[b_out], eval_modes_out[b_out][e_out], identity, &B_t));
298         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_out[b_out], eval_modes_out[b_out][e_out], &q_comp_out));
299         if (q_comp_out > 1) {
300           if (e_out == 0 || eval_modes_out[b_out][e_out] != eval_mode_out_prev) d_out = 0;
301           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
302         }
303         eval_mode_out_prev = eval_modes_out[b_out][e_out];
304 
305         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b_in]; e_in++) {
306           const CeedScalar *B = NULL;
307 
308           CeedCall(CeedOperatorGetBasisPointer(active_bases_in[b_in], eval_modes_in[b_in][e_in], identity, &B));
309           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases_in[b_in], eval_modes_in[b_in][e_in], &q_comp_in));
310           if (q_comp_in > 1) {
311             if (e_in == 0 || eval_modes_in[b_in][e_in] != eval_mode_in_prev) d_in = 0;
312             else B = &B[(++d_in) * num_qpts * num_nodes];
313           }
314           eval_mode_in_prev = eval_modes_in[b_in][e_in];
315 
316           // Each component
317           for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
318             // Each qpt/node pair
319             for (CeedInt q = 0; q < num_qpts; q++) {
320               if (is_point_block) {
321                 // Point Block Diagonal
322                 for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
323                   const CeedSize c_offset =
324                       (eval_mode_offsets_in[b_in][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
325                   const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
326 
327                   for (CeedInt n = 0; n < num_nodes; n++) {
328                     elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
329                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
330                   }
331                 }
332               } else {
333                 // Diagonal Only
334                 const CeedInt c_offset =
335                     (eval_mode_offsets_in[b_in][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b_out][e_out] + c_out;
336                 const CeedScalar qf_value = assembled_qf_array[q * layout_qf[0] + c_offset * layout_qf[1] + e * layout_qf[2]];
337 
338                 for (CeedInt n = 0; n < num_nodes; n++) {
339                   elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
340                 }
341               }
342             }
343           }
344         }
345       }
346     }
347     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
348 
349     // Assemble local operator diagonal
350     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
351 
352     // Cleanup
353     CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
354     CeedCall(CeedVectorDestroy(&elem_diag));
355     CeedCall(CeedFree(&identity));
356   }
357   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
358   CeedCall(CeedVectorDestroy(&assembled_qf));
359   return CEED_ERROR_SUCCESS;
360 }
361 
362 /**
363   @brief Core logic for assembling composite operator diagonal
364 
365   @param[in]  op             CeedOperator to assemble point block diagonal
366   @param[in]  request        Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
367   @param[in]  is_point_block Boolean flag to assemble diagonal or point block diagonal
368   @param[out] assembled      CeedVector to store assembled diagonal
369 
370   @return An error code: 0 - success, otherwise - failure
371 
372   @ref Developer
373 **/
374 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_point_block,
375                                                                  CeedVector assembled) {
376   CeedInt       num_sub;
377   CeedOperator *suboperators;
378 
379   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
380   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
381   for (CeedInt i = 0; i < num_sub; i++) {
382     if (is_point_block) {
383       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
384     } else {
385       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
386     }
387   }
388   return CEED_ERROR_SUCCESS;
389 }
390 
391 /**
392   @brief Build nonzero pattern for non-composite operator
393 
394   Users should generally use CeedOperatorLinearAssembleSymbolic()
395 
396   @param[in]  op     CeedOperator to assemble nonzero pattern
397   @param[in]  offset Offset for number of entries
398   @param[out] rows   Row number for each entry
399   @param[out] cols   Column number for each entry
400 
401   @return An error code: 0 - success, otherwise - failure
402 
403   @ref Developer
404 **/
405 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
406   Ceed                ceed;
407   bool                is_composite;
408   CeedSize            num_nodes_in, num_nodes_out, count = 0;
409   CeedInt             num_elem_in, elem_size_in, num_comp_in, layout_er_in[3];
410   CeedInt             num_elem_out, elem_size_out, num_comp_out, layout_er_out[3], local_num_entries;
411   CeedScalar         *array;
412   const CeedScalar   *elem_dof_a_in, *elem_dof_a_out;
413   CeedVector          index_vec_in, index_vec_out, elem_dof_in, elem_dof_out;
414   CeedElemRestriction elem_rstr_in, elem_rstr_out, index_elem_rstr_in, index_elem_rstr_out;
415 
416   CeedCall(CeedOperatorGetCeed(op, &ceed));
417   CeedCall(CeedOperatorIsComposite(op, &is_composite));
418   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
419 
420   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes_in, &num_nodes_out));
421   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
422   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
423   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
424   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
425   CeedCall(CeedElemRestrictionGetELayout(elem_rstr_in, &layout_er_in));
426 
427   // Determine elem_dof relation for input
428   CeedCall(CeedVectorCreate(ceed, num_nodes_in, &index_vec_in));
429   CeedCall(CeedVectorGetArrayWrite(index_vec_in, CEED_MEM_HOST, &array));
430   for (CeedInt i = 0; i < num_nodes_in; i++) array[i] = i;
431   CeedCall(CeedVectorRestoreArray(index_vec_in, &array));
432   CeedCall(CeedVectorCreate(ceed, num_elem_in * elem_size_in * num_comp_in, &elem_dof_in));
433   CeedCall(CeedVectorSetValue(elem_dof_in, 0.0));
434   CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_in, &index_elem_rstr_in));
435   CeedCall(CeedElemRestrictionApply(index_elem_rstr_in, CEED_NOTRANSPOSE, index_vec_in, elem_dof_in, CEED_REQUEST_IMMEDIATE));
436   CeedCall(CeedVectorGetArrayRead(elem_dof_in, CEED_MEM_HOST, &elem_dof_a_in));
437   CeedCall(CeedVectorDestroy(&index_vec_in));
438   CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_in));
439 
440   if (elem_rstr_in != elem_rstr_out) {
441     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
442     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
443               "Active input and output operator restrictions must have the same number of elements");
444     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
445     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
446     CeedCall(CeedElemRestrictionGetELayout(elem_rstr_out, &layout_er_out));
447 
448     // Determine elem_dof relation for output
449     CeedCall(CeedVectorCreate(ceed, num_nodes_out, &index_vec_out));
450     CeedCall(CeedVectorGetArrayWrite(index_vec_out, CEED_MEM_HOST, &array));
451     for (CeedInt i = 0; i < num_nodes_out; i++) array[i] = i;
452     CeedCall(CeedVectorRestoreArray(index_vec_out, &array));
453     CeedCall(CeedVectorCreate(ceed, num_elem_out * elem_size_out * num_comp_out, &elem_dof_out));
454     CeedCall(CeedVectorSetValue(elem_dof_out, 0.0));
455     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr_out, &index_elem_rstr_out));
456     CeedCall(CeedElemRestrictionApply(index_elem_rstr_out, CEED_NOTRANSPOSE, index_vec_out, elem_dof_out, CEED_REQUEST_IMMEDIATE));
457     CeedCall(CeedVectorGetArrayRead(elem_dof_out, CEED_MEM_HOST, &elem_dof_a_out));
458     CeedCall(CeedVectorDestroy(&index_vec_out));
459     CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr_out));
460   } else {
461     num_elem_out     = num_elem_in;
462     elem_size_out    = elem_size_in;
463     num_comp_out     = num_comp_in;
464     layout_er_out[0] = layout_er_in[0];
465     layout_er_out[1] = layout_er_in[1];
466     layout_er_out[2] = layout_er_in[2];
467     elem_dof_a_out   = elem_dof_a_in;
468   }
469   local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
470 
471   // Determine i, j locations for element matrices
472   for (CeedInt e = 0; e < num_elem_in; e++) {
473     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
474       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
475         for (CeedInt i = 0; i < elem_size_out; i++) {
476           for (CeedInt j = 0; j < elem_size_in; j++) {
477             const CeedInt elem_dof_index_row = i * layout_er_out[0] + comp_out * layout_er_out[1] + e * layout_er_out[2];
478             const CeedInt elem_dof_index_col = j * layout_er_in[0] + comp_in * layout_er_in[1] + e * layout_er_in[2];
479             const CeedInt row                = elem_dof_a_out[elem_dof_index_row];
480             const CeedInt col                = elem_dof_a_in[elem_dof_index_col];
481 
482             rows[offset + count] = row;
483             cols[offset + count] = col;
484             count++;
485           }
486         }
487       }
488     }
489   }
490   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
491   CeedCall(CeedVectorRestoreArrayRead(elem_dof_in, &elem_dof_a_in));
492   CeedCall(CeedVectorDestroy(&elem_dof_in));
493   if (elem_rstr_in != elem_rstr_out) {
494     CeedCall(CeedVectorRestoreArrayRead(elem_dof_out, &elem_dof_a_out));
495     CeedCall(CeedVectorDestroy(&elem_dof_out));
496   }
497   return CEED_ERROR_SUCCESS;
498 }
499 
500 /**
501   @brief Assemble nonzero entries for non-composite operator
502 
503   Users should generally use CeedOperatorLinearAssemble()
504 
505   @param[in]  op     CeedOperator to assemble
506   @param[in]  offset Offset for number of entries
507   @param[out] values Values to assemble into matrix
508 
509   @return An error code: 0 - success, otherwise - failure
510 
511   @ref Developer
512 **/
513 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
514   Ceed ceed;
515   bool is_composite;
516 
517   CeedCall(CeedOperatorGetCeed(op, &ceed));
518   CeedCall(CeedOperatorIsComposite(op, &is_composite));
519   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
520 
521   // Early exit for empty operator
522   {
523     CeedInt num_elem = 0;
524 
525     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
526     if (num_elem == 0) return CEED_ERROR_SUCCESS;
527   }
528 
529   if (op->LinearAssembleSingle) {
530     // Backend version
531     CeedCall(op->LinearAssembleSingle(op, offset, values));
532     return CEED_ERROR_SUCCESS;
533   } else {
534     // Operator fallback
535     CeedOperator op_fallback;
536 
537     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
538     if (op_fallback) {
539       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
540       return CEED_ERROR_SUCCESS;
541     }
542   }
543 
544   // Assemble QFunction
545   CeedInt             layout_qf[3];
546   const CeedScalar   *assembled_qf_array;
547   CeedVector          assembled_qf        = NULL;
548   CeedElemRestriction assembled_elem_rstr = NULL;
549 
550   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, CEED_REQUEST_IMMEDIATE));
551   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout_qf));
552   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
553   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
554 
555   // Get assembly data
556   CeedInt                  num_elem_in, elem_size_in, num_comp_in, num_qpts_in;
557   CeedInt                  num_elem_out, elem_size_out, num_comp_out, num_qpts_out, local_num_entries;
558   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
559   CeedInt                  num_active_bases_in, *num_eval_modes_in, num_active_bases_out, *num_eval_modes_out;
560   CeedBasis               *active_bases_in, *active_bases_out, basis_in, basis_out;
561   const CeedScalar       **B_mats_in, **B_mats_out, *B_mat_in, *B_mat_out;
562   CeedElemRestriction      elem_rstr_in, elem_rstr_out;
563   CeedRestrictionType      elem_rstr_type_in, elem_rstr_type_out;
564   const bool              *elem_rstr_orients_in = NULL, *elem_rstr_orients_out = NULL;
565   const CeedInt8          *elem_rstr_curl_orients_in = NULL, *elem_rstr_curl_orients_out = NULL;
566   CeedOperatorAssemblyData data;
567 
568   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
569   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases_in, &num_eval_modes_in, &eval_modes_in, NULL, &num_active_bases_out,
570                                                 &num_eval_modes_out, &eval_modes_out, NULL, NULL));
571 
572   CeedCheck(num_active_bases_in == num_active_bases_out && num_active_bases_in == 1, ceed, CEED_ERROR_UNSUPPORTED,
573             "Cannot assemble operator with multiple active bases");
574   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator without inputs/outputs");
575 
576   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases_in, &B_mats_in, NULL, &active_bases_out, &B_mats_out));
577   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &elem_rstr_in, &elem_rstr_out));
578   basis_in  = active_bases_in[0];
579   basis_out = active_bases_out[0];
580   B_mat_in  = B_mats_in[0];
581   B_mat_out = B_mats_out[0];
582 
583   CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_in, &num_elem_in));
584   CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_in, &elem_size_in));
585   CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_in, &num_comp_in));
586   if (basis_in == CEED_BASIS_NONE) num_qpts_in = elem_size_in;
587   else CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts_in));
588 
589   CeedCall(CeedElemRestrictionGetType(elem_rstr_in, &elem_rstr_type_in));
590   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
591     CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_orients_in));
592   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
593     CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_in, CEED_MEM_HOST, &elem_rstr_curl_orients_in));
594   }
595 
596   if (elem_rstr_in != elem_rstr_out) {
597     CeedCall(CeedElemRestrictionGetNumElements(elem_rstr_out, &num_elem_out));
598     CeedCheck(num_elem_in == num_elem_out, ceed, CEED_ERROR_UNSUPPORTED,
599               "Active input and output operator restrictions must have the same number of elements");
600     CeedCall(CeedElemRestrictionGetElementSize(elem_rstr_out, &elem_size_out));
601     CeedCall(CeedElemRestrictionGetNumComponents(elem_rstr_out, &num_comp_out));
602     if (basis_out == CEED_BASIS_NONE) num_qpts_out = elem_size_out;
603     else CeedCall(CeedBasisGetNumQuadraturePoints(basis_out, &num_qpts_out));
604     CeedCheck(num_qpts_in == num_qpts_out, ceed, CEED_ERROR_UNSUPPORTED,
605               "Active input and output bases must have the same number of quadrature points");
606 
607     CeedCall(CeedElemRestrictionGetType(elem_rstr_out, &elem_rstr_type_out));
608     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
609       CeedCall(CeedElemRestrictionGetOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_orients_out));
610     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
611       CeedCall(CeedElemRestrictionGetCurlOrientations(elem_rstr_out, CEED_MEM_HOST, &elem_rstr_curl_orients_out));
612     }
613   } else {
614     num_elem_out  = num_elem_in;
615     elem_size_out = elem_size_in;
616     num_comp_out  = num_comp_in;
617     num_qpts_out  = num_qpts_in;
618 
619     elem_rstr_orients_out      = elem_rstr_orients_in;
620     elem_rstr_curl_orients_out = elem_rstr_curl_orients_in;
621   }
622   local_num_entries = elem_size_out * num_comp_out * elem_size_in * num_comp_in * num_elem_in;
623 
624   // Loop over elements and put in data structure
625   // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
626   CeedSize    count = 0;
627   CeedScalar *vals, *BTD_mat = NULL, *elem_mat = NULL, *elem_mat_b = NULL;
628 
629   CeedCall(CeedCalloc(elem_size_out * num_qpts_in * num_eval_modes_in[0], &BTD_mat));
630   CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat));
631   if (elem_rstr_curl_orients_in || elem_rstr_curl_orients_out) CeedCall(CeedCalloc(elem_size_out * elem_size_in, &elem_mat_b));
632 
633   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
634   for (CeedSize e = 0; e < num_elem_in; e++) {
635     for (CeedInt comp_in = 0; comp_in < num_comp_in; comp_in++) {
636       for (CeedInt comp_out = 0; comp_out < num_comp_out; comp_out++) {
637         // Compute B^T*D
638         for (CeedSize n = 0; n < elem_size_out; n++) {
639           for (CeedSize q = 0; q < num_qpts_in; q++) {
640             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
641               const CeedSize btd_index = n * (num_qpts_in * num_eval_modes_in[0]) + q * num_eval_modes_in[0] + e_in;
642               CeedScalar     sum       = 0.0;
643 
644               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
645                 const CeedSize b_out_index     = (q * num_eval_modes_out[0] + e_out) * elem_size_out + n;
646                 const CeedSize eval_mode_index = ((e_in * num_comp_in + comp_in) * num_eval_modes_out[0] + e_out) * num_comp_out + comp_out;
647                 const CeedSize qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
648 
649                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
650               }
651               BTD_mat[btd_index] = sum;
652             }
653           }
654         }
655 
656         // Form element matrix itself (for each block component)
657         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size_out, elem_size_in, num_qpts_in * num_eval_modes_in[0]));
658 
659         // Transform the element matrix if required
660         if (elem_rstr_orients_out) {
661           const bool *elem_orients = &elem_rstr_orients_out[e * elem_size_out];
662 
663           for (CeedInt i = 0; i < elem_size_out; i++) {
664             const double orient = elem_orients[i] ? -1.0 : 1.0;
665 
666             for (CeedInt j = 0; j < elem_size_in; j++) {
667               elem_mat[i * elem_size_in + j] *= orient;
668             }
669           }
670         } else if (elem_rstr_curl_orients_out) {
671           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_out[e * 3 * elem_size_out];
672 
673           // T^T*(B^T*D*B)
674           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
675           for (CeedInt i = 0; i < elem_size_out; i++) {
676             for (CeedInt j = 0; j < elem_size_in; j++) {
677               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * i + 1] +
678                                                (i > 0 ? elem_mat_b[(i - 1) * elem_size_in + j] * elem_curl_orients[3 * i - 1] : 0.0) +
679                                                (i < elem_size_out - 1 ? elem_mat_b[(i + 1) * elem_size_in + j] * elem_curl_orients[3 * i + 3] : 0.0);
680             }
681           }
682         }
683         if (elem_rstr_orients_in) {
684           const bool *elem_orients = &elem_rstr_orients_in[e * elem_size_in];
685 
686           for (CeedInt i = 0; i < elem_size_out; i++) {
687             for (CeedInt j = 0; j < elem_size_in; j++) {
688               elem_mat[i * elem_size_in + j] *= elem_orients[j] ? -1.0 : 1.0;
689             }
690           }
691         } else if (elem_rstr_curl_orients_in) {
692           const CeedInt8 *elem_curl_orients = &elem_rstr_curl_orients_in[e * 3 * elem_size_in];
693 
694           // (B^T*D*B)*T
695           memcpy(elem_mat_b, elem_mat, elem_size_out * elem_size_in * sizeof(CeedScalar));
696           for (CeedInt i = 0; i < elem_size_out; i++) {
697             for (CeedInt j = 0; j < elem_size_in; j++) {
698               elem_mat[i * elem_size_in + j] = elem_mat_b[i * elem_size_in + j] * elem_curl_orients[3 * j + 1] +
699                                                (j > 0 ? elem_mat_b[i * elem_size_in + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) +
700                                                (j < elem_size_in - 1 ? elem_mat_b[i * elem_size_in + j + 1] * elem_curl_orients[3 * j + 3] : 0.0);
701             }
702           }
703         }
704 
705         // Put element matrix in coordinate data structure
706         for (CeedInt i = 0; i < elem_size_out; i++) {
707           for (CeedInt j = 0; j < elem_size_in; j++) {
708             vals[offset + count] = elem_mat[i * elem_size_in + j];
709             count++;
710           }
711         }
712       }
713     }
714   }
715   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries");
716   CeedCall(CeedVectorRestoreArray(values, &vals));
717 
718   // Cleanup
719   CeedCall(CeedFree(&BTD_mat));
720   CeedCall(CeedFree(&elem_mat));
721   CeedCall(CeedFree(&elem_mat_b));
722   if (elem_rstr_type_in == CEED_RESTRICTION_ORIENTED) {
723     CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_in, &elem_rstr_orients_in));
724   } else if (elem_rstr_type_in == CEED_RESTRICTION_CURL_ORIENTED) {
725     CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_in, &elem_rstr_curl_orients_in));
726   }
727   if (elem_rstr_in != elem_rstr_out) {
728     if (elem_rstr_type_out == CEED_RESTRICTION_ORIENTED) {
729       CeedCall(CeedElemRestrictionRestoreOrientations(elem_rstr_out, &elem_rstr_orients_out));
730     } else if (elem_rstr_type_out == CEED_RESTRICTION_CURL_ORIENTED) {
731       CeedCall(CeedElemRestrictionRestoreCurlOrientations(elem_rstr_out, &elem_rstr_curl_orients_out));
732     }
733   }
734   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
735   CeedCall(CeedVectorDestroy(&assembled_qf));
736   return CEED_ERROR_SUCCESS;
737 }
738 
739 /**
740   @brief Count number of entries for assembled CeedOperator
741 
742   @param[in]  op          CeedOperator to assemble
743   @param[out] num_entries Number of entries in assembled representation
744 
745   @return An error code: 0 - success, otherwise - failure
746 
747   @ref Utility
748 **/
749 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) {
750   bool                is_composite;
751   CeedInt             num_elem_in, elem_size_in, num_comp_in, num_elem_out, elem_size_out, num_comp_out;
752   CeedElemRestriction rstr_in, rstr_out;
753 
754   CeedCall(CeedOperatorIsComposite(op, &is_composite));
755   CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
756 
757   CeedCall(CeedOperatorGetActiveElemRestrictions(op, &rstr_in, &rstr_out));
758   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem_in));
759   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size_in));
760   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp_in));
761   if (rstr_in != rstr_out) {
762     CeedCall(CeedElemRestrictionGetNumElements(rstr_out, &num_elem_out));
763     CeedCheck(num_elem_in == num_elem_out, op->ceed, CEED_ERROR_UNSUPPORTED,
764               "Active input and output operator restrictions must have the same number of elements");
765     CeedCall(CeedElemRestrictionGetElementSize(rstr_out, &elem_size_out));
766     CeedCall(CeedElemRestrictionGetNumComponents(rstr_out, &num_comp_out));
767   } else {
768     num_elem_out  = num_elem_in;
769     elem_size_out = elem_size_in;
770     num_comp_out  = num_comp_in;
771   }
772   *num_entries = (CeedSize)elem_size_in * num_comp_in * elem_size_out * num_comp_out * num_elem_in;
773   return CEED_ERROR_SUCCESS;
774 }
775 
776 /**
777   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
778 
779   @param[in]  op_fine      Fine grid operator
780   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
781   @param[in]  rstr_coarse  Coarse grid restriction
782   @param[in]  basis_coarse Coarse grid active vector basis
783   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
784   @param[out] op_coarse    Coarse grid operator
785   @param[out] op_prolong   Coarse to fine operator, or NULL
786   @param[out] op_restrict  Fine to coarse operator, or NULL
787 
788   @return An error code: 0 - success, otherwise - failure
789 
790   @ref Developer
791 **/
792 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
793                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
794   bool                is_composite;
795   Ceed                ceed;
796   CeedInt             num_comp;
797   CeedVector          mult_vec         = NULL;
798   CeedElemRestriction rstr_p_mult_fine = NULL, rstr_fine = NULL;
799 
800   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
801 
802   // Check for composite operator
803   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
804   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
805 
806   // Coarse Grid
807   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
808   // -- Clone input fields
809   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
810     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
811       rstr_fine = op_fine->input_fields[i]->elem_rstr;
812       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
813     } else {
814       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
815                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
816     }
817   }
818   // -- Clone output fields
819   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
820     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
821       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
822     } else {
823       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
824                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
825     }
826   }
827   // -- Clone QFunctionAssemblyData
828   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
829 
830   // Multiplicity vector
831   if (op_restrict || op_prolong) {
832     CeedVector          mult_e_vec;
833     CeedRestrictionType rstr_type;
834 
835     CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type));
836     CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED,
837               "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported");
838     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
839     CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine));
840     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
841     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
842     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
843     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
844     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
845     CeedCall(CeedVectorDestroy(&mult_e_vec));
846     CeedCall(CeedVectorReciprocal(mult_vec));
847   }
848 
849   // Clone name
850   bool   has_name = op_fine->name;
851   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
852   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
853 
854   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
855   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
856             "Prolongation or restriction operator creation requires coarse-to-fine basis");
857 
858   // Restriction/Prolongation Operators
859   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
860 
861   // Restriction
862   if (op_restrict) {
863     CeedInt             *num_comp_r_data;
864     CeedQFunctionContext ctx_r;
865     CeedQFunction        qf_restrict;
866 
867     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
868     CeedCall(CeedCalloc(1, &num_comp_r_data));
869     num_comp_r_data[0] = num_comp;
870     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
871     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
872     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
873     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
874     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
875     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
876     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
877     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
878 
879     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
880     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
881     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
882     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
883 
884     // Set name
885     char *restriction_name;
886 
887     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
888     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
889     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
890     CeedCall(CeedFree(&restriction_name));
891 
892     // Check
893     CeedCall(CeedOperatorCheckReady(*op_restrict));
894 
895     // Cleanup
896     CeedCall(CeedQFunctionDestroy(&qf_restrict));
897   }
898 
899   // Prolongation
900   if (op_prolong) {
901     CeedInt             *num_comp_p_data;
902     CeedQFunctionContext ctx_p;
903     CeedQFunction        qf_prolong;
904 
905     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
906     CeedCall(CeedCalloc(1, &num_comp_p_data));
907     num_comp_p_data[0] = num_comp;
908     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
909     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
910     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
911     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
912     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
913     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
914     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
915     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
916 
917     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
918     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
919     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_NONE, mult_vec));
920     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE));
921 
922     // Set name
923     char *prolongation_name;
924 
925     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
926     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
927     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
928     CeedCall(CeedFree(&prolongation_name));
929 
930     // Check
931     CeedCall(CeedOperatorCheckReady(*op_prolong));
932 
933     // Cleanup
934     CeedCall(CeedQFunctionDestroy(&qf_prolong));
935   }
936 
937   // Check
938   CeedCall(CeedOperatorCheckReady(*op_coarse));
939 
940   // Cleanup
941   CeedCall(CeedVectorDestroy(&mult_vec));
942   CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine));
943   CeedCall(CeedBasisDestroy(&basis_c_to_f));
944   return CEED_ERROR_SUCCESS;
945 }
946 
947 /**
948   @brief Build 1D mass matrix and Laplacian with perturbation
949 
950   @param[in]  interp_1d   Interpolation matrix in one dimension
951   @param[in]  grad_1d     Gradient matrix in one dimension
952   @param[in]  q_weight_1d Quadrature weights in one dimension
953   @param[in]  P_1d        Number of basis nodes in one dimension
954   @param[in]  Q_1d        Number of quadrature points in one dimension
955   @param[in]  dim         Dimension of basis
956   @param[out] mass        Assembled mass matrix in one dimension
957   @param[out] laplace     Assembled perturbed Laplacian in one dimension
958 
959   @return An error code: 0 - success, otherwise - failure
960 
961   @ref Developer
962 **/
963 CeedPragmaOptimizeOff
964 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
965                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
966   for (CeedInt i = 0; i < P_1d; i++) {
967     for (CeedInt j = 0; j < P_1d; j++) {
968       CeedScalar sum = 0.0;
969       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
970       mass[i + j * P_1d] = sum;
971     }
972   }
973   // -- Laplacian
974   for (CeedInt i = 0; i < P_1d; i++) {
975     for (CeedInt j = 0; j < P_1d; j++) {
976       CeedScalar sum = 0.0;
977 
978       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
979       laplace[i + j * P_1d] = sum;
980     }
981   }
982   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
983   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
984   return CEED_ERROR_SUCCESS;
985 }
986 CeedPragmaOptimizeOn
987 
988 /// @}
989 
990 /// ----------------------------------------------------------------------------
991 /// CeedOperator Backend API
992 /// ----------------------------------------------------------------------------
993 /// @addtogroup CeedOperatorBackend
994 /// @{
995 
996 /**
997   @brief Create point block restriction for active operator field
998 
999   @param[in]  rstr             Original CeedElemRestriction for active field
1000   @param[out] point_block_rstr Address of the variable where the newly created CeedElemRestriction will be stored
1001 
1002   @return An error code: 0 - success, otherwise - failure
1003 
1004   @ref Backend
1005 **/
1006 int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *point_block_rstr) {
1007   Ceed           ceed;
1008   CeedInt        num_elem, num_comp, shift, elem_size, comp_stride, *point_block_offsets;
1009   CeedSize       l_size;
1010   const CeedInt *offsets;
1011 
1012   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
1013   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
1014 
1015   // Expand offsets
1016   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
1017   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
1018   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
1019   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
1020   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
1021   shift = num_comp;
1022   if (comp_stride != 1) shift *= num_comp;
1023   CeedCall(CeedCalloc(num_elem * elem_size, &point_block_offsets));
1024   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
1025     point_block_offsets[i] = offsets[i] * shift;
1026   }
1027 
1028   // Create new restriction
1029   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
1030                                      point_block_offsets, point_block_rstr));
1031 
1032   // Cleanup
1033   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
1034   return CEED_ERROR_SUCCESS;
1035 }
1036 
1037 /**
1038   @brief Create object holding CeedQFunction assembly data for CeedOperator
1039 
1040   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
1041   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
1042 
1043   @return An error code: 0 - success, otherwise - failure
1044 
1045   @ref Backend
1046 **/
1047 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
1048   CeedCall(CeedCalloc(1, data));
1049   (*data)->ref_count = 1;
1050   (*data)->ceed      = ceed;
1051   CeedCall(CeedReference(ceed));
1052   return CEED_ERROR_SUCCESS;
1053 }
1054 
1055 /**
1056   @brief Increment the reference counter for a CeedQFunctionAssemblyData
1057 
1058   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
1059 
1060   @return An error code: 0 - success, otherwise - failure
1061 
1062   @ref Backend
1063 **/
1064 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
1065   data->ref_count++;
1066   return CEED_ERROR_SUCCESS;
1067 }
1068 
1069 /**
1070   @brief Set re-use of CeedQFunctionAssemblyData
1071 
1072   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
1073   @param[in]     reuse_data Boolean flag indicating data re-use
1074 
1075   @return An error code: 0 - success, otherwise - failure
1076 
1077   @ref Backend
1078 **/
1079 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
1080   data->reuse_data        = reuse_data;
1081   data->needs_data_update = true;
1082   return CEED_ERROR_SUCCESS;
1083 }
1084 
1085 /**
1086   @brief Mark QFunctionAssemblyData as stale
1087 
1088   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
1089   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
1090 
1091   @return An error code: 0 - success, otherwise - failure
1092 
1093   @ref Backend
1094 **/
1095 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
1096   data->needs_data_update = needs_data_update;
1097   return CEED_ERROR_SUCCESS;
1098 }
1099 
1100 /**
1101   @brief Determine if QFunctionAssemblyData needs update
1102 
1103   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
1104   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1105 
1106   @return An error code: 0 - success, otherwise - failure
1107 
1108   @ref Backend
1109 **/
1110 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
1111   *is_update_needed = !data->reuse_data || data->needs_data_update;
1112   return CEED_ERROR_SUCCESS;
1113 }
1114 
1115 /**
1116   @brief Copy the pointer to a CeedQFunctionAssemblyData.
1117 
1118   Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
1119 
1120   Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
1121         CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
1122         CeedQFunctionAssemblyData.
1123 
1124   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
1125   @param[in,out] data_copy Variable to store copied reference
1126 
1127   @return An error code: 0 - success, otherwise - failure
1128 
1129   @ref Backend
1130 **/
1131 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1132   CeedCall(CeedQFunctionAssemblyDataReference(data));
1133   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1134   *data_copy = data;
1135   return CEED_ERROR_SUCCESS;
1136 }
1137 
1138 /**
1139   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1140 
1141   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1142   @param[out] is_setup Boolean flag for setup status
1143 
1144   @return An error code: 0 - success, otherwise - failure
1145 
1146   @ref Backend
1147 **/
1148 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1149   *is_setup = data->is_setup;
1150   return CEED_ERROR_SUCCESS;
1151 }
1152 
1153 /**
1154   @brief Set internal objects for CeedQFunctionAssemblyData
1155 
1156   @param[in,out] data CeedQFunctionAssemblyData to set objects
1157   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1158   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1159 
1160   @return An error code: 0 - success, otherwise - failure
1161 
1162   @ref Backend
1163 **/
1164 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1165   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1166   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1167 
1168   data->is_setup = true;
1169   return CEED_ERROR_SUCCESS;
1170 }
1171 
1172 /**
1173   @brief Get internal objects for CeedQFunctionAssemblyData
1174 
1175   @param[in,out] data CeedQFunctionAssemblyData to set objects
1176   @param[out]    vec  CeedVector to store assembled CeedQFunction at quadrature points
1177   @param[out]    rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1178 
1179   @return An error code: 0 - success, otherwise - failure
1180 
1181   @ref Backend
1182 **/
1183 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1184   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1185 
1186   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1187   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1188   return CEED_ERROR_SUCCESS;
1189 }
1190 
1191 /**
1192   @brief Destroy CeedQFunctionAssemblyData
1193 
1194   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1195 
1196   @return An error code: 0 - success, otherwise - failure
1197 
1198   @ref Backend
1199 **/
1200 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1201   if (!*data || --(*data)->ref_count > 0) {
1202     *data = NULL;
1203     return CEED_ERROR_SUCCESS;
1204   }
1205   CeedCall(CeedDestroy(&(*data)->ceed));
1206   CeedCall(CeedVectorDestroy(&(*data)->vec));
1207   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1208 
1209   CeedCall(CeedFree(data));
1210   return CEED_ERROR_SUCCESS;
1211 }
1212 
1213 /**
1214   @brief Get CeedOperatorAssemblyData
1215 
1216   @param[in]  op   CeedOperator to assemble
1217   @param[out] data CeedQFunctionAssemblyData
1218 
1219   @return An error code: 0 - success, otherwise - failure
1220 
1221   @ref Backend
1222 **/
1223 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1224   if (!op->op_assembled) {
1225     CeedOperatorAssemblyData data;
1226 
1227     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1228     op->op_assembled = data;
1229   }
1230   *data = op->op_assembled;
1231   return CEED_ERROR_SUCCESS;
1232 }
1233 
1234 /**
1235   @brief Create object holding CeedOperator assembly data.
1236 
1237   The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1238   An array with references to the corresponding active CeedElemRestrictions is also stored.
1239   For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1240   The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1241 CeedEvalMode.
1242   The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1243   Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1244 
1245   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1246   @param[in]  op   CeedOperator to be assembled
1247   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1248 
1249   @return An error code: 0 - success, otherwise - failure
1250 
1251   @ref Backend
1252 **/
1253 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1254   CeedInt             num_active_bases_in = 0, num_active_bases_out = 0, offset = 0;
1255   CeedInt             num_input_fields, *num_eval_modes_in = NULL, num_output_fields, *num_eval_modes_out = NULL;
1256   CeedSize          **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1257   CeedEvalMode      **eval_modes_in = NULL, **eval_modes_out = NULL;
1258   CeedQFunctionField *qf_fields;
1259   CeedQFunction       qf;
1260   CeedOperatorField  *op_fields;
1261   bool                is_composite;
1262 
1263   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1264   CeedCheck(!is_composite, ceed, CEED_ERROR_INCOMPATIBLE, "Can only create CeedOperator assembly data for non-composite operators.");
1265 
1266   // Allocate
1267   CeedCall(CeedCalloc(1, data));
1268   (*data)->ceed = ceed;
1269   CeedCall(CeedReference(ceed));
1270 
1271   // Build OperatorAssembly data
1272   CeedCall(CeedOperatorGetQFunction(op, &qf));
1273   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1274   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1275 
1276   // Determine active input basis
1277   for (CeedInt i = 0; i < num_input_fields; i++) {
1278     CeedVector vec;
1279 
1280     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1281     if (vec == CEED_VECTOR_ACTIVE) {
1282       CeedInt      index = -1, num_comp, q_comp;
1283       CeedEvalMode eval_mode;
1284       CeedBasis    basis_in = NULL;
1285 
1286       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1287       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1288       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1289       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1290       for (CeedInt i = 0; i < num_active_bases_in; i++) {
1291         if ((*data)->active_bases_in[i] == basis_in) index = i;
1292       }
1293       if (index == -1) {
1294         CeedElemRestriction elem_rstr_in;
1295 
1296         index = num_active_bases_in;
1297         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_bases_in));
1298         (*data)->active_bases_in[num_active_bases_in] = NULL;
1299         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases_in[num_active_bases_in]));
1300         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->active_elem_rstrs_in));
1301         (*data)->active_elem_rstrs_in[num_active_bases_in] = NULL;
1302         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1303         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs_in[num_active_bases_in]));
1304         CeedCall(CeedRealloc(num_active_bases_in + 1, &num_eval_modes_in));
1305         num_eval_modes_in[index] = 0;
1306         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_modes_in));
1307         eval_modes_in[index] = NULL;
1308         CeedCall(CeedRealloc(num_active_bases_in + 1, &eval_mode_offsets_in));
1309         eval_mode_offsets_in[index] = NULL;
1310         CeedCall(CeedRealloc(num_active_bases_in + 1, &(*data)->assembled_bases_in));
1311         (*data)->assembled_bases_in[index] = NULL;
1312         num_active_bases_in++;
1313       }
1314       if (eval_mode != CEED_EVAL_WEIGHT) {
1315         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1316         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1317         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1318         for (CeedInt d = 0; d < q_comp; d++) {
1319           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1320           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1321           offset += num_comp;
1322         }
1323         num_eval_modes_in[index] += q_comp;
1324       }
1325     }
1326   }
1327 
1328   // Determine active output basis
1329   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1330   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1331   offset = 0;
1332   for (CeedInt i = 0; i < num_output_fields; i++) {
1333     CeedVector vec;
1334 
1335     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1336     if (vec == CEED_VECTOR_ACTIVE) {
1337       CeedInt      index = -1, num_comp, q_comp;
1338       CeedEvalMode eval_mode;
1339       CeedBasis    basis_out = NULL;
1340 
1341       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1342       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1343       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1344       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1345       for (CeedInt i = 0; i < num_active_bases_out; i++) {
1346         if ((*data)->active_bases_out[i] == basis_out) index = i;
1347       }
1348       if (index == -1) {
1349         CeedElemRestriction elem_rstr_out;
1350 
1351         index = num_active_bases_out;
1352         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_bases_out));
1353         (*data)->active_bases_out[num_active_bases_out] = NULL;
1354         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases_out[num_active_bases_out]));
1355         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->active_elem_rstrs_out));
1356         (*data)->active_elem_rstrs_out[num_active_bases_out] = NULL;
1357         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1358         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs_out[num_active_bases_out]));
1359         CeedCall(CeedRealloc(num_active_bases_out + 1, &num_eval_modes_out));
1360         num_eval_modes_out[index] = 0;
1361         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_modes_out));
1362         eval_modes_out[index] = NULL;
1363         CeedCall(CeedRealloc(num_active_bases_out + 1, &eval_mode_offsets_out));
1364         eval_mode_offsets_out[index] = NULL;
1365         CeedCall(CeedRealloc(num_active_bases_out + 1, &(*data)->assembled_bases_out));
1366         (*data)->assembled_bases_out[index] = NULL;
1367         num_active_bases_out++;
1368       }
1369       if (eval_mode != CEED_EVAL_WEIGHT) {
1370         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1371         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1372         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1373         for (CeedInt d = 0; d < q_comp; d++) {
1374           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1375           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1376           offset += num_comp;
1377         }
1378         num_eval_modes_out[index] += q_comp;
1379       }
1380     }
1381   }
1382   (*data)->num_active_bases_in   = num_active_bases_in;
1383   (*data)->num_eval_modes_in     = num_eval_modes_in;
1384   (*data)->eval_modes_in         = eval_modes_in;
1385   (*data)->eval_mode_offsets_in  = eval_mode_offsets_in;
1386   (*data)->num_active_bases_out  = num_active_bases_out;
1387   (*data)->num_eval_modes_out    = num_eval_modes_out;
1388   (*data)->eval_modes_out        = eval_modes_out;
1389   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1390   (*data)->num_output_components = offset;
1391   return CEED_ERROR_SUCCESS;
1392 }
1393 
1394 /**
1395   @brief Get CeedOperator CeedEvalModes for assembly.
1396 
1397   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1398 
1399   @param[in]  data                  CeedOperatorAssemblyData
1400   @param[out] num_active_bases_in   Total number of active bases for input
1401   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1402                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1403   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1404   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1405   @param[out] num_active_bases_out  Total number of active bases for output
1406   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1407   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1408   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1409   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1410                                       including contributions of all active bases
1411 
1412   @return An error code: 0 - success, otherwise - failure
1413 
1414   @ref Backend
1415 **/
1416 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedInt **num_eval_modes_in,
1417                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt *num_active_bases_out,
1418                                          CeedInt **num_eval_modes_out, const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out,
1419                                          CeedSize *num_output_components) {
1420   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1421   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1422   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1423   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1424   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1425   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1426   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1427   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1428   if (num_output_components) *num_output_components = data->num_output_components;
1429   return CEED_ERROR_SUCCESS;
1430 }
1431 
1432 /**
1433   @brief Get CeedOperator CeedBasis data for assembly.
1434 
1435   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1436 
1437   @param[in]  data                 CeedOperatorAssemblyData
1438   @param[out] num_active_bases_in  Number of active input bases, or NULL
1439   @param[out] active_bases_in      Pointer to hold active input CeedBasis, or NULL
1440   @param[out] assembled_bases_in   Pointer to hold assembled active input B, or NULL
1441   @param[out] num_active_bases_out Number of active output bases, or NULL
1442   @param[out] active_bases_out     Pointer to hold active output CeedBasis, or NULL
1443   @param[out] assembled_bases_out  Pointer to hold assembled active output B, or NULL
1444 
1445   @return An error code: 0 - success, otherwise - failure
1446 
1447   @ref Backend
1448 **/
1449 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases_in, CeedBasis **active_bases_in,
1450                                      const CeedScalar ***assembled_bases_in, CeedInt *num_active_bases_out, CeedBasis **active_bases_out,
1451                                      const CeedScalar ***assembled_bases_out) {
1452   // Assemble B_in, B_out if needed
1453   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1454     CeedInt num_qpts;
1455 
1456     if (data->active_bases_in[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[0], &num_qpts));
1457     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_in[0], &num_qpts));
1458     for (CeedInt b = 0; b < data->num_active_bases_in; b++) {
1459       bool        has_eval_none = false;
1460       CeedInt     num_nodes;
1461       CeedScalar *B_in = NULL, *identity = NULL;
1462 
1463       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_in[b], &num_nodes));
1464       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1465 
1466       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1467         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1468       }
1469       if (has_eval_none) {
1470         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1471         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1472           identity[i * num_nodes + i] = 1.0;
1473         }
1474       }
1475 
1476       for (CeedInt q = 0; q < num_qpts; q++) {
1477         for (CeedInt n = 0; n < num_nodes; n++) {
1478           CeedInt      d_in              = 0, q_comp_in;
1479           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1480 
1481           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1482             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1483             const CeedScalar *B  = NULL;
1484 
1485             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_in[b], data->eval_modes_in[b][e_in], identity, &B));
1486             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_in[b], data->eval_modes_in[b][e_in], &q_comp_in));
1487             if (q_comp_in > 1) {
1488               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1489               else B = &B[(++d_in) * num_qpts * num_nodes];
1490             }
1491             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1492             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1493           }
1494         }
1495       }
1496       if (identity) CeedCall(CeedFree(&identity));
1497       data->assembled_bases_in[b] = B_in;
1498     }
1499   }
1500 
1501   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1502     CeedInt num_qpts;
1503 
1504     if (data->active_bases_out[0] == CEED_BASIS_NONE) CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[0], &num_qpts));
1505     else CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases_out[0], &num_qpts));
1506     for (CeedInt b = 0; b < data->num_active_bases_out; b++) {
1507       bool        has_eval_none = false;
1508       CeedInt     num_nodes;
1509       CeedScalar *B_out = NULL, *identity = NULL;
1510 
1511       CeedCall(CeedElemRestrictionGetElementSize(data->active_elem_rstrs_out[b], &num_nodes));
1512       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1513 
1514       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1515         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1516       }
1517       if (has_eval_none) {
1518         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1519         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1520           identity[i * num_nodes + i] = 1.0;
1521         }
1522       }
1523 
1524       for (CeedInt q = 0; q < num_qpts; q++) {
1525         for (CeedInt n = 0; n < num_nodes; n++) {
1526           CeedInt      d_out              = 0, q_comp_out;
1527           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1528 
1529           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1530             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1531             const CeedScalar *B  = NULL;
1532 
1533             CeedCall(CeedOperatorGetBasisPointer(data->active_bases_out[b], data->eval_modes_out[b][e_out], identity, &B));
1534             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases_out[b], data->eval_modes_out[b][e_out], &q_comp_out));
1535             if (q_comp_out > 1) {
1536               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1537               else B = &B[(++d_out) * num_qpts * num_nodes];
1538             }
1539             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1540             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1541           }
1542         }
1543       }
1544       if (identity) CeedCall(CeedFree(&identity));
1545       data->assembled_bases_out[b] = B_out;
1546     }
1547   }
1548 
1549   // Pass out assembled data
1550   if (num_active_bases_in) *num_active_bases_in = data->num_active_bases_in;
1551   if (active_bases_in) *active_bases_in = data->active_bases_in;
1552   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1553   if (num_active_bases_out) *num_active_bases_out = data->num_active_bases_out;
1554   if (active_bases_out) *active_bases_out = data->active_bases_out;
1555   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1556   return CEED_ERROR_SUCCESS;
1557 }
1558 
1559 /**
1560   @brief Get CeedOperator CeedBasis data for assembly.
1561 
1562   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1563 
1564   @param[in]  data                      CeedOperatorAssemblyData
1565   @param[out] num_active_elem_rstrs_in  Number of active input element restrictions, or NULL
1566   @param[out] active_elem_rstrs_in      Pointer to hold active input CeedElemRestrictions, or NULL
1567   @param[out] num_active_elem_rstrs_out Number of active output element restrictions, or NULL
1568   @param[out] active_elem_rstrs_out     Pointer to hold active output CeedElemRestrictions, or NULL
1569 
1570   @return An error code: 0 - success, otherwise - failure
1571 
1572   @ref Backend
1573 **/
1574 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs_in,
1575                                                 CeedElemRestriction **active_elem_rstrs_in, CeedInt *num_active_elem_rstrs_out,
1576                                                 CeedElemRestriction **active_elem_rstrs_out) {
1577   if (num_active_elem_rstrs_in) *num_active_elem_rstrs_in = data->num_active_bases_in;
1578   if (active_elem_rstrs_in) *active_elem_rstrs_in = data->active_elem_rstrs_in;
1579   if (num_active_elem_rstrs_out) *num_active_elem_rstrs_out = data->num_active_bases_out;
1580   if (active_elem_rstrs_out) *active_elem_rstrs_out = data->active_elem_rstrs_out;
1581   return CEED_ERROR_SUCCESS;
1582 }
1583 
1584 /**
1585   @brief Destroy CeedOperatorAssemblyData
1586 
1587   @param[in,out] data CeedOperatorAssemblyData to destroy
1588 
1589   @return An error code: 0 - success, otherwise - failure
1590 
1591   @ref Backend
1592 **/
1593 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1594   if (!*data) {
1595     *data = NULL;
1596     return CEED_ERROR_SUCCESS;
1597   }
1598   CeedCall(CeedDestroy(&(*data)->ceed));
1599   for (CeedInt b = 0; b < (*data)->num_active_bases_in; b++) {
1600     CeedCall(CeedBasisDestroy(&(*data)->active_bases_in[b]));
1601     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_in[b]));
1602     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1603     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1604     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1605   }
1606   for (CeedInt b = 0; b < (*data)->num_active_bases_out; b++) {
1607     CeedCall(CeedBasisDestroy(&(*data)->active_bases_out[b]));
1608     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs_out[b]));
1609     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1610     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1611     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1612   }
1613   CeedCall(CeedFree(&(*data)->active_bases_in));
1614   CeedCall(CeedFree(&(*data)->active_bases_out));
1615   CeedCall(CeedFree(&(*data)->active_elem_rstrs_in));
1616   CeedCall(CeedFree(&(*data)->active_elem_rstrs_out));
1617   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1618   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1619   CeedCall(CeedFree(&(*data)->eval_modes_in));
1620   CeedCall(CeedFree(&(*data)->eval_modes_out));
1621   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1622   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1623   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1624   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1625 
1626   CeedCall(CeedFree(data));
1627   return CEED_ERROR_SUCCESS;
1628 }
1629 
1630 /**
1631   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
1632 
1633   @param[in]  op          CeedOperator to retrieve fallback for
1634   @param[out] op_fallback Fallback CeedOperator
1635 
1636   @return An error code: 0 - success, otherwise - failure
1637 
1638   @ref Backend
1639 **/
1640 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
1641   // Create if needed
1642   if (!op->op_fallback) CeedCall(CeedOperatorCreateFallback(op));
1643   if (op->op_fallback) {
1644     bool is_debug;
1645 
1646     CeedCall(CeedIsDebug(op->ceed, &is_debug));
1647     if (is_debug) {
1648       Ceed        ceed, ceed_fallback;
1649       const char *resource, *resource_fallback;
1650 
1651       CeedCall(CeedOperatorGetCeed(op, &ceed));
1652       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
1653       CeedCall(CeedGetResource(ceed, &resource));
1654       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
1655 
1656       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n");
1657       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
1658                 op->op_fallback);
1659     }
1660   }
1661   *op_fallback = op->op_fallback;
1662   return CEED_ERROR_SUCCESS;
1663 }
1664 
1665 /**
1666   @brief Get the parent CeedOperator for a fallback CeedOperator
1667 
1668   @param[in]  op     CeedOperator context
1669   @param[out] parent Variable to store parent CeedOperator context
1670 
1671   @return An error code: 0 - success, otherwise - failure
1672 
1673   @ref Backend
1674 **/
1675 int CeedOperatorGetFallbackParent(CeedOperator op, CeedOperator *parent) {
1676   *parent = op->op_fallback_parent ? op->op_fallback_parent : NULL;
1677   return CEED_ERROR_SUCCESS;
1678 }
1679 
1680 /**
1681   @brief Get the Ceed context of the parent CeedOperator for a fallback CeedOperator
1682 
1683   @param[in]  op     CeedOperator context
1684   @param[out] parent Variable to store parent Ceed context
1685 
1686   @return An error code: 0 - success, otherwise - failure
1687 
1688   @ref Backend
1689 **/
1690 int CeedOperatorGetFallbackParentCeed(CeedOperator op, Ceed *parent) {
1691   *parent = op->op_fallback_parent ? op->op_fallback_parent->ceed : op->ceed;
1692   return CEED_ERROR_SUCCESS;
1693 }
1694 
1695 /// @}
1696 
1697 /// ----------------------------------------------------------------------------
1698 /// CeedOperator Public API
1699 /// ----------------------------------------------------------------------------
1700 /// @addtogroup CeedOperatorUser
1701 /// @{
1702 
1703 /**
1704   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1705 
1706   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1707   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices
1708 representing the action of the CeedQFunction for a corresponding quadrature point on an element.
1709 
1710   Inputs and outputs are in the order provided by the user when adding CeedOperator fields.
1711   For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction
1712 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1713 
1714   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1715 
1716   @param[in]  op        CeedOperator to assemble CeedQFunction
1717   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1718   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1719   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1720 
1721   @return An error code: 0 - success, otherwise - failure
1722 
1723   @ref User
1724 **/
1725 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1726   CeedCall(CeedOperatorCheckReady(op));
1727 
1728   if (op->LinearAssembleQFunction) {
1729     // Backend version
1730     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1731   } else {
1732     // Operator fallback
1733     CeedOperator op_fallback;
1734 
1735     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1736     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1737     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1738   }
1739   return CEED_ERROR_SUCCESS;
1740 }
1741 
1742 /**
1743   @brief Assemble CeedQFunction and store result internally.
1744 
1745   Return copied references of stored data to the caller.
1746   Caller is responsible for ownership and destruction of the copied references.
1747   See also @ref CeedOperatorLinearAssembleQFunction
1748 
1749   Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers.
1750         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
1751 
1752   @param[in]  op        CeedOperator to assemble CeedQFunction
1753   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1754   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1755   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1756 
1757   @return An error code: 0 - success, otherwise - failure
1758 
1759   @ref User
1760 **/
1761 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1762   int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL;
1763   CeedOperator op_assemble                                                                           = NULL;
1764   CeedOperator op_fallback_parent                                                                    = NULL;
1765 
1766   CeedCall(CeedOperatorCheckReady(op));
1767 
1768   // Determine if fallback parent or operator has implementation
1769   CeedCall(CeedOperatorGetFallbackParent(op, &op_fallback_parent));
1770   if (op_fallback_parent && op_fallback_parent->LinearAssembleQFunctionUpdate) {
1771     // -- Backend version for op fallback parent is faster, if it exists
1772     LinearAssembleQFunctionUpdate = op_fallback_parent->LinearAssembleQFunctionUpdate;
1773     op_assemble                   = op_fallback_parent;
1774   } else if (op->LinearAssembleQFunctionUpdate) {
1775     // -- Backend version for op
1776     LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate;
1777     op_assemble                   = op;
1778   }
1779 
1780   // Assemble QFunction
1781   if (LinearAssembleQFunctionUpdate) {
1782     // Backend or fallback parent version
1783     bool                qf_assembled_is_setup;
1784     CeedVector          assembled_vec  = NULL;
1785     CeedElemRestriction assembled_rstr = NULL;
1786 
1787     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1788     if (qf_assembled_is_setup) {
1789       bool update_needed;
1790 
1791       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1792       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1793       if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request));
1794     } else {
1795       CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request));
1796       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1797     }
1798     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1799 
1800     // Copy reference from internally held copy
1801     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1802     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1803     CeedCall(CeedVectorDestroy(&assembled_vec));
1804     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1805   } else {
1806     // Operator fallback
1807     CeedOperator op_fallback;
1808 
1809     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1810     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1811     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1812   }
1813   return CEED_ERROR_SUCCESS;
1814 }
1815 
1816 /**
1817   @brief Assemble the diagonal of a square linear CeedOperator
1818 
1819   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1820 
1821   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1822 
1823   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1824 
1825   @param[in]  op        CeedOperator to assemble CeedQFunction
1826   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1827   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1828 
1829   @return An error code: 0 - success, otherwise - failure
1830 
1831   @ref User
1832 **/
1833 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1834   bool     is_composite;
1835   CeedSize input_size = 0, output_size = 0;
1836 
1837   CeedCall(CeedOperatorCheckReady(op));
1838   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1839 
1840   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1841   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1842 
1843   // Early exit for empty operator
1844   if (!is_composite) {
1845     CeedInt num_elem = 0;
1846 
1847     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1848     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1849   }
1850 
1851   if (op->LinearAssembleDiagonal) {
1852     // Backend version
1853     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1854     return CEED_ERROR_SUCCESS;
1855   } else if (op->LinearAssembleAddDiagonal) {
1856     // Backend version with zeroing first
1857     CeedCall(CeedVectorSetValue(assembled, 0.0));
1858     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1859     return CEED_ERROR_SUCCESS;
1860   } else {
1861     // Operator fallback
1862     CeedOperator op_fallback;
1863 
1864     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1865     if (op_fallback) {
1866       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1867       return CEED_ERROR_SUCCESS;
1868     }
1869   }
1870   // Default interface implementation
1871   CeedCall(CeedVectorSetValue(assembled, 0.0));
1872   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1873   return CEED_ERROR_SUCCESS;
1874 }
1875 
1876 /**
1877   @brief Assemble the diagonal of a square linear CeedOperator
1878 
1879   This sums into a CeedVector the diagonal of a linear CeedOperator.
1880 
1881   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1882 
1883   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1884 
1885   @param[in]  op        CeedOperator to assemble CeedQFunction
1886   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1887   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1888 
1889   @return An error code: 0 - success, otherwise - failure
1890 
1891   @ref User
1892 **/
1893 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1894   bool     is_composite;
1895   CeedSize input_size = 0, output_size = 0;
1896 
1897   CeedCall(CeedOperatorCheckReady(op));
1898   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1899 
1900   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1901   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1902 
1903   // Early exit for empty operator
1904   if (!is_composite) {
1905     CeedInt num_elem = 0;
1906 
1907     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1908     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1909   }
1910 
1911   if (op->LinearAssembleAddDiagonal) {
1912     // Backend version
1913     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1914     return CEED_ERROR_SUCCESS;
1915   } else {
1916     // Operator fallback
1917     CeedOperator op_fallback;
1918 
1919     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1920     if (op_fallback) {
1921       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1922       return CEED_ERROR_SUCCESS;
1923     }
1924   }
1925   // Default interface implementation
1926   if (is_composite) {
1927     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1928   } else {
1929     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1930   }
1931   return CEED_ERROR_SUCCESS;
1932 }
1933 
1934 /**
1935    @brief Fully assemble the point-block diagonal pattern of a linear operator.
1936 
1937    Expected to be used in conjunction with CeedOperatorLinearAssemblePointBlockDiagonal().
1938 
1939    The assembly routines use coordinate format, with `num_entries` tuples of the form (i, j, value) which indicate that value should be added to the
1940 matrix in entry (i, j).
1941   Note that the (i, j) pairs are unique.
1942   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemblePointBlockDiagonal() provides the values in
1943 the same ordering.
1944 
1945    This will generally be slow unless your operator is low-order.
1946 
1947    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1948 
1949    @param[in]  op          CeedOperator to assemble
1950    @param[out] num_entries Number of entries in coordinate nonzero pattern
1951    @param[out] rows        Row number for each entry
1952    @param[out] cols        Column number for each entry
1953 
1954    @ref User
1955 **/
1956 int CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1957   Ceed          ceed;
1958   bool          is_composite;
1959   CeedInt       num_active_components, num_sub_operators;
1960   CeedOperator *sub_operators;
1961 
1962   CeedCall(CeedOperatorGetCeed(op, &ceed));
1963   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1964 
1965   CeedSize input_size = 0, output_size = 0;
1966   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1967   CeedCheck(input_size == output_size, ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1968 
1969   if (is_composite) {
1970     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_operators));
1971     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1972   } else {
1973     sub_operators     = &op;
1974     num_sub_operators = 1;
1975   }
1976 
1977   // Verify operator can be assembled correctly
1978   {
1979     CeedOperatorAssemblyData data;
1980     CeedInt                  num_active_elem_rstrs, comp_stride;
1981     CeedElemRestriction     *active_elem_rstrs;
1982 
1983     // Get initial values to check against
1984     CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[0], &data));
1985     CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
1986     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[0], &comp_stride));
1987     CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[0], &num_active_components));
1988 
1989     // Verify that all active element restrictions have same component stride and number of components
1990     for (CeedInt k = 0; k < num_sub_operators; k++) {
1991       CeedCall(CeedOperatorGetOperatorAssemblyData(sub_operators[k], &data));
1992       CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, &num_active_elem_rstrs, &active_elem_rstrs, NULL, NULL));
1993       for (CeedInt i = 0; i < num_active_elem_rstrs; i++) {
1994         CeedInt comp_stride_sub, num_active_components_sub;
1995 
1996         CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstrs[i], &comp_stride_sub));
1997         CeedCheck(comp_stride == comp_stride_sub, ceed, CEED_ERROR_DIMENSION,
1998                   "Active element restrictions must have the same component stride: %d vs %d", comp_stride, comp_stride_sub);
1999         CeedCall(CeedElemRestrictionGetNumComponents(active_elem_rstrs[i], &num_active_components_sub));
2000         CeedCheck(num_active_components == num_active_components_sub, ceed, CEED_ERROR_INCOMPATIBLE,
2001                   "All suboperators must have the same number of output components");
2002       }
2003     }
2004   }
2005   *num_entries = input_size * num_active_components;
2006   CeedCall(CeedCalloc(*num_entries, rows));
2007   CeedCall(CeedCalloc(*num_entries, cols));
2008 
2009   for (CeedInt o = 0; o < num_sub_operators; o++) {
2010     CeedElemRestriction active_elem_rstr, point_block_active_elem_rstr;
2011     CeedInt             comp_stride, num_elem, elem_size;
2012     const CeedInt      *offsets, *point_block_offsets;
2013 
2014     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[o], &active_elem_rstr));
2015     CeedCall(CeedElemRestrictionGetCompStride(active_elem_rstr, &comp_stride));
2016     CeedCall(CeedElemRestrictionGetNumElements(active_elem_rstr, &num_elem));
2017     CeedCall(CeedElemRestrictionGetElementSize(active_elem_rstr, &elem_size));
2018     CeedCall(CeedElemRestrictionGetOffsets(active_elem_rstr, CEED_MEM_HOST, &offsets));
2019 
2020     CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstr, &point_block_active_elem_rstr));
2021     CeedCall(CeedElemRestrictionGetOffsets(point_block_active_elem_rstr, CEED_MEM_HOST, &point_block_offsets));
2022 
2023     for (CeedSize i = 0; i < num_elem * elem_size; i++) {
2024       for (CeedInt c_out = 0; c_out < num_active_components; c_out++) {
2025         for (CeedInt c_in = 0; c_in < num_active_components; c_in++) {
2026           (*rows)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_out * comp_stride;
2027           (*cols)[point_block_offsets[i] + c_out * num_active_components + c_in] = offsets[i] + c_in * comp_stride;
2028         }
2029       }
2030     }
2031 
2032     CeedCall(CeedElemRestrictionRestoreOffsets(active_elem_rstr, &offsets));
2033     CeedCall(CeedElemRestrictionRestoreOffsets(point_block_active_elem_rstr, &point_block_offsets));
2034     CeedCall(CeedElemRestrictionDestroy(&point_block_active_elem_rstr));
2035   }
2036   return CEED_ERROR_SUCCESS;
2037 }
2038 
2039 /**
2040   @brief Assemble the point block diagonal of a square linear CeedOperator
2041 
2042   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
2043 
2044   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
2045 
2046   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2047 
2048   @param[in]  op        CeedOperator to assemble CeedQFunction
2049   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
2050 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
2051 component in].
2052   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2053 
2054   @return An error code: 0 - success, otherwise - failure
2055 
2056   @ref User
2057 **/
2058 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2059   bool     is_composite;
2060   CeedSize input_size = 0, output_size = 0;
2061 
2062   CeedCall(CeedOperatorCheckReady(op));
2063   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2064 
2065   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2066   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
2067 
2068   // Early exit for empty operator
2069   if (!is_composite) {
2070     CeedInt num_elem = 0;
2071 
2072     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2073     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2074   }
2075 
2076   if (op->LinearAssemblePointBlockDiagonal) {
2077     // Backend version
2078     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
2079     return CEED_ERROR_SUCCESS;
2080   } else if (op->LinearAssembleAddPointBlockDiagonal) {
2081     // Backend version with zeroing first
2082     CeedCall(CeedVectorSetValue(assembled, 0.0));
2083     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2084     return CEED_ERROR_SUCCESS;
2085   } else {
2086     // Operator fallback
2087     CeedOperator op_fallback;
2088 
2089     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2090     if (op_fallback) {
2091       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
2092       return CEED_ERROR_SUCCESS;
2093     }
2094   }
2095   // Default interface implementation
2096   CeedCall(CeedVectorSetValue(assembled, 0.0));
2097   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
2098   return CEED_ERROR_SUCCESS;
2099 }
2100 
2101 /**
2102   @brief Assemble the point block diagonal of a square linear CeedOperator
2103 
2104   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
2105 
2106   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
2107 
2108   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2109 
2110   @param[in]  op        CeedOperator to assemble CeedQFunction
2111   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
2112 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
2113 component in].
2114   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2115 
2116   @return An error code: 0 - success, otherwise - failure
2117 
2118   @ref User
2119 **/
2120 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
2121   bool     is_composite;
2122   CeedSize input_size = 0, output_size = 0;
2123 
2124   CeedCall(CeedOperatorCheckReady(op));
2125   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2126 
2127   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
2128   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
2129 
2130   // Early exit for empty operator
2131   if (!is_composite) {
2132     CeedInt num_elem = 0;
2133 
2134     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2135     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2136   }
2137 
2138   if (op->LinearAssembleAddPointBlockDiagonal) {
2139     // Backend version
2140     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
2141     return CEED_ERROR_SUCCESS;
2142   } else {
2143     // Operator fallback
2144     CeedOperator op_fallback;
2145 
2146     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2147     if (op_fallback) {
2148       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
2149       return CEED_ERROR_SUCCESS;
2150     }
2151   }
2152   // Default interface implementation
2153   if (is_composite) {
2154     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
2155   } else {
2156     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
2157   }
2158   return CEED_ERROR_SUCCESS;
2159 }
2160 
2161 /**
2162    @brief Fully assemble the nonzero pattern of a linear operator.
2163 
2164    Expected to be used in conjunction with CeedOperatorLinearAssemble().
2165 
2166    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
2167 matrix in entry (i, j).
2168   Note that the (i, j) pairs are not unique and may repeat.
2169   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
2170 
2171    This will generally be slow unless your operator is low-order.
2172 
2173    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2174 
2175    @param[in]  op          CeedOperator to assemble
2176    @param[out] num_entries Number of entries in coordinate nonzero pattern
2177    @param[out] rows        Row number for each entry
2178    @param[out] cols        Column number for each entry
2179 
2180    @ref User
2181 **/
2182 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
2183   bool          is_composite;
2184   CeedInt       num_suboperators, offset = 0;
2185   CeedSize      single_entries;
2186   CeedOperator *sub_operators;
2187 
2188   CeedCall(CeedOperatorCheckReady(op));
2189   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2190 
2191   if (op->LinearAssembleSymbolic) {
2192     // Backend version
2193     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
2194     return CEED_ERROR_SUCCESS;
2195   } else {
2196     // Operator fallback
2197     CeedOperator op_fallback;
2198 
2199     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2200     if (op_fallback) {
2201       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
2202       return CEED_ERROR_SUCCESS;
2203     }
2204   }
2205 
2206   // Default interface implementation
2207 
2208   // Count entries and allocate rows, cols arrays
2209   *num_entries = 0;
2210   if (is_composite) {
2211     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2212     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2213     for (CeedInt k = 0; k < num_suboperators; ++k) {
2214       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2215       *num_entries += single_entries;
2216     }
2217   } else {
2218     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
2219     *num_entries += single_entries;
2220   }
2221   CeedCall(CeedCalloc(*num_entries, rows));
2222   CeedCall(CeedCalloc(*num_entries, cols));
2223 
2224   // Assemble nonzero locations
2225   if (is_composite) {
2226     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2227     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2228     for (CeedInt k = 0; k < num_suboperators; ++k) {
2229       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
2230       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2231       offset += single_entries;
2232     }
2233   } else {
2234     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
2235   }
2236   return CEED_ERROR_SUCCESS;
2237 }
2238 
2239 /**
2240    @brief Fully assemble the nonzero entries of a linear operator.
2241 
2242    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
2243 
2244    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
2245 matrix in entry (i, j).
2246   Note that the (i, j) pairs are not unique and may repeat.
2247   This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
2248 
2249    This will generally be slow unless your operator is low-order.
2250 
2251    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2252 
2253    @param[in]  op     CeedOperator to assemble
2254    @param[out] values Values to assemble into matrix
2255 
2256    @ref User
2257 **/
2258 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
2259   bool          is_composite;
2260   CeedInt       num_suboperators, offset = 0;
2261   CeedSize      single_entries = 0;
2262   CeedOperator *sub_operators;
2263 
2264   CeedCall(CeedOperatorCheckReady(op));
2265   CeedCall(CeedOperatorIsComposite(op, &is_composite));
2266 
2267   // Early exit for empty operator
2268   if (!is_composite) {
2269     CeedInt num_elem = 0;
2270 
2271     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
2272     if (num_elem == 0) return CEED_ERROR_SUCCESS;
2273   }
2274 
2275   if (op->LinearAssemble) {
2276     // Backend version
2277     CeedCall(op->LinearAssemble(op, values));
2278     return CEED_ERROR_SUCCESS;
2279   } else {
2280     // Operator fallback
2281     CeedOperator op_fallback;
2282 
2283     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2284     if (op_fallback) {
2285       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2286       return CEED_ERROR_SUCCESS;
2287     }
2288   }
2289 
2290   // Default interface implementation
2291   CeedCall(CeedVectorSetValue(values, 0.0));
2292   if (is_composite) {
2293     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2294     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2295     for (CeedInt k = 0; k < num_suboperators; k++) {
2296       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2297       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2298       offset += single_entries;
2299     }
2300   } else {
2301     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2302   }
2303   return CEED_ERROR_SUCCESS;
2304 }
2305 
2306 /**
2307   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
2308 
2309   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2310 
2311   @param[in]  op               Composite CeedOperator
2312   @param[in]  num_skip_indices Number of suboperators to skip
2313   @param[in]  skip_indices     Array of indices of suboperators to skip
2314   @param[out] mult             Vector to store multiplicity (of size l_size)
2315 
2316   @return An error code: 0 - success, otherwise - failure
2317 
2318   @ref User
2319 **/
2320 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2321   Ceed                ceed;
2322   CeedInt             num_suboperators;
2323   CeedSize            l_vec_len;
2324   CeedScalar         *mult_array;
2325   CeedVector          ones_l_vec;
2326   CeedElemRestriction elem_rstr, mult_elem_rstr;
2327   CeedOperator       *sub_operators;
2328 
2329   CeedCall(CeedOperatorCheckReady(op));
2330 
2331   CeedCall(CeedOperatorGetCeed(op, &ceed));
2332 
2333   // Zero mult vector
2334   CeedCall(CeedVectorSetValue(mult, 0.0));
2335 
2336   // Get suboperators
2337   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2338   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2339   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2340 
2341   // Work vector
2342   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2343   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2344   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2345   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2346 
2347   // Compute multiplicity across suboperators
2348   for (CeedInt i = 0; i < num_suboperators; i++) {
2349     const CeedScalar *sub_mult_array;
2350     CeedVector        sub_mult_l_vec, ones_e_vec;
2351 
2352     // -- Check for suboperator to skip
2353     for (CeedInt j = 0; j < num_skip_indices; j++) {
2354       if (skip_indices[j] == i) continue;
2355     }
2356 
2357     // -- Sub operator multiplicity
2358     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2359     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr));
2360     CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2361     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2362     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2363     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2364     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2365     // ---- Flag every node present in the current suboperator
2366     for (CeedInt j = 0; j < l_vec_len; j++) {
2367       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2368     }
2369     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2370     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2371     CeedCall(CeedVectorDestroy(&ones_e_vec));
2372     CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr));
2373   }
2374   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2375   CeedCall(CeedVectorDestroy(&ones_l_vec));
2376   return CEED_ERROR_SUCCESS;
2377 }
2378 
2379 /**
2380   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2381 grid interpolation
2382 
2383   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2384 
2385   @param[in]  op_fine      Fine grid operator
2386   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2387   @param[in]  rstr_coarse  Coarse grid restriction
2388   @param[in]  basis_coarse Coarse grid active vector basis
2389   @param[out] op_coarse    Coarse grid operator
2390   @param[out] op_prolong   Coarse to fine operator, or NULL
2391   @param[out] op_restrict  Fine to coarse operator, or NULL
2392 
2393   @return An error code: 0 - success, otherwise - failure
2394 
2395   @ref User
2396 **/
2397 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2398                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2399   CeedBasis basis_c_to_f = NULL;
2400 
2401   CeedCall(CeedOperatorCheckReady(op_fine));
2402 
2403   // Build prolongation matrix, if required
2404   if (op_prolong || op_restrict) {
2405     CeedBasis basis_fine;
2406 
2407     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2408     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2409   }
2410 
2411   // Core code
2412   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2413   return CEED_ERROR_SUCCESS;
2414 }
2415 
2416 /**
2417   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2418 
2419   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2420 
2421   @param[in]  op_fine       Fine grid operator
2422   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2423   @param[in]  rstr_coarse   Coarse grid restriction
2424   @param[in]  basis_coarse  Coarse grid active vector basis
2425   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2426   @param[out] op_coarse     Coarse grid operator
2427   @param[out] op_prolong    Coarse to fine operator, or NULL
2428   @param[out] op_restrict   Fine to coarse operator, or NULL
2429 
2430   @return An error code: 0 - success, otherwise - failure
2431 
2432   @ref User
2433 **/
2434 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2435                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2436                                              CeedOperator *op_restrict) {
2437   Ceed      ceed;
2438   CeedInt   Q_f, Q_c;
2439   CeedBasis basis_fine, basis_c_to_f = NULL;
2440 
2441   CeedCall(CeedOperatorCheckReady(op_fine));
2442   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2443 
2444   // Check for compatible quadrature spaces
2445   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2446   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2447   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2448   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2449 
2450   // Create coarse to fine basis, if required
2451   if (op_prolong || op_restrict) {
2452     CeedInt     dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2453     CeedScalar *q_ref, *q_weight, *grad;
2454 
2455     // Check if interpolation matrix is provided
2456     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2457               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2458     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2459     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2460     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2461     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2462     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2463     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2464     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2465     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2466     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2467     CeedCall(CeedFree(&q_ref));
2468     CeedCall(CeedFree(&q_weight));
2469     CeedCall(CeedFree(&grad));
2470   }
2471 
2472   // Core code
2473   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2474   return CEED_ERROR_SUCCESS;
2475 }
2476 
2477 /**
2478   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2479 
2480   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2481 
2482   @param[in]  op_fine       Fine grid operator
2483   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2484   @param[in]  rstr_coarse   Coarse grid restriction
2485   @param[in]  basis_coarse  Coarse grid active vector basis
2486   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2487   @param[out] op_coarse     Coarse grid operator
2488   @param[out] op_prolong    Coarse to fine operator, or NULL
2489   @param[out] op_restrict   Fine to coarse operator, or NULL
2490 
2491   @return An error code: 0 - success, otherwise - failure
2492 
2493   @ref User
2494 **/
2495 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2496                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2497                                        CeedOperator *op_restrict) {
2498   Ceed      ceed;
2499   CeedInt   Q_f, Q_c;
2500   CeedBasis basis_fine, basis_c_to_f = NULL;
2501 
2502   CeedCall(CeedOperatorCheckReady(op_fine));
2503   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2504 
2505   // Check for compatible quadrature spaces
2506   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2507   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2508   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2509   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2510 
2511   // Coarse to fine basis
2512   if (op_prolong || op_restrict) {
2513     CeedInt          dim, num_comp, num_nodes_c, num_nodes_f;
2514     CeedScalar      *q_ref, *q_weight, *grad;
2515     CeedElemTopology topo;
2516 
2517     // Check if interpolation matrix is provided
2518     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2519               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2520     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2521     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2522     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2523     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2524     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2525     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2526     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2527     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2528     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2529     CeedCall(CeedFree(&q_ref));
2530     CeedCall(CeedFree(&q_weight));
2531     CeedCall(CeedFree(&grad));
2532   }
2533 
2534   // Core code
2535   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2536   return CEED_ERROR_SUCCESS;
2537 }
2538 
2539 /**
2540   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2541 
2542   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2543   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2544   The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T
2545 \hat S V\f$.
2546   The CeedOperator must be linear and non-composite.
2547   The associated CeedQFunction must therefore also be linear.
2548 
2549   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2550 
2551   @param[in]  op      CeedOperator to create element inverses
2552   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2553   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2554 
2555   @return An error code: 0 - success, otherwise - failure
2556 
2557   @ref User
2558 **/
2559 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2560   Ceed                 ceed, ceed_parent;
2561   bool                 interp = false, grad = false, is_tensor_basis = true;
2562   CeedInt              num_input_fields, P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2563   CeedSize             l_size = 1;
2564   CeedScalar          *mass, *laplace, *x, *fdm_interp, *lambda, *elem_avg;
2565   const CeedScalar    *interp_1d, *grad_1d, *q_weight_1d;
2566   CeedVector           q_data;
2567   CeedElemRestriction  rstr  = NULL, rstr_qd_i;
2568   CeedBasis            basis = NULL, fdm_basis;
2569   CeedQFunctionContext ctx_fdm;
2570   CeedQFunctionField  *qf_fields;
2571   CeedQFunction        qf, qf_fdm;
2572   CeedOperatorField   *op_fields;
2573 
2574   CeedCall(CeedOperatorCheckReady(op));
2575 
2576   if (op->CreateFDMElementInverse) {
2577     // Backend version
2578     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2579     return CEED_ERROR_SUCCESS;
2580   } else {
2581     // Operator fallback
2582     CeedOperator op_fallback;
2583 
2584     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2585     if (op_fallback) {
2586       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2587       return CEED_ERROR_SUCCESS;
2588     }
2589   }
2590 
2591   // Default interface implementation
2592   CeedCall(CeedOperatorGetCeed(op, &ceed));
2593   CeedCall(CeedOperatorGetFallbackParentCeed(op, &ceed_parent));
2594   CeedCall(CeedOperatorGetQFunction(op, &qf));
2595 
2596   // Determine active input basis
2597   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2598   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2599   for (CeedInt i = 0; i < num_input_fields; i++) {
2600     CeedVector vec;
2601 
2602     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2603     if (vec == CEED_VECTOR_ACTIVE) {
2604       CeedEvalMode eval_mode;
2605 
2606       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2607       interp = interp || eval_mode == CEED_EVAL_INTERP;
2608       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2609       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2610       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2611     }
2612   }
2613   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2614   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2615   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2616   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2617   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2618   CeedCall(CeedBasisGetDimension(basis, &dim));
2619   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2620   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2621   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2622 
2623   // Build and diagonalize 1D Mass and Laplacian
2624   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2625   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2626   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2627   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2628   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2629   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2630   CeedCall(CeedCalloc(P_1d, &lambda));
2631   // -- Build matrices
2632   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2633   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2634   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2635   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2636 
2637   // -- Diagonalize
2638   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2639   CeedCall(CeedFree(&mass));
2640   CeedCall(CeedFree(&laplace));
2641   for (CeedInt i = 0; i < P_1d; i++) {
2642     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2643   }
2644   CeedCall(CeedFree(&x));
2645 
2646   {
2647     CeedInt             layout[3], num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2648     CeedScalar          max_norm = 0;
2649     const CeedScalar   *assembled_array, *q_weight_array;
2650     CeedVector          assembled = NULL, q_weight;
2651     CeedElemRestriction rstr_qf   = NULL;
2652 
2653     // Assemble QFunction
2654     CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2655     CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2656     CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2657     CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2658 
2659     // Calculate element averages
2660     CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2661     CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2662     CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2663     CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2664     CeedCall(CeedCalloc(num_elem, &elem_avg));
2665     const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2666 
2667     for (CeedInt e = 0; e < num_elem; e++) {
2668       CeedInt count = 0;
2669 
2670       for (CeedInt q = 0; q < num_qpts; q++) {
2671         for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2672           if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2673             elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2674             count++;
2675           }
2676         }
2677       }
2678       if (count) {
2679         elem_avg[e] /= count;
2680       } else {
2681         elem_avg[e] = 1.0;
2682       }
2683     }
2684     CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2685     CeedCall(CeedVectorDestroy(&assembled));
2686     CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2687     CeedCall(CeedVectorDestroy(&q_weight));
2688   }
2689 
2690   // Build FDM diagonal
2691   {
2692     CeedScalar *q_data_array, *fdm_diagonal;
2693 
2694     CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2695     const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2696     for (CeedInt c = 0; c < num_comp; c++) {
2697       for (CeedInt n = 0; n < num_nodes; n++) {
2698         if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2699         if (grad) {
2700           for (CeedInt d = 0; d < dim; d++) {
2701             CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2702             fdm_diagonal[c * num_nodes + n] += lambda[i];
2703           }
2704         }
2705         if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2706       }
2707     }
2708     CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2709     CeedCall(CeedVectorSetValue(q_data, 0.0));
2710     CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2711     for (CeedInt e = 0; e < num_elem; e++) {
2712       for (CeedInt c = 0; c < num_comp; c++) {
2713         for (CeedInt n = 0; n < num_nodes; n++)
2714           q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2715       }
2716     }
2717     CeedCall(CeedFree(&elem_avg));
2718     CeedCall(CeedFree(&fdm_diagonal));
2719     CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2720   }
2721 
2722   // Setup FDM operator
2723   // -- Basis
2724   {
2725     CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2726 
2727     CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2728     CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2729     CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2730     CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2731     CeedCall(CeedFree(&fdm_interp));
2732     CeedCall(CeedFree(&grad_dummy));
2733     CeedCall(CeedFree(&q_ref_dummy));
2734     CeedCall(CeedFree(&q_weight_dummy));
2735     CeedCall(CeedFree(&lambda));
2736   }
2737 
2738   // -- Restriction
2739   {
2740     CeedInt strides[3] = {1, num_nodes, num_nodes * num_comp};
2741     CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i));
2742   }
2743 
2744   // -- QFunction
2745   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2746   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2747   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2748   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2749   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2750 
2751   // -- QFunction context
2752   {
2753     CeedInt *num_comp_data;
2754 
2755     CeedCall(CeedCalloc(1, &num_comp_data));
2756     num_comp_data[0] = num_comp;
2757     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2758     CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2759   }
2760   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2761   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2762 
2763   // -- Operator
2764   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2765   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2766   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_NONE, q_data));
2767   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2768 
2769   // Cleanup
2770   CeedCall(CeedVectorDestroy(&q_data));
2771   CeedCall(CeedBasisDestroy(&fdm_basis));
2772   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2773   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2774   return CEED_ERROR_SUCCESS;
2775 }
2776 
2777 /// @}
2778