xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision c52157525785c51910c8fadbbfd077683f26126a)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) {
128       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     }
130     // Cleanup
131     CeedCall(CeedQFunctionDestroy(&qf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
133     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
134   }
135   CeedCall(CeedOperatorSetName(op_fallback, op->name));
136   CeedCall(CeedOperatorCheckReady(op_fallback));
137   op->op_fallback = op_fallback;
138 
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
144 
145   @param[in]  op          CeedOperator to retrieve fallback for
146   @param[out] op_fallback Fallback CeedOperator
147 
148   @return An error code: 0 - success, otherwise - failure
149 
150   @ref Developer
151 **/
152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
153   // Create if needed
154   if (!op->op_fallback) {
155     CeedCall(CeedOperatorCreateFallback(op));
156   }
157   if (op->op_fallback) {
158     bool is_debug;
159 
160     CeedCall(CeedIsDebug(op->ceed, &is_debug));
161     if (is_debug) {
162       Ceed        ceed, ceed_fallback;
163       const char *resource, *resource_fallback;
164 
165       CeedCall(CeedOperatorGetCeed(op, &ceed));
166       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
167       CeedCall(CeedGetResource(ceed, &resource));
168       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
169 
170       CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n");
171       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
172                 op->op_fallback);
173     }
174   }
175   *op_fallback = op->op_fallback;
176 
177   return CEED_ERROR_SUCCESS;
178 }
179 
180 /**
181   @brief Select correct basis matrix pointer based on CeedEvalMode
182 
183   @param[in]  basis     CeedBasis from which to get the basis matrix
184   @param[in]  eval_mode Current basis evaluation mode
185   @param[in]  identity  Pointer to identity matrix
186   @param[out] basis_ptr Basis pointer to set
187 
188   @ref Developer
189 **/
190 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
191   switch (eval_mode) {
192     case CEED_EVAL_NONE:
193       *basis_ptr = identity;
194       break;
195     case CEED_EVAL_INTERP:
196       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
197       break;
198     case CEED_EVAL_GRAD:
199       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
200       break;
201     case CEED_EVAL_DIV:
202       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
203       break;
204     case CEED_EVAL_CURL:
205       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
206       break;
207     case CEED_EVAL_WEIGHT:
208       break;  // Caught by QF Assembly
209   }
210   assert(*basis_ptr != NULL);
211 
212   return CEED_ERROR_SUCCESS;
213 }
214 
215 /**
216   @brief Create point block restriction for active operator field
217 
218   @param[in]  rstr            Original CeedElemRestriction for active field
219   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
220 
221   @return An error code: 0 - success, otherwise - failure
222 
223   @ref Developer
224 **/
225 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
226   Ceed ceed;
227   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
228   const CeedInt *offsets;
229   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
230 
231   // Expand offsets
232   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
233   CeedSize l_size;
234   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
235   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
236   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
237   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
238   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
239   CeedInt shift = num_comp;
240   if (comp_stride != 1) shift *= num_comp;
241   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
242   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
243     pointblock_offsets[i] = offsets[i] * shift;
244   }
245 
246   // Create new restriction
247   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
248                                      pointblock_offsets, pointblock_rstr));
249 
250   // Cleanup
251   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
252 
253   return CEED_ERROR_SUCCESS;
254 }
255 
256 /**
257   @brief Core logic for assembling operator diagonal or point block diagonal
258 
259   @param[in]  op            CeedOperator to assemble point block diagonal
260   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
261   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
262   @param[out] assembled     CeedVector to store assembled diagonal
263 
264   @return An error code: 0 - success, otherwise - failure
265 
266   @ref Developer
267 **/
268 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
269   Ceed ceed;
270   CeedCall(CeedOperatorGetCeed(op, &ceed));
271 
272   // Assemble QFunction
273   CeedQFunction       qf;
274   const CeedScalar   *assembled_qf_array;
275   CeedVector          assembled_qf;
276   CeedElemRestriction assembled_elem_rstr;
277   CeedInt             num_input_fields, num_output_fields;
278   CeedInt             layout[3];
279 
280   CeedCall(CeedOperatorGetQFunction(op, &qf));
281   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
282   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
283   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout));
284   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
285   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
286 
287   // Get assembly data
288   CeedOperatorAssemblyData data;
289   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
290   CeedInt                 *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
291   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
292   CeedBasis               *active_bases;
293   CeedElemRestriction     *active_elem_rstrs;
294   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
295   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
296                                                 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components));
297   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL));
298   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs));
299 
300   // Loop over all active bases
301   for (CeedInt b = 0; b < num_active_bases; b++) {
302     // Assemble point block diagonal restriction, if needed
303     CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b];
304 
305     if (is_pointblock) {
306       CeedElemRestriction point_block_elem_rstr;
307 
308       CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr));
309       diag_elem_rstr = point_block_elem_rstr;
310     }
311 
312     // Create diagonal vector
313     CeedVector elem_diag;
314     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
315 
316     // Assemble element operator diagonals
317     CeedScalar *elem_diag_array;
318     CeedInt     num_elem, num_nodes, num_qpts, num_components;
319 
320     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
321     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
322     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
323     CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes));
324     CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components));
325     CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts));
326 
327     // Construct identity matrix for basis if required
328     bool        has_eval_none = false;
329     CeedScalar *identity      = NULL;
330     for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) {
331       has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE);
332     }
333     for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) {
334       has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE);
335     }
336     if (has_eval_none) {
337       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
338       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
339     }
340 
341     // Compute the diagonal of B^T D B
342     // Each element
343     for (CeedInt e = 0; e < num_elem; e++) {
344       // Each basis eval mode pair
345       CeedInt      d_out              = 0, q_comp_out;
346       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
347       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) {
348         const CeedScalar *B_t = NULL;
349         CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t);
350         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out));
351         if (q_comp_out > 1) {
352           if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
353           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
354         }
355         eval_mode_out_prev = eval_modes_out[b][e_out];
356 
357         CeedInt      d_in              = 0, q_comp_in;
358         CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
359         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) {
360           const CeedScalar *B = NULL;
361           CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B);
362           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in));
363           if (q_comp_in > 1) {
364             if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
365             else B = &B[(++d_in) * num_qpts * num_nodes];
366           }
367           eval_mode_in_prev = eval_modes_in[b][e_in];
368 
369           // Each component
370           for (CeedInt c_out = 0; c_out < num_components; c_out++) {
371             // Each qpt/node pair
372             for (CeedInt q = 0; q < num_qpts; q++) {
373               if (is_pointblock) {
374                 // Point Block Diagonal
375                 for (CeedInt c_in = 0; c_in < num_components; c_in++) {
376                   const CeedInt c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
377                   const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
378                   for (CeedInt n = 0; n < num_nodes; n++) {
379                     elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] +=
380                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
381                   }
382                 }
383               } else {
384                 // Diagonal Only
385                 const CeedInt    c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
386                 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
387                 for (CeedInt n = 0; n < num_nodes; n++) {
388                   elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
389                 }
390               }
391             }
392           }
393         }
394       }
395     }
396     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
397 
398     // Assemble local operator diagonal
399     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
400 
401     // Cleanup
402     if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
403     CeedCall(CeedVectorDestroy(&elem_diag));
404     CeedCall(CeedFree(&identity));
405   }
406   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
407   CeedCall(CeedVectorDestroy(&assembled_qf));
408 
409   return CEED_ERROR_SUCCESS;
410 }
411 
412 /**
413   @brief Core logic for assembling composite operator diagonal
414 
415   @param[in]  op            CeedOperator to assemble point block diagonal
416   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
417   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
418   @param[out] assembled     CeedVector to store assembled diagonal
419 
420   @return An error code: 0 - success, otherwise - failure
421 
422   @ref Developer
423 **/
424 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
425                                                                  CeedVector assembled) {
426   CeedInt       num_sub;
427   CeedOperator *suboperators;
428   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
429   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
430   for (CeedInt i = 0; i < num_sub; i++) {
431     if (is_pointblock) {
432       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
433     } else {
434       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
435     }
436   }
437   return CEED_ERROR_SUCCESS;
438 }
439 
440 /**
441   @brief Build nonzero pattern for non-composite operator
442 
443   Users should generally use CeedOperatorLinearAssembleSymbolic()
444 
445   @param[in]  op     CeedOperator to assemble nonzero pattern
446   @param[in]  offset Offset for number of entries
447   @param[out] rows   Row number for each entry
448   @param[out] cols   Column number for each entry
449 
450   @return An error code: 0 - success, otherwise - failure
451 
452   @ref Developer
453 **/
454 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
455   Ceed ceed;
456   bool is_composite;
457   CeedCall(CeedOperatorGetCeed(op, &ceed));
458   CeedCall(CeedOperatorIsComposite(op, &is_composite));
459 
460   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
461 
462   CeedSize num_nodes;
463   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
464   CeedElemRestriction rstr_in;
465   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
466   CeedInt num_elem, elem_size, num_comp;
467   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
468   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
469   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
470   CeedInt layout_er[3];
471   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
472 
473   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
474 
475   // Determine elem_dof relation
476   CeedVector index_vec;
477   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
478   CeedScalar *array;
479   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
480   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
481   CeedCall(CeedVectorRestoreArray(index_vec, &array));
482   CeedVector elem_dof;
483   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
484   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
485   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
486   const CeedScalar *elem_dof_a;
487   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
488   CeedCall(CeedVectorDestroy(&index_vec));
489 
490   // Determine i, j locations for element matrices
491   CeedInt count = 0;
492   for (CeedInt e = 0; e < num_elem; e++) {
493     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
494       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
495         for (CeedInt i = 0; i < elem_size; i++) {
496           for (CeedInt j = 0; j < elem_size; j++) {
497             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
498             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
499 
500             const CeedInt row = elem_dof_a[elem_dof_index_row];
501             const CeedInt col = elem_dof_a[elem_dof_index_col];
502 
503             rows[offset + count] = row;
504             cols[offset + count] = col;
505             count++;
506           }
507         }
508       }
509     }
510   }
511   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
512   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
513   CeedCall(CeedVectorDestroy(&elem_dof));
514 
515   return CEED_ERROR_SUCCESS;
516 }
517 
518 /**
519   @brief Assemble nonzero entries for non-composite operator
520 
521   Users should generally use CeedOperatorLinearAssemble()
522 
523   @param[in]  op     CeedOperator to assemble
524   @param[in]  offset Offset for number of entries
525   @param[out] values Values to assemble into matrix
526 
527   @return An error code: 0 - success, otherwise - failure
528 
529   @ref Developer
530 **/
531 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
532   Ceed ceed;
533   bool is_composite;
534   CeedCall(CeedOperatorGetCeed(op, &ceed));
535   CeedCall(CeedOperatorIsComposite(op, &is_composite));
536 
537   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
538 
539   // Early exit for empty operator
540   {
541     CeedInt num_elem = 0;
542 
543     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
544     if (num_elem == 0) return CEED_ERROR_SUCCESS;
545   }
546 
547   if (op->LinearAssembleSingle) {
548     // Backend version
549     CeedCall(op->LinearAssembleSingle(op, offset, values));
550     return CEED_ERROR_SUCCESS;
551   } else {
552     // Operator fallback
553     CeedOperator op_fallback;
554 
555     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
556     if (op_fallback) {
557       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
558       return CEED_ERROR_SUCCESS;
559     }
560   }
561 
562   // Assemble QFunction
563   CeedQFunction qf;
564   CeedCall(CeedOperatorGetQFunction(op, &qf));
565   CeedVector          assembled_qf;
566   CeedElemRestriction rstr_q;
567   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
568   CeedSize qf_length;
569   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
570 
571   CeedInt            num_input_fields, num_output_fields;
572   CeedOperatorField *input_fields;
573   CeedOperatorField *output_fields;
574   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
575 
576   // Get assembly data
577   CeedOperatorAssemblyData data;
578   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
579   const CeedEvalMode **eval_modes_in, **eval_modes_out;
580   CeedInt             *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
581   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out,
582                                                 &eval_modes_out, NULL, NULL));
583   CeedBasis *bases;
584   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL));
585   CeedBasis basis_in = bases[0];
586 
587   CeedCheck(num_active_bases == 1, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases");
588   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
589 
590   CeedElemRestriction active_rstr;
591   CeedInt             num_elem, elem_size, num_qpts, num_comp;
592   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
593   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
594   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
595   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
596   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
597 
598   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
599 
600   // loop over elements and put in data structure
601   const CeedScalar *assembled_qf_array;
602   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
603 
604   CeedInt layout_qf[3];
605   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
606   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
607 
608   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
609   const CeedScalar **B_mats_in, **B_mats_out;
610   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out));
611   const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0];
612   CeedScalar        BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]];
613   CeedScalar        elem_mat[elem_size * elem_size];
614   CeedInt           count = 0;
615   CeedScalar       *vals;
616   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
617   for (CeedInt e = 0; e < num_elem; e++) {
618     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
619       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
620         // Compute B^T*D
621         for (CeedInt n = 0; n < elem_size; n++) {
622           for (CeedInt q = 0; q < num_qpts; q++) {
623             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
624               const CeedInt btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in);
625               CeedScalar    sum       = 0.0;
626               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
627                 const CeedInt b_out_index     = (num_eval_modes_out[0] * q + e_out) * elem_size + n;
628                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out;
629                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
630                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
631               }
632               BTD_mat[btd_index] = sum;
633             }
634           }
635         }
636         // form element matrix itself (for each block component)
637         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0]));
638 
639         // put element matrix in coordinate data structure
640         for (CeedInt i = 0; i < elem_size; i++) {
641           for (CeedInt j = 0; j < elem_size; j++) {
642             vals[offset + count] = elem_mat[i * elem_size + j];
643             count++;
644           }
645         }
646       }
647     }
648   }
649   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries");
650   CeedCall(CeedVectorRestoreArray(values, &vals));
651 
652   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
653   CeedCall(CeedVectorDestroy(&assembled_qf));
654 
655   return CEED_ERROR_SUCCESS;
656 }
657 
658 /**
659   @brief Count number of entries for assembled CeedOperator
660 
661   @param[in]  op          CeedOperator to assemble
662   @param[out] num_entries Number of entries in assembled representation
663 
664   @return An error code: 0 - success, otherwise - failure
665 
666   @ref Utility
667 **/
668 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
669   bool                is_composite;
670   CeedElemRestriction rstr;
671   CeedInt             num_elem, elem_size, num_comp;
672 
673   CeedCall(CeedOperatorIsComposite(op, &is_composite));
674   CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
675   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
676   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
677   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
678   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
679   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
680 
681   return CEED_ERROR_SUCCESS;
682 }
683 
684 /**
685   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
686 
687   @param[in]  op_fine      Fine grid operator
688   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
689   @param[in]  rstr_coarse  Coarse grid restriction
690   @param[in]  basis_coarse Coarse grid active vector basis
691   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
692   @param[out] op_coarse    Coarse grid operator
693   @param[out] op_prolong   Coarse to fine operator, or NULL
694   @param[out] op_restrict  Fine to coarse operator, or NULL
695 
696   @return An error code: 0 - success, otherwise - failure
697 
698   @ref Developer
699 **/
700 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
701                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
702   Ceed       ceed;
703   CeedVector mult_vec = NULL;
704   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
705 
706   // Check for composite operator
707   bool is_composite;
708   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
709   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
710 
711   // Coarse Grid
712   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
713   CeedElemRestriction rstr_fine = NULL;
714   // -- Clone input fields
715   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
716     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
717       rstr_fine = op_fine->input_fields[i]->elem_rstr;
718       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
719     } else {
720       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
721                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
722     }
723   }
724   // -- Clone output fields
725   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
726     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
727       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
728     } else {
729       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
730                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
731     }
732   }
733   // -- Clone QFunctionAssemblyData
734   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
735 
736   // Multiplicity vector
737   if (op_restrict || op_prolong) {
738     CeedVector mult_e_vec;
739 
740     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
741     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
742     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
743     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
744     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
745     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
746     CeedCall(CeedVectorDestroy(&mult_e_vec));
747     CeedCall(CeedVectorReciprocal(mult_vec));
748   }
749 
750   // Clone name
751   bool   has_name = op_fine->name;
752   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
753   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
754 
755   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
756   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
757             "Prolongation or restriction operator creation requires coarse-to-fine basis");
758 
759   // Restriction/Prolongation Operators
760   CeedInt num_comp;
761   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
762 
763   // Restriction
764   if (op_restrict) {
765     CeedInt             *num_comp_r_data;
766     CeedQFunction        qf_restrict;
767     CeedQFunctionContext ctx_r;
768 
769     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
770     CeedCall(CeedCalloc(1, &num_comp_r_data));
771     num_comp_r_data[0] = num_comp;
772     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
773     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
774     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
775     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
776     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
777     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
778     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
779     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
780 
781     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
782     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
783     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
784     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
785 
786     // Set name
787     char *restriction_name;
788     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
789     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
790     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
791     CeedCall(CeedFree(&restriction_name));
792 
793     // Check
794     CeedCall(CeedOperatorCheckReady(*op_restrict));
795 
796     // Cleanup
797     CeedCall(CeedQFunctionDestroy(&qf_restrict));
798   }
799 
800   // Prolongation
801   if (op_prolong) {
802     CeedInt             *num_comp_p_data;
803     CeedQFunction        qf_prolong;
804     CeedQFunctionContext ctx_p;
805 
806     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
807     CeedCall(CeedCalloc(1, &num_comp_p_data));
808     num_comp_p_data[0] = num_comp;
809     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
810     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
811     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
812     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
813     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
814     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
815     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
816     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
817 
818     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
819     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
820     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
821     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
822 
823     // Set name
824     char *prolongation_name;
825     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
826     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
827     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
828     CeedCall(CeedFree(&prolongation_name));
829 
830     // Check
831     CeedCall(CeedOperatorCheckReady(*op_prolong));
832 
833     // Cleanup
834     CeedCall(CeedQFunctionDestroy(&qf_prolong));
835   }
836 
837   // Check
838   CeedCall(CeedOperatorCheckReady(*op_coarse));
839 
840   // Cleanup
841   CeedCall(CeedVectorDestroy(&mult_vec));
842   CeedCall(CeedBasisDestroy(&basis_c_to_f));
843 
844   return CEED_ERROR_SUCCESS;
845 }
846 
847 /**
848   @brief Build 1D mass matrix and Laplacian with perturbation
849 
850   @param[in]  interp_1d   Interpolation matrix in one dimension
851   @param[in]  grad_1d     Gradient matrix in one dimension
852   @param[in]  q_weight_1d Quadrature weights in one dimension
853   @param[in]  P_1d        Number of basis nodes in one dimension
854   @param[in]  Q_1d        Number of quadrature points in one dimension
855   @param[in]  dim         Dimension of basis
856   @param[out] mass        Assembled mass matrix in one dimension
857   @param[out] laplace     Assembled perturbed Laplacian in one dimension
858 
859   @return An error code: 0 - success, otherwise - failure
860 
861   @ref Developer
862 **/
863 CeedPragmaOptimizeOff
864 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
865                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
866   for (CeedInt i = 0; i < P_1d; i++) {
867     for (CeedInt j = 0; j < P_1d; j++) {
868       CeedScalar sum = 0.0;
869       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
870       mass[i + j * P_1d] = sum;
871     }
872   }
873   // -- Laplacian
874   for (CeedInt i = 0; i < P_1d; i++) {
875     for (CeedInt j = 0; j < P_1d; j++) {
876       CeedScalar sum = 0.0;
877       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
878       laplace[i + j * P_1d] = sum;
879     }
880   }
881   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
882   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
883   return CEED_ERROR_SUCCESS;
884 }
885 CeedPragmaOptimizeOn
886 
887 /// @}
888 
889 /// ----------------------------------------------------------------------------
890 /// CeedOperator Backend API
891 /// ----------------------------------------------------------------------------
892 /// @addtogroup CeedOperatorBackend
893 /// @{
894 
895 /**
896   @brief Create object holding CeedQFunction assembly data for CeedOperator
897 
898   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
899   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
900 
901   @return An error code: 0 - success, otherwise - failure
902 
903   @ref Backend
904 **/
905 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
906   CeedCall(CeedCalloc(1, data));
907   (*data)->ref_count = 1;
908   (*data)->ceed      = ceed;
909   CeedCall(CeedReference(ceed));
910 
911   return CEED_ERROR_SUCCESS;
912 }
913 
914 /**
915   @brief Increment the reference counter for a CeedQFunctionAssemblyData
916 
917   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
918 
919   @return An error code: 0 - success, otherwise - failure
920 
921   @ref Backend
922 **/
923 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
924   data->ref_count++;
925   return CEED_ERROR_SUCCESS;
926 }
927 
928 /**
929   @brief Set re-use of CeedQFunctionAssemblyData
930 
931   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
932   @param[in]     reuse_data Boolean flag indicating data re-use
933 
934   @return An error code: 0 - success, otherwise - failure
935 
936   @ref Backend
937 **/
938 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
939   data->reuse_data        = reuse_data;
940   data->needs_data_update = true;
941   return CEED_ERROR_SUCCESS;
942 }
943 
944 /**
945   @brief Mark QFunctionAssemblyData as stale
946 
947   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
948   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
949 
950   @return An error code: 0 - success, otherwise - failure
951 
952   @ref Backend
953 **/
954 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
955   data->needs_data_update = needs_data_update;
956   return CEED_ERROR_SUCCESS;
957 }
958 
959 /**
960   @brief Determine if QFunctionAssemblyData needs update
961 
962   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
963   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
964 
965   @return An error code: 0 - success, otherwise - failure
966 
967   @ref Backend
968 **/
969 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
970   *is_update_needed = !data->reuse_data || data->needs_data_update;
971   return CEED_ERROR_SUCCESS;
972 }
973 
974 /**
975   @brief Copy the pointer to a CeedQFunctionAssemblyData.
976 
977   Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
978 
979   Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
980         CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
981         CeedQFunctionAssemblyData.
982 
983   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
984   @param[in,out] data_copy Variable to store copied reference
985 
986   @return An error code: 0 - success, otherwise - failure
987 
988   @ref Backend
989 **/
990 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
991   CeedCall(CeedQFunctionAssemblyDataReference(data));
992   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
993   *data_copy = data;
994   return CEED_ERROR_SUCCESS;
995 }
996 
997 /**
998   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
999 
1000   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1001   @param[out] is_setup Boolean flag for setup status
1002 
1003   @return An error code: 0 - success, otherwise - failure
1004 
1005   @ref Backend
1006 **/
1007 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1008   *is_setup = data->is_setup;
1009   return CEED_ERROR_SUCCESS;
1010 }
1011 
1012 /**
1013   @brief Set internal objects for CeedQFunctionAssemblyData
1014 
1015   @param[in,out] data CeedQFunctionAssemblyData to set objects
1016   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1017   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1018 
1019   @return An error code: 0 - success, otherwise - failure
1020 
1021   @ref Backend
1022 **/
1023 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1024   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1025   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1026 
1027   data->is_setup = true;
1028   return CEED_ERROR_SUCCESS;
1029 }
1030 
1031 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1032   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1033 
1034   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1035   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1036 
1037   return CEED_ERROR_SUCCESS;
1038 }
1039 
1040 /**
1041   @brief Destroy CeedQFunctionAssemblyData
1042 
1043   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1044 
1045   @return An error code: 0 - success, otherwise - failure
1046 
1047   @ref Backend
1048 **/
1049 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1050   if (!*data || --(*data)->ref_count > 0) {
1051     *data = NULL;
1052     return CEED_ERROR_SUCCESS;
1053   }
1054   CeedCall(CeedDestroy(&(*data)->ceed));
1055   CeedCall(CeedVectorDestroy(&(*data)->vec));
1056   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1057 
1058   CeedCall(CeedFree(data));
1059   return CEED_ERROR_SUCCESS;
1060 }
1061 
1062 /**
1063   @brief Get CeedOperatorAssemblyData
1064 
1065   @param[in]  op   CeedOperator to assemble
1066   @param[out] data CeedQFunctionAssemblyData
1067 
1068   @return An error code: 0 - success, otherwise - failure
1069 
1070   @ref Backend
1071 **/
1072 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1073   if (!op->op_assembled) {
1074     CeedOperatorAssemblyData data;
1075 
1076     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1077     op->op_assembled = data;
1078   }
1079   *data = op->op_assembled;
1080 
1081   return CEED_ERROR_SUCCESS;
1082 }
1083 
1084 /**
1085   @brief Create object holding CeedOperator assembly data.
1086 
1087   The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1088   An array with references to the corresponding active CeedElemRestrictions is also stored.
1089   For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1090   The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1091 CeedEvalMode.
1092   The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1093   Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1094 
1095   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1096   @param[in]  op   CeedOperator to be assembled
1097   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1098 
1099   @return An error code: 0 - success, otherwise - failure
1100 
1101   @ref Backend
1102 **/
1103 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1104   CeedInt num_active_bases = 0;
1105 
1106   // Allocate
1107   CeedCall(CeedCalloc(1, data));
1108   (*data)->ceed = ceed;
1109   CeedCall(CeedReference(ceed));
1110 
1111   // Build OperatorAssembly data
1112   CeedQFunction       qf;
1113   CeedQFunctionField *qf_fields;
1114   CeedOperatorField  *op_fields;
1115   CeedInt             num_input_fields;
1116   CeedCall(CeedOperatorGetQFunction(op, &qf));
1117   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1118   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1119 
1120   // Determine active input basis
1121   CeedInt       *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0;
1122   CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL;
1123   CeedSize     **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1124   for (CeedInt i = 0; i < num_input_fields; i++) {
1125     CeedVector vec;
1126     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1127     if (vec == CEED_VECTOR_ACTIVE) {
1128       CeedBasis    basis_in = NULL;
1129       CeedEvalMode eval_mode;
1130       CeedInt      index = -1, dim, num_comp, q_comp;
1131       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1132       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1133       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1134       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1135       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1136       for (CeedInt i = 0; i < num_active_bases; i++) {
1137         if ((*data)->active_bases[i] == basis_in) index = i;
1138       }
1139       if (index == -1) {
1140         CeedElemRestriction elem_rstr_in;
1141         index = num_active_bases;
1142         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1143         (*data)->active_bases[num_active_bases] = NULL;
1144         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases]));
1145         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1146         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1147         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1148         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases]));
1149         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1150         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1151         num_eval_modes_in[index]  = 0;
1152         num_eval_modes_out[index] = 0;
1153         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1154         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1155         eval_modes_in[index]  = NULL;
1156         eval_modes_out[index] = NULL;
1157         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1158         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1159         eval_mode_offsets_in[index]  = NULL;
1160         eval_mode_offsets_out[index] = NULL;
1161         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1162         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1163         (*data)->assembled_bases_in[index]  = NULL;
1164         (*data)->assembled_bases_out[index] = NULL;
1165         num_active_bases++;
1166       }
1167       if (eval_mode != CEED_EVAL_WEIGHT) {
1168         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1169         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1170         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1171         for (CeedInt d = 0; d < q_comp; d++) {
1172           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1173           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1174           offset += num_comp;
1175         }
1176         num_eval_modes_in[index] += q_comp;
1177       }
1178     }
1179   }
1180   (*data)->num_eval_modes_in    = num_eval_modes_in;
1181   (*data)->eval_modes_in        = eval_modes_in;
1182   (*data)->eval_mode_offsets_in = eval_mode_offsets_in;
1183 
1184   // Determine active output basis
1185   CeedInt num_output_fields;
1186   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1187   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1188   offset = 0;
1189   for (CeedInt i = 0; i < num_output_fields; i++) {
1190     CeedVector vec;
1191     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1192     if (vec == CEED_VECTOR_ACTIVE) {
1193       CeedBasis    basis_out = NULL;
1194       CeedEvalMode eval_mode;
1195       CeedInt      index = -1, dim, num_comp, q_comp;
1196       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1197       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1198       CeedCall(CeedBasisGetDimension(basis_out, &dim));
1199       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1200       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1201       for (CeedInt i = 0; i < num_active_bases; i++) {
1202         if ((*data)->active_bases[i] == basis_out) index = i;
1203       }
1204       if (index == -1) {
1205         CeedElemRestriction elem_rstr_out;
1206 
1207         index = num_active_bases;
1208         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1209         (*data)->active_bases[num_active_bases] = NULL;
1210         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases]));
1211         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1212         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1213         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1214         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases]));
1215         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1216         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1217         num_eval_modes_in[index]  = 0;
1218         num_eval_modes_out[index] = 0;
1219         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1220         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1221         eval_modes_in[index]  = NULL;
1222         eval_modes_out[index] = NULL;
1223         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1224         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1225         eval_mode_offsets_in[index]  = NULL;
1226         eval_mode_offsets_out[index] = NULL;
1227         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1228         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1229         (*data)->assembled_bases_in[index]  = NULL;
1230         (*data)->assembled_bases_out[index] = NULL;
1231         num_active_bases++;
1232       }
1233       if (eval_mode != CEED_EVAL_WEIGHT) {
1234         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1235         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1236         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1237         for (CeedInt d = 0; d < q_comp; d++) {
1238           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1239           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1240           offset += num_comp;
1241         }
1242         num_eval_modes_out[index] += q_comp;
1243       }
1244     }
1245   }
1246   (*data)->num_output_components = offset;
1247   (*data)->num_eval_modes_out    = num_eval_modes_out;
1248   (*data)->eval_modes_out        = eval_modes_out;
1249   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1250   (*data)->num_active_bases      = num_active_bases;
1251 
1252   return CEED_ERROR_SUCCESS;
1253 }
1254 
1255 /**
1256   @brief Get CeedOperator CeedEvalModes for assembly.
1257 
1258   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1259 
1260   @param[in]  data                  CeedOperatorAssemblyData
1261   @param[out] num_active_bases      Total number of active bases
1262   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1263                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1264   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1265   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1266   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1267   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1268   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1269   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1270                                       including contributions of all active bases
1271 
1272   @return An error code: 0 - success, otherwise - failure
1273 
1274 
1275   @ref Backend
1276 **/
1277 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in,
1278                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out,
1279                                          const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) {
1280   if (num_active_bases) *num_active_bases = data->num_active_bases;
1281   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1282   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1283   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1284   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1285   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1286   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1287   if (num_output_components) *num_output_components = data->num_output_components;
1288 
1289   return CEED_ERROR_SUCCESS;
1290 }
1291 
1292 /**
1293   @brief Get CeedOperator CeedBasis data for assembly.
1294 
1295   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1296 
1297   @param[in]  data                CeedOperatorAssemblyData
1298   @param[out] num_active_bases    Number of active bases, or NULL
1299   @param[out] active_bases        Pointer to hold active CeedBasis, or NULL
1300   @param[out] assembled_bases_in  Pointer to hold assembled active input B, or NULL
1301   @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL
1302 
1303   @return An error code: 0 - success, otherwise - failure
1304 
1305   @ref Backend
1306 **/
1307 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases,
1308                                      const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) {
1309   // Assemble B_in, B_out if needed
1310   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1311     CeedInt num_qpts;
1312 
1313     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1314     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1315       CeedInt     num_nodes;
1316       CeedScalar *B_in = NULL, *identity = NULL;
1317       bool        has_eval_none = false;
1318 
1319       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1320       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1321 
1322       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1323         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1324       }
1325       if (has_eval_none) {
1326         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1327         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1328           identity[i * num_nodes + i] = 1.0;
1329         }
1330       }
1331 
1332       for (CeedInt q = 0; q < num_qpts; q++) {
1333         for (CeedInt n = 0; n < num_nodes; n++) {
1334           CeedInt      d_in              = 0, q_comp_in;
1335           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1336           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1337             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1338             const CeedScalar *B  = NULL;
1339             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B);
1340             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in));
1341             if (q_comp_in > 1) {
1342               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1343               else B = &B[(++d_in) * num_qpts * num_nodes];
1344             }
1345             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1346             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1347           }
1348         }
1349       }
1350       if (identity) CeedCall(CeedFree(identity));
1351       data->assembled_bases_in[b] = B_in;
1352     }
1353   }
1354 
1355   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1356     CeedInt num_qpts;
1357 
1358     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1359     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1360       CeedInt     num_nodes;
1361       bool        has_eval_none = false;
1362       CeedScalar *B_out = NULL, *identity = NULL;
1363 
1364       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1365       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1366 
1367       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1368         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1369       }
1370       if (has_eval_none) {
1371         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1372         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1373           identity[i * num_nodes + i] = 1.0;
1374         }
1375       }
1376 
1377       for (CeedInt q = 0; q < num_qpts; q++) {
1378         for (CeedInt n = 0; n < num_nodes; n++) {
1379           CeedInt      d_out              = 0, q_comp_out;
1380           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1381           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1382             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1383             const CeedScalar *B  = NULL;
1384             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B);
1385             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out));
1386             if (q_comp_out > 1) {
1387               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1388               else B = &B[(++d_out) * num_qpts * num_nodes];
1389             }
1390             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1391             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1392           }
1393         }
1394       }
1395       if (identity) CeedCall(CeedFree(identity));
1396       data->assembled_bases_out[b] = B_out;
1397     }
1398   }
1399 
1400   // Pass out assembled data
1401   if (active_bases) *active_bases = data->active_bases;
1402   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1403   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1404 
1405   return CEED_ERROR_SUCCESS;
1406 }
1407 
1408 /**
1409   @brief Get CeedOperator CeedBasis data for assembly.
1410 
1411   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1412 
1413   @param[in]  data                  CeedOperatorAssemblyData
1414   @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL
1415   @param[out] active_elem_rstrs     Pointer to hold active CeedElemRestrictions, or NULL
1416 
1417   @return An error code: 0 - success, otherwise - failure
1418 
1419   @ref Backend
1420 **/
1421 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs,
1422                                                 CeedElemRestriction **active_elem_rstrs) {
1423   if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases;
1424   if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs;
1425 
1426   return CEED_ERROR_SUCCESS;
1427 }
1428 
1429 /**
1430   @brief Destroy CeedOperatorAssemblyData
1431 
1432   @param[in,out] data CeedOperatorAssemblyData to destroy
1433 
1434   @return An error code: 0 - success, otherwise - failure
1435 
1436   @ref Backend
1437 **/
1438 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1439   if (!*data) {
1440     *data = NULL;
1441     return CEED_ERROR_SUCCESS;
1442   }
1443   CeedCall(CeedDestroy(&(*data)->ceed));
1444   for (CeedInt b = 0; b < (*data)->num_active_bases; b++) {
1445     CeedCall(CeedBasisDestroy(&(*data)->active_bases[b]));
1446     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b]));
1447     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1448     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1449     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1450     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1451     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1452     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1453   }
1454   CeedCall(CeedFree(&(*data)->active_bases));
1455   CeedCall(CeedFree(&(*data)->active_elem_rstrs));
1456   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1457   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1458   CeedCall(CeedFree(&(*data)->eval_modes_in));
1459   CeedCall(CeedFree(&(*data)->eval_modes_out));
1460   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1461   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1462   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1463   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1464 
1465   CeedCall(CeedFree(data));
1466   return CEED_ERROR_SUCCESS;
1467 }
1468 
1469 /// @}
1470 
1471 /// ----------------------------------------------------------------------------
1472 /// CeedOperator Public API
1473 /// ----------------------------------------------------------------------------
1474 /// @addtogroup CeedOperatorUser
1475 /// @{
1476 
1477 /**
1478   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1479 
1480   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1481   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices
1482 representing the action of the CeedQFunction for a corresponding quadrature point on an element.
1483 
1484   Inputs and outputs are in the order provided by the user when adding CeedOperator fields.
1485   For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction
1486 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1487 
1488   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1489 
1490   @param[in]  op        CeedOperator to assemble CeedQFunction
1491   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1492   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1493   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1494 
1495   @return An error code: 0 - success, otherwise - failure
1496 
1497   @ref User
1498 **/
1499 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1500   CeedCall(CeedOperatorCheckReady(op));
1501 
1502   if (op->LinearAssembleQFunction) {
1503     // Backend version
1504     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1505   } else {
1506     // Operator fallback
1507     CeedOperator op_fallback;
1508 
1509     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1510     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1511     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1512   }
1513   return CEED_ERROR_SUCCESS;
1514 }
1515 
1516 /**
1517   @brief Assemble CeedQFunction and store result internally.
1518 
1519   Return copied references of stored data to the caller.
1520   Caller is responsible for ownership and destruction of the copied references.
1521   See also @ref CeedOperatorLinearAssembleQFunction
1522 
1523   @param[in]  op        CeedOperator to assemble CeedQFunction
1524   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1525   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1526   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1527 
1528   @return An error code: 0 - success, otherwise - failure
1529 
1530   @ref User
1531 **/
1532 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1533   CeedCall(CeedOperatorCheckReady(op));
1534 
1535   if (op->LinearAssembleQFunctionUpdate) {
1536     // Backend version
1537     bool                qf_assembled_is_setup;
1538     CeedVector          assembled_vec  = NULL;
1539     CeedElemRestriction assembled_rstr = NULL;
1540 
1541     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1542     if (qf_assembled_is_setup) {
1543       bool update_needed;
1544 
1545       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1546       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1547       if (update_needed) {
1548         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1549       }
1550     } else {
1551       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1552       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1553     }
1554     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1555 
1556     // Copy reference from internally held copy
1557     *assembled = NULL;
1558     *rstr      = NULL;
1559     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1560     CeedCall(CeedVectorDestroy(&assembled_vec));
1561     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1562     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1563   } else {
1564     // Operator fallback
1565     CeedOperator op_fallback;
1566 
1567     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1568     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1569     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1570   }
1571 
1572   return CEED_ERROR_SUCCESS;
1573 }
1574 
1575 /**
1576   @brief Assemble the diagonal of a square linear CeedOperator
1577 
1578   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1579 
1580   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1581 
1582   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1583 
1584   @param[in]  op        CeedOperator to assemble CeedQFunction
1585   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1586   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1587 
1588   @return An error code: 0 - success, otherwise - failure
1589 
1590   @ref User
1591 **/
1592 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1593   bool is_composite;
1594   CeedCall(CeedOperatorCheckReady(op));
1595   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1596 
1597   CeedSize input_size = 0, output_size = 0;
1598   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1599   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1600 
1601   // Early exit for empty operator
1602   if (!is_composite) {
1603     CeedInt num_elem = 0;
1604 
1605     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1606     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1607   }
1608 
1609   if (op->LinearAssembleDiagonal) {
1610     // Backend version
1611     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1612     return CEED_ERROR_SUCCESS;
1613   } else if (op->LinearAssembleAddDiagonal) {
1614     // Backend version with zeroing first
1615     CeedCall(CeedVectorSetValue(assembled, 0.0));
1616     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1617     return CEED_ERROR_SUCCESS;
1618   } else {
1619     // Operator fallback
1620     CeedOperator op_fallback;
1621 
1622     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1623     if (op_fallback) {
1624       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1625       return CEED_ERROR_SUCCESS;
1626     }
1627   }
1628   // Default interface implementation
1629   CeedCall(CeedVectorSetValue(assembled, 0.0));
1630   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1631 
1632   return CEED_ERROR_SUCCESS;
1633 }
1634 
1635 /**
1636   @brief Assemble the diagonal of a square linear CeedOperator
1637 
1638   This sums into a CeedVector the diagonal of a linear CeedOperator.
1639 
1640   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1641 
1642   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1643 
1644   @param[in]  op        CeedOperator to assemble CeedQFunction
1645   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1646   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1647 
1648   @return An error code: 0 - success, otherwise - failure
1649 
1650   @ref User
1651 **/
1652 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1653   bool is_composite;
1654   CeedCall(CeedOperatorCheckReady(op));
1655   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1656 
1657   CeedSize input_size = 0, output_size = 0;
1658   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1659   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1660 
1661   // Early exit for empty operator
1662   if (!is_composite) {
1663     CeedInt num_elem = 0;
1664 
1665     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1666     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1667   }
1668 
1669   if (op->LinearAssembleAddDiagonal) {
1670     // Backend version
1671     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1672     return CEED_ERROR_SUCCESS;
1673   } else {
1674     // Operator fallback
1675     CeedOperator op_fallback;
1676 
1677     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1678     if (op_fallback) {
1679       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1680       return CEED_ERROR_SUCCESS;
1681     }
1682   }
1683   // Default interface implementation
1684   if (is_composite) {
1685     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1686   } else {
1687     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1688   }
1689 
1690   return CEED_ERROR_SUCCESS;
1691 }
1692 
1693 /**
1694   @brief Assemble the point block diagonal of a square linear CeedOperator
1695 
1696   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1697 
1698   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1699 
1700   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1701 
1702   @param[in]  op        CeedOperator to assemble CeedQFunction
1703   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1704 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1705 component in].
1706   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1707 
1708   @return An error code: 0 - success, otherwise - failure
1709 
1710   @ref User
1711 **/
1712 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1713   bool is_composite;
1714   CeedCall(CeedOperatorCheckReady(op));
1715   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1716 
1717   CeedSize input_size = 0, output_size = 0;
1718   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1719   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1720 
1721   // Early exit for empty operator
1722   if (!is_composite) {
1723     CeedInt num_elem = 0;
1724 
1725     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1726     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1727   }
1728 
1729   if (op->LinearAssemblePointBlockDiagonal) {
1730     // Backend version
1731     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1732     return CEED_ERROR_SUCCESS;
1733   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1734     // Backend version with zeroing first
1735     CeedCall(CeedVectorSetValue(assembled, 0.0));
1736     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1737     return CEED_ERROR_SUCCESS;
1738   } else {
1739     // Operator fallback
1740     CeedOperator op_fallback;
1741 
1742     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1743     if (op_fallback) {
1744       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1745       return CEED_ERROR_SUCCESS;
1746     }
1747   }
1748   // Default interface implementation
1749   CeedCall(CeedVectorSetValue(assembled, 0.0));
1750   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1751 
1752   return CEED_ERROR_SUCCESS;
1753 }
1754 
1755 /**
1756   @brief Assemble the point block diagonal of a square linear CeedOperator
1757 
1758   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1759 
1760   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1761 
1762   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1763 
1764   @param[in]  op        CeedOperator to assemble CeedQFunction
1765   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1766 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1767 component in].
1768   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1769 
1770   @return An error code: 0 - success, otherwise - failure
1771 
1772   @ref User
1773 **/
1774 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1775   bool is_composite;
1776   CeedCall(CeedOperatorCheckReady(op));
1777   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1778 
1779   CeedSize input_size = 0, output_size = 0;
1780   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1781   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1782 
1783   // Early exit for empty operator
1784   if (!is_composite) {
1785     CeedInt num_elem = 0;
1786 
1787     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1788     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1789   }
1790 
1791   if (op->LinearAssembleAddPointBlockDiagonal) {
1792     // Backend version
1793     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1794     return CEED_ERROR_SUCCESS;
1795   } else {
1796     // Operator fallback
1797     CeedOperator op_fallback;
1798 
1799     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1800     if (op_fallback) {
1801       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1802       return CEED_ERROR_SUCCESS;
1803     }
1804   }
1805   // Default interface implementation
1806   if (is_composite) {
1807     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1808   } else {
1809     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1810   }
1811 
1812   return CEED_ERROR_SUCCESS;
1813 }
1814 
1815 /**
1816    @brief Fully assemble the nonzero pattern of a linear operator.
1817 
1818    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1819 
1820    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1821 matrix in entry (i, j).
1822   Note that the (i, j) pairs are not unique and may repeat.
1823   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1824 
1825    This will generally be slow unless your operator is low-order.
1826 
1827    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1828 
1829    @param[in]  op          CeedOperator to assemble
1830    @param[out] num_entries Number of entries in coordinate nonzero pattern
1831    @param[out] rows        Row number for each entry
1832    @param[out] cols        Column number for each entry
1833 
1834    @ref User
1835 **/
1836 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1837   CeedInt       num_suboperators, single_entries;
1838   CeedOperator *sub_operators;
1839   bool          is_composite;
1840   CeedCall(CeedOperatorCheckReady(op));
1841   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1842 
1843   if (op->LinearAssembleSymbolic) {
1844     // Backend version
1845     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1846     return CEED_ERROR_SUCCESS;
1847   } else {
1848     // Operator fallback
1849     CeedOperator op_fallback;
1850 
1851     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1852     if (op_fallback) {
1853       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1854       return CEED_ERROR_SUCCESS;
1855     }
1856   }
1857 
1858   // Default interface implementation
1859 
1860   // count entries and allocate rows, cols arrays
1861   *num_entries = 0;
1862   if (is_composite) {
1863     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1864     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1865     for (CeedInt k = 0; k < num_suboperators; ++k) {
1866       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1867       *num_entries += single_entries;
1868     }
1869   } else {
1870     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1871     *num_entries += single_entries;
1872   }
1873   CeedCall(CeedCalloc(*num_entries, rows));
1874   CeedCall(CeedCalloc(*num_entries, cols));
1875 
1876   // assemble nonzero locations
1877   CeedInt offset = 0;
1878   if (is_composite) {
1879     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1880     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1881     for (CeedInt k = 0; k < num_suboperators; ++k) {
1882       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1883       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1884       offset += single_entries;
1885     }
1886   } else {
1887     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1888   }
1889 
1890   return CEED_ERROR_SUCCESS;
1891 }
1892 
1893 /**
1894    @brief Fully assemble the nonzero entries of a linear operator.
1895 
1896    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1897 
1898    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1899 matrix in entry (i, j).
1900   Note that the (i, j) pairs are not unique and may repeat.
1901   This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1902 
1903    This will generally be slow unless your operator is low-order.
1904 
1905    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1906 
1907    @param[in]  op     CeedOperator to assemble
1908    @param[out] values Values to assemble into matrix
1909 
1910    @ref User
1911 **/
1912 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1913   CeedInt       num_suboperators, single_entries = 0;
1914   CeedOperator *sub_operators;
1915   bool          is_composite;
1916   CeedCall(CeedOperatorCheckReady(op));
1917   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1918 
1919   // Early exit for empty operator
1920   if (!is_composite) {
1921     CeedInt num_elem = 0;
1922 
1923     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1924     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1925   }
1926 
1927   if (op->LinearAssemble) {
1928     // Backend version
1929     CeedCall(op->LinearAssemble(op, values));
1930     return CEED_ERROR_SUCCESS;
1931   } else {
1932     // Operator fallback
1933     CeedOperator op_fallback;
1934 
1935     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1936     if (op_fallback) {
1937       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
1938       return CEED_ERROR_SUCCESS;
1939     }
1940   }
1941 
1942   // Default interface implementation
1943   CeedInt offset = 0;
1944   CeedCall(CeedVectorSetValue(values, 0.0));
1945   if (is_composite) {
1946     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1947     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1948     for (CeedInt k = 0; k < num_suboperators; k++) {
1949       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
1950       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1951       offset += single_entries;
1952     }
1953   } else {
1954     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
1955   }
1956 
1957   return CEED_ERROR_SUCCESS;
1958 }
1959 
1960 /**
1961   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
1962 
1963   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1964 
1965   @param[in]  op               Composite CeedOperator
1966   @param[in]  num_skip_indices Number of suboperators to skip
1967   @param[in]  skip_indices     Array of indices of suboperators to skip
1968   @param[out] mult             Vector to store multiplicity (of size l_size)
1969 
1970   @return An error code: 0 - success, otherwise - failure
1971 
1972   @ref User
1973 **/
1974 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
1975   CeedCall(CeedOperatorCheckReady(op));
1976 
1977   Ceed                ceed;
1978   CeedInt             num_suboperators;
1979   CeedSize            l_vec_len;
1980   CeedScalar         *mult_array;
1981   CeedVector          ones_l_vec;
1982   CeedElemRestriction elem_rstr;
1983   CeedOperator       *sub_operators;
1984 
1985   CeedCall(CeedOperatorGetCeed(op, &ceed));
1986 
1987   // Zero mult vector
1988   CeedCall(CeedVectorSetValue(mult, 0.0));
1989 
1990   // Get suboperators
1991   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1992   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1993   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
1994 
1995   // Work vector
1996   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
1997   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
1998   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
1999   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2000 
2001   // Compute multiplicity across suboperators
2002   for (CeedInt i = 0; i < num_suboperators; i++) {
2003     const CeedScalar *sub_mult_array;
2004     CeedVector        sub_mult_l_vec, ones_e_vec;
2005 
2006     // -- Check for suboperator to skip
2007     for (CeedInt j = 0; j < num_skip_indices; j++) {
2008       if (skip_indices[j] == i) continue;
2009     }
2010 
2011     // -- Sub operator multiplicity
2012     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2013     CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2014     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2015     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2016     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2017     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2018     // ---- Flag every node present in the current suboperator
2019     for (CeedInt j = 0; j < l_vec_len; j++) {
2020       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2021     }
2022     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2023     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2024     CeedCall(CeedVectorDestroy(&ones_e_vec));
2025   }
2026   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2027   CeedCall(CeedVectorDestroy(&ones_l_vec));
2028 
2029   return CEED_ERROR_SUCCESS;
2030 }
2031 
2032 /**
2033   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2034 grid interpolation
2035 
2036   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2037 
2038   @param[in]  op_fine      Fine grid operator
2039   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2040   @param[in]  rstr_coarse  Coarse grid restriction
2041   @param[in]  basis_coarse Coarse grid active vector basis
2042   @param[out] op_coarse    Coarse grid operator
2043   @param[out] op_prolong   Coarse to fine operator, or NULL
2044   @param[out] op_restrict  Fine to coarse operator, or NULL
2045 
2046   @return An error code: 0 - success, otherwise - failure
2047 
2048   @ref User
2049 **/
2050 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2051                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2052   CeedCall(CeedOperatorCheckReady(op_fine));
2053 
2054   // Build prolongation matrix, if required
2055   CeedBasis basis_c_to_f = NULL;
2056   if (op_prolong || op_restrict) {
2057     CeedBasis basis_fine;
2058     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2059     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2060   }
2061 
2062   // Core code
2063   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2064 
2065   return CEED_ERROR_SUCCESS;
2066 }
2067 
2068 /**
2069   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2070 
2071   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2072 
2073   @param[in]  op_fine       Fine grid operator
2074   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2075   @param[in]  rstr_coarse   Coarse grid restriction
2076   @param[in]  basis_coarse  Coarse grid active vector basis
2077   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2078   @param[out] op_coarse     Coarse grid operator
2079   @param[out] op_prolong    Coarse to fine operator, or NULL
2080   @param[out] op_restrict   Fine to coarse operator, or NULL
2081 
2082   @return An error code: 0 - success, otherwise - failure
2083 
2084   @ref User
2085 **/
2086 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2087                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2088                                              CeedOperator *op_restrict) {
2089   CeedCall(CeedOperatorCheckReady(op_fine));
2090   Ceed ceed;
2091   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2092 
2093   // Check for compatible quadrature spaces
2094   CeedBasis basis_fine;
2095   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2096   CeedInt Q_f, Q_c;
2097   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2098   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2099   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2100 
2101   // Create coarse to fine basis, if required
2102   CeedBasis basis_c_to_f = NULL;
2103   if (op_prolong || op_restrict) {
2104     // Check if interpolation matrix is provided
2105     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2106               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2107     CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2108     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2109     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2110     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2111     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2112     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2113     CeedScalar *q_ref, *q_weight, *grad;
2114     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2115     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2116     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2117     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2118     CeedCall(CeedFree(&q_ref));
2119     CeedCall(CeedFree(&q_weight));
2120     CeedCall(CeedFree(&grad));
2121   }
2122 
2123   // Core code
2124   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2125   return CEED_ERROR_SUCCESS;
2126 }
2127 
2128 /**
2129   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2130 
2131   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2132 
2133   @param[in]  op_fine       Fine grid operator
2134   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2135   @param[in]  rstr_coarse   Coarse grid restriction
2136   @param[in]  basis_coarse  Coarse grid active vector basis
2137   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2138   @param[out] op_coarse     Coarse grid operator
2139   @param[out] op_prolong    Coarse to fine operator, or NULL
2140   @param[out] op_restrict   Fine to coarse operator, or NULL
2141 
2142   @return An error code: 0 - success, otherwise - failure
2143 
2144   @ref User
2145 **/
2146 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2147                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2148                                        CeedOperator *op_restrict) {
2149   CeedCall(CeedOperatorCheckReady(op_fine));
2150   Ceed ceed;
2151   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2152 
2153   // Check for compatible quadrature spaces
2154   CeedBasis basis_fine;
2155   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2156   CeedInt Q_f, Q_c;
2157   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2158   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2159   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2160 
2161   // Coarse to fine basis
2162   CeedBasis basis_c_to_f = NULL;
2163   if (op_prolong || op_restrict) {
2164     // Check if interpolation matrix is provided
2165     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2166               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2167     CeedElemTopology topo;
2168     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2169     CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2170     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2171     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2172     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2173     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2174     CeedScalar *q_ref, *q_weight, *grad;
2175     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2176     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2177     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2178     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2179     CeedCall(CeedFree(&q_ref));
2180     CeedCall(CeedFree(&q_weight));
2181     CeedCall(CeedFree(&grad));
2182   }
2183 
2184   // Core code
2185   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2186   return CEED_ERROR_SUCCESS;
2187 }
2188 
2189 /**
2190   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2191 
2192   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2193   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2194   The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T
2195 \hat S V\f$.
2196   The CeedOperator must be linear and non-composite.
2197   The associated CeedQFunction must therefore also be linear.
2198 
2199   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2200 
2201   @param[in]  op      CeedOperator to create element inverses
2202   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2203   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2204 
2205   @return An error code: 0 - success, otherwise - failure
2206 
2207   @ref User
2208 **/
2209 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2210   CeedCall(CeedOperatorCheckReady(op));
2211 
2212   if (op->CreateFDMElementInverse) {
2213     // Backend version
2214     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2215     return CEED_ERROR_SUCCESS;
2216   } else {
2217     // Operator fallback
2218     CeedOperator op_fallback;
2219 
2220     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2221     if (op_fallback) {
2222       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2223       return CEED_ERROR_SUCCESS;
2224     }
2225   }
2226 
2227   // Default interface implementation
2228   Ceed ceed, ceed_parent;
2229   CeedCall(CeedOperatorGetCeed(op, &ceed));
2230   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2231   ceed_parent = ceed_parent ? ceed_parent : ceed;
2232   CeedQFunction qf;
2233   CeedCall(CeedOperatorGetQFunction(op, &qf));
2234 
2235   // Determine active input basis
2236   bool                interp = false, grad = false;
2237   CeedBasis           basis = NULL;
2238   CeedElemRestriction rstr  = NULL;
2239   CeedOperatorField  *op_fields;
2240   CeedQFunctionField *qf_fields;
2241   CeedInt             num_input_fields;
2242   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2243   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2244   for (CeedInt i = 0; i < num_input_fields; i++) {
2245     CeedVector vec;
2246     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2247     if (vec == CEED_VECTOR_ACTIVE) {
2248       CeedEvalMode eval_mode;
2249       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2250       interp = interp || eval_mode == CEED_EVAL_INTERP;
2251       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2252       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2253       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2254     }
2255   }
2256   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2257   CeedSize l_size = 1;
2258   CeedInt  P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2259   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2260   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2261   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2262   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2263   CeedCall(CeedBasisGetDimension(basis, &dim));
2264   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2265   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2266   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2267 
2268   // Build and diagonalize 1D Mass and Laplacian
2269   bool is_tensor_basis;
2270   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2271   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2272   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2273   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2274   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2275   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2276   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2277   CeedCall(CeedCalloc(P_1d, &lambda));
2278   // -- Build matrices
2279   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2280   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2281   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2282   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2283   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2284 
2285   // -- Diagonalize
2286   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2287   CeedCall(CeedFree(&mass));
2288   CeedCall(CeedFree(&laplace));
2289   for (CeedInt i = 0; i < P_1d; i++) {
2290     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2291   }
2292   CeedCall(CeedFree(&x));
2293 
2294   // Assemble QFunction
2295   CeedVector          assembled;
2296   CeedElemRestriction rstr_qf;
2297   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2298   CeedInt layout[3];
2299   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2300   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2301   CeedScalar max_norm = 0;
2302   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2303 
2304   // Calculate element averages
2305   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2306   CeedScalar       *elem_avg;
2307   const CeedScalar *assembled_array, *q_weight_array;
2308   CeedVector        q_weight;
2309   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2310   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2311   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2312   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2313   CeedCall(CeedCalloc(num_elem, &elem_avg));
2314   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2315   for (CeedInt e = 0; e < num_elem; e++) {
2316     CeedInt count = 0;
2317     for (CeedInt q = 0; q < num_qpts; q++) {
2318       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2319         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2320           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2321           count++;
2322         }
2323       }
2324     }
2325     if (count) {
2326       elem_avg[e] /= count;
2327     } else {
2328       elem_avg[e] = 1.0;
2329     }
2330   }
2331   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2332   CeedCall(CeedVectorDestroy(&assembled));
2333   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2334   CeedCall(CeedVectorDestroy(&q_weight));
2335 
2336   // Build FDM diagonal
2337   CeedVector  q_data;
2338   CeedScalar *q_data_array, *fdm_diagonal;
2339   CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2340   const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2341   for (CeedInt c = 0; c < num_comp; c++) {
2342     for (CeedInt n = 0; n < num_nodes; n++) {
2343       if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2344       if (grad) {
2345         for (CeedInt d = 0; d < dim; d++) {
2346           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2347           fdm_diagonal[c * num_nodes + n] += lambda[i];
2348         }
2349       }
2350       if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2351     }
2352   }
2353   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2354   CeedCall(CeedVectorSetValue(q_data, 0.0));
2355   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2356   for (CeedInt e = 0; e < num_elem; e++) {
2357     for (CeedInt c = 0; c < num_comp; c++) {
2358       for (CeedInt n = 0; n < num_nodes; n++) q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2359     }
2360   }
2361   CeedCall(CeedFree(&elem_avg));
2362   CeedCall(CeedFree(&fdm_diagonal));
2363   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2364 
2365   // Setup FDM operator
2366   // -- Basis
2367   CeedBasis   fdm_basis;
2368   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2369   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2370   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2371   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2372   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2373   CeedCall(CeedFree(&fdm_interp));
2374   CeedCall(CeedFree(&grad_dummy));
2375   CeedCall(CeedFree(&q_ref_dummy));
2376   CeedCall(CeedFree(&q_weight_dummy));
2377   CeedCall(CeedFree(&lambda));
2378 
2379   // -- Restriction
2380   CeedElemRestriction rstr_qd_i;
2381   CeedInt             strides[3] = {1, num_nodes, num_nodes * num_comp};
2382   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i));
2383   // -- QFunction
2384   CeedQFunction qf_fdm;
2385   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2386   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2387   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2388   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2389   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2390   // -- QFunction context
2391   CeedInt *num_comp_data;
2392   CeedCall(CeedCalloc(1, &num_comp_data));
2393   num_comp_data[0] = num_comp;
2394   CeedQFunctionContext ctx_fdm;
2395   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2396   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2397   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2398   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2399   // -- Operator
2400   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2401   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2402   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2403   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2404 
2405   // Cleanup
2406   CeedCall(CeedVectorDestroy(&q_data));
2407   CeedCall(CeedBasisDestroy(&fdm_basis));
2408   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2409   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2410 
2411   return CEED_ERROR_SUCCESS;
2412 }
2413 
2414 /// @}
2415