xref: /libCEED/interface/ceed-preconditioning.c (revision b05f7e9f18319d38f6812c09171b3c228477e742)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = NULL;
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
128     // Cleanup
129     CeedCall(CeedQFunctionDestroy(&qf_fallback));
130     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
131     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
132   }
133   CeedCall(CeedOperatorSetName(op_fallback, op->name));
134   CeedCall(CeedOperatorCheckReady(op_fallback));
135   // Note: No ref-counting here so we don't get caught in a reference loop.
136   //       The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback.
137   op->op_fallback                 = op_fallback;
138   op_fallback->op_fallback_parent = op;
139 
140   return CEED_ERROR_SUCCESS;
141 }
142 
143 /**
144   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
145 
146   @param[in]  op          CeedOperator to retrieve fallback for
147   @param[out] op_fallback Fallback CeedOperator
148 
149   @return An error code: 0 - success, otherwise - failure
150 
151   @ref Developer
152 **/
153 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
154   // Create if needed
155   if (!op->op_fallback) {
156     CeedCall(CeedOperatorCreateFallback(op));
157   }
158   if (op->op_fallback) {
159     bool is_debug;
160 
161     CeedCall(CeedIsDebug(op->ceed, &is_debug));
162     if (is_debug) {
163       Ceed        ceed, ceed_fallback;
164       const char *resource, *resource_fallback;
165 
166       CeedCall(CeedOperatorGetCeed(op, &ceed));
167       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
168       CeedCall(CeedGetResource(ceed, &resource));
169       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
170 
171       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n");
172       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
173                 op->op_fallback);
174     }
175   }
176   *op_fallback = op->op_fallback;
177 
178   return CEED_ERROR_SUCCESS;
179 }
180 
181 /**
182   @brief Select correct basis matrix pointer based on CeedEvalMode
183 
184   @param[in]  basis     CeedBasis from which to get the basis matrix
185   @param[in]  eval_mode Current basis evaluation mode
186   @param[in]  identity  Pointer to identity matrix
187   @param[out] basis_ptr Basis pointer to set
188 
189   @ref Developer
190 **/
191 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
192   switch (eval_mode) {
193     case CEED_EVAL_NONE:
194       *basis_ptr = identity;
195       break;
196     case CEED_EVAL_INTERP:
197       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
198       break;
199     case CEED_EVAL_GRAD:
200       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
201       break;
202     case CEED_EVAL_DIV:
203       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
204       break;
205     case CEED_EVAL_CURL:
206       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
207       break;
208     case CEED_EVAL_WEIGHT:
209       break;  // Caught by QF Assembly
210   }
211   assert(*basis_ptr != NULL);
212 
213   return CEED_ERROR_SUCCESS;
214 }
215 
216 /**
217   @brief Create point block restriction for active operator field
218 
219   @param[in]  rstr            Original CeedElemRestriction for active field
220   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
221 
222   @return An error code: 0 - success, otherwise - failure
223 
224   @ref Developer
225 **/
226 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
227   Ceed ceed;
228   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
229   const CeedInt *offsets;
230   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
231 
232   // Expand offsets
233   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
234   CeedSize l_size;
235   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
236   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
237   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
238   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
239   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
240   CeedInt shift = num_comp;
241   if (comp_stride != 1) shift *= num_comp;
242   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
243   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
244     pointblock_offsets[i] = offsets[i] * shift;
245   }
246 
247   // Create new restriction
248   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
249                                      pointblock_offsets, pointblock_rstr));
250 
251   // Cleanup
252   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
253 
254   return CEED_ERROR_SUCCESS;
255 }
256 
257 /**
258   @brief Core logic for assembling operator diagonal or point block diagonal
259 
260   @param[in]  op            CeedOperator to assemble point block diagonal
261   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
262   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
263   @param[out] assembled     CeedVector to store assembled diagonal
264 
265   @return An error code: 0 - success, otherwise - failure
266 
267   @ref Developer
268 **/
269 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
270   Ceed ceed;
271   CeedCall(CeedOperatorGetCeed(op, &ceed));
272 
273   // Assemble QFunction
274   CeedQFunction       qf;
275   const CeedScalar   *assembled_qf_array;
276   CeedVector          assembled_qf        = NULL;
277   CeedElemRestriction assembled_elem_rstr = NULL;
278   CeedInt             num_input_fields, num_output_fields;
279   CeedInt             layout[3];
280 
281   CeedCall(CeedOperatorGetQFunction(op, &qf));
282   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
283   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
284   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout));
285   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
286   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
287 
288   // Get assembly data
289   CeedOperatorAssemblyData data;
290   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
291   CeedInt                 *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
292   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
293   CeedBasis               *active_bases;
294   CeedElemRestriction     *active_elem_rstrs;
295   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
296   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
297                                                 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components));
298   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL));
299   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs));
300 
301   // Loop over all active bases
302   for (CeedInt b = 0; b < num_active_bases; b++) {
303     // Assemble point block diagonal restriction, if needed
304     CeedElemRestriction diag_elem_rstr = active_elem_rstrs[b];
305 
306     if (is_pointblock) {
307       CeedElemRestriction point_block_elem_rstr;
308 
309       CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_elem_rstr, &point_block_elem_rstr));
310       diag_elem_rstr = point_block_elem_rstr;
311     }
312 
313     // Create diagonal vector
314     CeedVector elem_diag;
315     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
316 
317     // Assemble element operator diagonals
318     CeedScalar *elem_diag_array;
319     CeedInt     num_elem, num_nodes, num_qpts, num_components;
320 
321     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
322     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
323     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
324     CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes));
325     CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components));
326     CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts));
327 
328     // Construct identity matrix for basis if required
329     bool        has_eval_none = false;
330     CeedScalar *identity      = NULL;
331     for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) {
332       has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE);
333     }
334     for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) {
335       has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE);
336     }
337     if (has_eval_none) {
338       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
339       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
340     }
341 
342     // Compute the diagonal of B^T D B
343     // Each element
344     for (CeedSize e = 0; e < num_elem; e++) {
345       // Each basis eval mode pair
346       CeedInt      d_out              = 0, q_comp_out;
347       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
348       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) {
349         const CeedScalar *B_t = NULL;
350         CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t);
351         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out));
352         if (q_comp_out > 1) {
353           if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
354           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
355         }
356         eval_mode_out_prev = eval_modes_out[b][e_out];
357 
358         CeedInt      d_in              = 0, q_comp_in;
359         CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
360         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) {
361           const CeedScalar *B = NULL;
362           CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B);
363           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in));
364           if (q_comp_in > 1) {
365             if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
366             else B = &B[(++d_in) * num_qpts * num_nodes];
367           }
368           eval_mode_in_prev = eval_modes_in[b][e_in];
369 
370           // Each component
371           for (CeedInt c_out = 0; c_out < num_components; c_out++) {
372             // Each qpt/node pair
373             for (CeedInt q = 0; q < num_qpts; q++) {
374               if (is_pointblock) {
375                 // Point Block Diagonal
376                 for (CeedInt c_in = 0; c_in < num_components; c_in++) {
377                   const CeedSize c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
378                   const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
379                   for (CeedInt n = 0; n < num_nodes; n++) {
380                     elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] +=
381                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
382                   }
383                 }
384               } else {
385                 // Diagonal Only
386                 const CeedInt    c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
387                 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
388                 for (CeedInt n = 0; n < num_nodes; n++) {
389                   elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
390                 }
391               }
392             }
393           }
394         }
395       }
396     }
397     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
398 
399     // Assemble local operator diagonal
400     CeedCall(CeedElemRestrictionApplyUnsigned(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
401 
402     // Cleanup
403     if (is_pointblock) CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
404     CeedCall(CeedVectorDestroy(&elem_diag));
405     CeedCall(CeedFree(&identity));
406   }
407   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
408   CeedCall(CeedVectorDestroy(&assembled_qf));
409 
410   return CEED_ERROR_SUCCESS;
411 }
412 
413 /**
414   @brief Core logic for assembling composite operator diagonal
415 
416   @param[in]  op            CeedOperator to assemble point block diagonal
417   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
418   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
419   @param[out] assembled     CeedVector to store assembled diagonal
420 
421   @return An error code: 0 - success, otherwise - failure
422 
423   @ref Developer
424 **/
425 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
426                                                                  CeedVector assembled) {
427   CeedInt       num_sub;
428   CeedOperator *suboperators;
429   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
430   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
431   for (CeedInt i = 0; i < num_sub; i++) {
432     if (is_pointblock) {
433       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
434     } else {
435       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
436     }
437   }
438   return CEED_ERROR_SUCCESS;
439 }
440 
441 /**
442   @brief Build nonzero pattern for non-composite operator
443 
444   Users should generally use CeedOperatorLinearAssembleSymbolic()
445 
446   Note: For operators using oriented element restrictions, entries in rows or cols may be negative indicating the assembled value at this nonzero
447 should be negated
448 
449   @param[in]  op     CeedOperator to assemble nonzero pattern
450   @param[in]  offset Offset for number of entries
451   @param[out] rows   Row number for each entry
452   @param[out] cols   Column number for each entry
453 
454   @return An error code: 0 - success, otherwise - failure
455 
456   @ref Developer
457 **/
458 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
459   Ceed ceed;
460   bool is_composite;
461   CeedCall(CeedOperatorGetCeed(op, &ceed));
462   CeedCall(CeedOperatorIsComposite(op, &is_composite));
463 
464   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
465 
466   CeedSize num_nodes;
467   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
468   CeedElemRestriction rstr_in;
469   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
470   CeedInt num_elem, elem_size, num_comp;
471   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
472   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
473   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
474   CeedInt layout_er[3];
475   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
476 
477   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
478 
479   // Determine elem_dof relation
480   CeedVector index_vec;
481   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
482   CeedScalar *array;
483   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
484   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
485   CeedCall(CeedVectorRestoreArray(index_vec, &array));
486   CeedVector elem_dof;
487   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
488   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
489   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
490   const CeedScalar *elem_dof_a;
491   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
492   CeedCall(CeedVectorDestroy(&index_vec));
493 
494   // Determine i, j locations for element matrices
495   CeedSize count = 0;
496   for (CeedInt e = 0; e < num_elem; e++) {
497     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
498       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
499         for (CeedInt i = 0; i < elem_size; i++) {
500           for (CeedInt j = 0; j < elem_size; j++) {
501             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
502             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
503 
504             const CeedInt row = elem_dof_a[elem_dof_index_row];
505             const CeedInt col = elem_dof_a[elem_dof_index_col];
506 
507             rows[offset + count] = row;
508             cols[offset + count] = col;
509             count++;
510           }
511         }
512       }
513     }
514   }
515   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
516   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
517   CeedCall(CeedVectorDestroy(&elem_dof));
518 
519   return CEED_ERROR_SUCCESS;
520 }
521 
522 /**
523   @brief Assemble nonzero entries for non-composite operator
524 
525   Users should generally use CeedOperatorLinearAssemble()
526 
527   @param[in]  op     CeedOperator to assemble
528   @param[in]  offset Offset for number of entries
529   @param[out] values Values to assemble into matrix
530 
531   @return An error code: 0 - success, otherwise - failure
532 
533   @ref Developer
534 **/
535 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
536   Ceed ceed;
537   bool is_composite;
538   CeedCall(CeedOperatorGetCeed(op, &ceed));
539   CeedCall(CeedOperatorIsComposite(op, &is_composite));
540 
541   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
542 
543   // Early exit for empty operator
544   {
545     CeedInt num_elem = 0;
546 
547     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
548     if (num_elem == 0) return CEED_ERROR_SUCCESS;
549   }
550 
551   if (op->LinearAssembleSingle) {
552     // Backend version
553     CeedCall(op->LinearAssembleSingle(op, offset, values));
554     return CEED_ERROR_SUCCESS;
555   } else {
556     // Operator fallback
557     CeedOperator op_fallback;
558 
559     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
560     if (op_fallback) {
561       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
562       return CEED_ERROR_SUCCESS;
563     }
564   }
565 
566   // Assemble QFunction
567   CeedQFunction qf;
568   CeedCall(CeedOperatorGetQFunction(op, &qf));
569   CeedVector          assembled_qf = NULL;
570   CeedElemRestriction rstr_q       = NULL;
571   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
572   CeedSize qf_length;
573   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
574 
575   CeedInt            num_input_fields, num_output_fields;
576   CeedOperatorField *input_fields;
577   CeedOperatorField *output_fields;
578   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
579 
580   // Get assembly data
581   CeedOperatorAssemblyData data;
582   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
583   const CeedEvalMode **eval_modes_in, **eval_modes_out;
584   CeedInt             *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
585   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out,
586                                                 &eval_modes_out, NULL, NULL));
587   CeedBasis *bases;
588   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL));
589   CeedBasis basis_in = bases[0];
590 
591   CeedCheck(num_active_bases == 1, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases");
592   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
593 
594   CeedElemRestriction active_rstr;
595   CeedInt             num_elem, elem_size, num_qpts, num_comp;
596   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
597   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
598   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
599   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
600   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
601 
602   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
603 
604   // loop over elements and put in data structure
605   const CeedScalar *assembled_qf_array;
606   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
607 
608   CeedInt layout_qf[3];
609   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
610   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
611 
612   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
613   const CeedScalar **B_mats_in, **B_mats_out;
614   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out));
615   const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0];
616   CeedScalar        BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]];
617   CeedScalar        elem_mat[elem_size * elem_size];
618   CeedSize          count = 0;
619   CeedScalar       *vals;
620   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
621   for (CeedSize e = 0; e < num_elem; e++) {
622     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
623       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
624         // Compute B^T*D
625         for (CeedSize n = 0; n < elem_size; n++) {
626           for (CeedSize q = 0; q < num_qpts; q++) {
627             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
628               const CeedSize btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in);
629               CeedScalar     sum       = 0.0;
630               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
631                 const CeedSize b_out_index     = (num_eval_modes_out[0] * q + e_out) * elem_size + n;
632                 const CeedSize eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out;
633                 const CeedSize qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
634                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
635               }
636               BTD_mat[btd_index] = sum;
637             }
638           }
639         }
640         // form element matrix itself (for each block component)
641         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0]));
642 
643         // put element matrix in coordinate data structure
644         for (CeedInt i = 0; i < elem_size; i++) {
645           for (CeedInt j = 0; j < elem_size; j++) {
646             vals[offset + count] = elem_mat[i * elem_size + j];
647             count++;
648           }
649         }
650       }
651     }
652   }
653   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries");
654   CeedCall(CeedVectorRestoreArray(values, &vals));
655 
656   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
657   CeedCall(CeedVectorDestroy(&assembled_qf));
658 
659   return CEED_ERROR_SUCCESS;
660 }
661 
662 /**
663   @brief Count number of entries for assembled CeedOperator
664 
665   @param[in]  op          CeedOperator to assemble
666   @param[out] num_entries Number of entries in assembled representation
667 
668   @return An error code: 0 - success, otherwise - failure
669 
670   @ref Utility
671 **/
672 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) {
673   bool                is_composite;
674   CeedElemRestriction rstr;
675   CeedInt             num_elem, elem_size, num_comp;
676 
677   CeedCall(CeedOperatorIsComposite(op, &is_composite));
678   CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
679   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
680   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
681   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
682   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
683   *num_entries = (CeedSize)elem_size * num_comp * elem_size * num_comp * num_elem;
684 
685   return CEED_ERROR_SUCCESS;
686 }
687 
688 /**
689   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
690 
691   @param[in]  op_fine      Fine grid operator
692   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
693   @param[in]  rstr_coarse  Coarse grid restriction
694   @param[in]  basis_coarse Coarse grid active vector basis
695   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
696   @param[out] op_coarse    Coarse grid operator
697   @param[out] op_prolong   Coarse to fine operator, or NULL
698   @param[out] op_restrict  Fine to coarse operator, or NULL
699 
700   @return An error code: 0 - success, otherwise - failure
701 
702   @ref Developer
703 **/
704 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
705                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
706   Ceed                ceed;
707   CeedVector          mult_vec         = NULL;
708   CeedElemRestriction rstr_p_mult_fine = NULL;
709   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
710 
711   // Check for composite operator
712   bool is_composite;
713   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
714   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
715 
716   // Coarse Grid
717   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
718   CeedElemRestriction rstr_fine = NULL;
719   // -- Clone input fields
720   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
721     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
722       rstr_fine = op_fine->input_fields[i]->elem_rstr;
723       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
724     } else {
725       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
726                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
727     }
728   }
729   // -- Clone output fields
730   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
731     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
732       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
733     } else {
734       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
735                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
736     }
737   }
738   // -- Clone QFunctionAssemblyData
739   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
740 
741   // Multiplicity vector
742   if (op_restrict || op_prolong) {
743     CeedVector mult_e_vec;
744 
745     CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine));
746     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
747     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
748     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
749     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
750     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
751     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
752     CeedCall(CeedVectorDestroy(&mult_e_vec));
753     CeedCall(CeedVectorReciprocal(mult_vec));
754   }
755 
756   // Clone name
757   bool   has_name = op_fine->name;
758   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
759   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
760 
761   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
762   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
763             "Prolongation or restriction operator creation requires coarse-to-fine basis");
764 
765   // Restriction/Prolongation Operators
766   CeedInt num_comp;
767   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
768 
769   // Restriction
770   if (op_restrict) {
771     CeedInt             *num_comp_r_data;
772     CeedQFunction        qf_restrict;
773     CeedQFunctionContext ctx_r;
774 
775     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
776     CeedCall(CeedCalloc(1, &num_comp_r_data));
777     num_comp_r_data[0] = num_comp;
778     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
779     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
780     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
781     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
782     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
783     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
784     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
785     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
786 
787     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
788     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
789     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec));
790     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
791 
792     // Set name
793     char *restriction_name;
794     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
795     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
796     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
797     CeedCall(CeedFree(&restriction_name));
798 
799     // Check
800     CeedCall(CeedOperatorCheckReady(*op_restrict));
801 
802     // Cleanup
803     CeedCall(CeedQFunctionDestroy(&qf_restrict));
804   }
805 
806   // Prolongation
807   if (op_prolong) {
808     CeedInt             *num_comp_p_data;
809     CeedQFunction        qf_prolong;
810     CeedQFunctionContext ctx_p;
811 
812     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
813     CeedCall(CeedCalloc(1, &num_comp_p_data));
814     num_comp_p_data[0] = num_comp;
815     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
816     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
817     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
818     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
819     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
820     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
821     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
822     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
823 
824     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
825     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
826     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec));
827     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
828 
829     // Set name
830     char *prolongation_name;
831     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
832     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
833     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
834     CeedCall(CeedFree(&prolongation_name));
835 
836     // Check
837     CeedCall(CeedOperatorCheckReady(*op_prolong));
838 
839     // Cleanup
840     CeedCall(CeedQFunctionDestroy(&qf_prolong));
841   }
842 
843   // Check
844   CeedCall(CeedOperatorCheckReady(*op_coarse));
845 
846   // Cleanup
847   CeedCall(CeedVectorDestroy(&mult_vec));
848   CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine));
849   CeedCall(CeedBasisDestroy(&basis_c_to_f));
850 
851   return CEED_ERROR_SUCCESS;
852 }
853 
854 /**
855   @brief Build 1D mass matrix and Laplacian with perturbation
856 
857   @param[in]  interp_1d   Interpolation matrix in one dimension
858   @param[in]  grad_1d     Gradient matrix in one dimension
859   @param[in]  q_weight_1d Quadrature weights in one dimension
860   @param[in]  P_1d        Number of basis nodes in one dimension
861   @param[in]  Q_1d        Number of quadrature points in one dimension
862   @param[in]  dim         Dimension of basis
863   @param[out] mass        Assembled mass matrix in one dimension
864   @param[out] laplace     Assembled perturbed Laplacian in one dimension
865 
866   @return An error code: 0 - success, otherwise - failure
867 
868   @ref Developer
869 **/
870 CeedPragmaOptimizeOff
871 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
872                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
873   for (CeedInt i = 0; i < P_1d; i++) {
874     for (CeedInt j = 0; j < P_1d; j++) {
875       CeedScalar sum = 0.0;
876       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
877       mass[i + j * P_1d] = sum;
878     }
879   }
880   // -- Laplacian
881   for (CeedInt i = 0; i < P_1d; i++) {
882     for (CeedInt j = 0; j < P_1d; j++) {
883       CeedScalar sum = 0.0;
884       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
885       laplace[i + j * P_1d] = sum;
886     }
887   }
888   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
889   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
890   return CEED_ERROR_SUCCESS;
891 }
892 CeedPragmaOptimizeOn
893 
894 /// @}
895 
896 /// ----------------------------------------------------------------------------
897 /// CeedOperator Backend API
898 /// ----------------------------------------------------------------------------
899 /// @addtogroup CeedOperatorBackend
900 /// @{
901 
902 /**
903   @brief Create object holding CeedQFunction assembly data for CeedOperator
904 
905   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
906   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
907 
908   @return An error code: 0 - success, otherwise - failure
909 
910   @ref Backend
911 **/
912 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
913   CeedCall(CeedCalloc(1, data));
914   (*data)->ref_count = 1;
915   (*data)->ceed      = ceed;
916   CeedCall(CeedReference(ceed));
917 
918   return CEED_ERROR_SUCCESS;
919 }
920 
921 /**
922   @brief Increment the reference counter for a CeedQFunctionAssemblyData
923 
924   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
925 
926   @return An error code: 0 - success, otherwise - failure
927 
928   @ref Backend
929 **/
930 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
931   data->ref_count++;
932   return CEED_ERROR_SUCCESS;
933 }
934 
935 /**
936   @brief Set re-use of CeedQFunctionAssemblyData
937 
938   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
939   @param[in]     reuse_data Boolean flag indicating data re-use
940 
941   @return An error code: 0 - success, otherwise - failure
942 
943   @ref Backend
944 **/
945 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
946   data->reuse_data        = reuse_data;
947   data->needs_data_update = true;
948   return CEED_ERROR_SUCCESS;
949 }
950 
951 /**
952   @brief Mark QFunctionAssemblyData as stale
953 
954   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
955   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
956 
957   @return An error code: 0 - success, otherwise - failure
958 
959   @ref Backend
960 **/
961 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
962   data->needs_data_update = needs_data_update;
963   return CEED_ERROR_SUCCESS;
964 }
965 
966 /**
967   @brief Determine if QFunctionAssemblyData needs update
968 
969   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
970   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
971 
972   @return An error code: 0 - success, otherwise - failure
973 
974   @ref Backend
975 **/
976 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
977   *is_update_needed = !data->reuse_data || data->needs_data_update;
978   return CEED_ERROR_SUCCESS;
979 }
980 
981 /**
982   @brief Copy the pointer to a CeedQFunctionAssemblyData.
983 
984   Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
985 
986   Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
987         CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
988         CeedQFunctionAssemblyData.
989 
990   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
991   @param[in,out] data_copy Variable to store copied reference
992 
993   @return An error code: 0 - success, otherwise - failure
994 
995   @ref Backend
996 **/
997 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
998   CeedCall(CeedQFunctionAssemblyDataReference(data));
999   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1000   *data_copy = data;
1001   return CEED_ERROR_SUCCESS;
1002 }
1003 
1004 /**
1005   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1006 
1007   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1008   @param[out] is_setup Boolean flag for setup status
1009 
1010   @return An error code: 0 - success, otherwise - failure
1011 
1012   @ref Backend
1013 **/
1014 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1015   *is_setup = data->is_setup;
1016   return CEED_ERROR_SUCCESS;
1017 }
1018 
1019 /**
1020   @brief Set internal objects for CeedQFunctionAssemblyData
1021 
1022   @param[in,out] data CeedQFunctionAssemblyData to set objects
1023   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1024   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1025 
1026   @return An error code: 0 - success, otherwise - failure
1027 
1028   @ref Backend
1029 **/
1030 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1031   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1032   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1033 
1034   data->is_setup = true;
1035   return CEED_ERROR_SUCCESS;
1036 }
1037 
1038 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1039   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1040 
1041   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1042   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1043 
1044   return CEED_ERROR_SUCCESS;
1045 }
1046 
1047 /**
1048   @brief Destroy CeedQFunctionAssemblyData
1049 
1050   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1051 
1052   @return An error code: 0 - success, otherwise - failure
1053 
1054   @ref Backend
1055 **/
1056 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1057   if (!*data || --(*data)->ref_count > 0) {
1058     *data = NULL;
1059     return CEED_ERROR_SUCCESS;
1060   }
1061   CeedCall(CeedDestroy(&(*data)->ceed));
1062   CeedCall(CeedVectorDestroy(&(*data)->vec));
1063   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1064 
1065   CeedCall(CeedFree(data));
1066   return CEED_ERROR_SUCCESS;
1067 }
1068 
1069 /**
1070   @brief Get CeedOperatorAssemblyData
1071 
1072   @param[in]  op   CeedOperator to assemble
1073   @param[out] data CeedQFunctionAssemblyData
1074 
1075   @return An error code: 0 - success, otherwise - failure
1076 
1077   @ref Backend
1078 **/
1079 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1080   if (!op->op_assembled) {
1081     CeedOperatorAssemblyData data;
1082 
1083     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1084     op->op_assembled = data;
1085   }
1086   *data = op->op_assembled;
1087 
1088   return CEED_ERROR_SUCCESS;
1089 }
1090 
1091 /**
1092   @brief Create object holding CeedOperator assembly data.
1093 
1094   The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1095   An array with references to the corresponding active CeedElemRestrictions is also stored.
1096   For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1097   The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1098 CeedEvalMode.
1099   The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1100   Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1101 
1102   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1103   @param[in]  op   CeedOperator to be assembled
1104   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1105 
1106   @return An error code: 0 - success, otherwise - failure
1107 
1108   @ref Backend
1109 **/
1110 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1111   CeedInt num_active_bases = 0;
1112 
1113   // Allocate
1114   CeedCall(CeedCalloc(1, data));
1115   (*data)->ceed = ceed;
1116   CeedCall(CeedReference(ceed));
1117 
1118   // Build OperatorAssembly data
1119   CeedQFunction       qf;
1120   CeedQFunctionField *qf_fields;
1121   CeedOperatorField  *op_fields;
1122   CeedInt             num_input_fields;
1123   CeedCall(CeedOperatorGetQFunction(op, &qf));
1124   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1125   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1126 
1127   // Determine active input basis
1128   CeedInt       *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0;
1129   CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL;
1130   CeedSize     **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1131   for (CeedInt i = 0; i < num_input_fields; i++) {
1132     CeedVector vec;
1133     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1134     if (vec == CEED_VECTOR_ACTIVE) {
1135       CeedBasis    basis_in = NULL;
1136       CeedEvalMode eval_mode;
1137       CeedInt      index = -1, dim, num_comp, q_comp;
1138       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1139       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1140       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1141       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1142       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1143       for (CeedInt i = 0; i < num_active_bases; i++) {
1144         if ((*data)->active_bases[i] == basis_in) index = i;
1145       }
1146       if (index == -1) {
1147         CeedElemRestriction elem_rstr_in;
1148         index = num_active_bases;
1149         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1150         (*data)->active_bases[num_active_bases] = NULL;
1151         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases]));
1152         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1153         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1154         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1155         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases]));
1156         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1157         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1158         num_eval_modes_in[index]  = 0;
1159         num_eval_modes_out[index] = 0;
1160         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1161         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1162         eval_modes_in[index]  = NULL;
1163         eval_modes_out[index] = NULL;
1164         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1165         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1166         eval_mode_offsets_in[index]  = NULL;
1167         eval_mode_offsets_out[index] = NULL;
1168         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1169         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1170         (*data)->assembled_bases_in[index]  = NULL;
1171         (*data)->assembled_bases_out[index] = NULL;
1172         num_active_bases++;
1173       }
1174       if (eval_mode != CEED_EVAL_WEIGHT) {
1175         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1176         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1177         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1178         for (CeedInt d = 0; d < q_comp; d++) {
1179           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1180           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1181           offset += num_comp;
1182         }
1183         num_eval_modes_in[index] += q_comp;
1184       }
1185     }
1186   }
1187   (*data)->num_eval_modes_in    = num_eval_modes_in;
1188   (*data)->eval_modes_in        = eval_modes_in;
1189   (*data)->eval_mode_offsets_in = eval_mode_offsets_in;
1190 
1191   // Determine active output basis
1192   CeedInt num_output_fields;
1193   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1194   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1195   offset = 0;
1196   for (CeedInt i = 0; i < num_output_fields; i++) {
1197     CeedVector vec;
1198     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1199     if (vec == CEED_VECTOR_ACTIVE) {
1200       CeedBasis    basis_out = NULL;
1201       CeedEvalMode eval_mode;
1202       CeedInt      index = -1, dim, num_comp, q_comp;
1203       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1204       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1205       CeedCall(CeedBasisGetDimension(basis_out, &dim));
1206       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1207       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1208       for (CeedInt i = 0; i < num_active_bases; i++) {
1209         if ((*data)->active_bases[i] == basis_out) index = i;
1210       }
1211       if (index == -1) {
1212         CeedElemRestriction elem_rstr_out;
1213 
1214         index = num_active_bases;
1215         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1216         (*data)->active_bases[num_active_bases] = NULL;
1217         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases]));
1218         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1219         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1220         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1221         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases]));
1222         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1223         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1224         num_eval_modes_in[index]  = 0;
1225         num_eval_modes_out[index] = 0;
1226         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1227         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1228         eval_modes_in[index]  = NULL;
1229         eval_modes_out[index] = NULL;
1230         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1231         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1232         eval_mode_offsets_in[index]  = NULL;
1233         eval_mode_offsets_out[index] = NULL;
1234         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1235         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1236         (*data)->assembled_bases_in[index]  = NULL;
1237         (*data)->assembled_bases_out[index] = NULL;
1238         num_active_bases++;
1239       }
1240       if (eval_mode != CEED_EVAL_WEIGHT) {
1241         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1242         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1243         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1244         for (CeedInt d = 0; d < q_comp; d++) {
1245           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1246           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1247           offset += num_comp;
1248         }
1249         num_eval_modes_out[index] += q_comp;
1250       }
1251     }
1252   }
1253   (*data)->num_output_components = offset;
1254   (*data)->num_eval_modes_out    = num_eval_modes_out;
1255   (*data)->eval_modes_out        = eval_modes_out;
1256   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1257   (*data)->num_active_bases      = num_active_bases;
1258 
1259   return CEED_ERROR_SUCCESS;
1260 }
1261 
1262 /**
1263   @brief Get CeedOperator CeedEvalModes for assembly.
1264 
1265   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1266 
1267   @param[in]  data                  CeedOperatorAssemblyData
1268   @param[out] num_active_bases      Total number of active bases
1269   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1270                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1271   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1272   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1273   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1274   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1275   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1276   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1277                                       including contributions of all active bases
1278 
1279   @return An error code: 0 - success, otherwise - failure
1280 
1281 
1282   @ref Backend
1283 **/
1284 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in,
1285                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out,
1286                                          const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) {
1287   if (num_active_bases) *num_active_bases = data->num_active_bases;
1288   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1289   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1290   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1291   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1292   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1293   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1294   if (num_output_components) *num_output_components = data->num_output_components;
1295 
1296   return CEED_ERROR_SUCCESS;
1297 }
1298 
1299 /**
1300   @brief Get CeedOperator CeedBasis data for assembly.
1301 
1302   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1303 
1304   @param[in]  data                CeedOperatorAssemblyData
1305   @param[out] num_active_bases    Number of active bases, or NULL
1306   @param[out] active_bases        Pointer to hold active CeedBasis, or NULL
1307   @param[out] assembled_bases_in  Pointer to hold assembled active input B, or NULL
1308   @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL
1309 
1310   @return An error code: 0 - success, otherwise - failure
1311 
1312   @ref Backend
1313 **/
1314 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases,
1315                                      const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) {
1316   // Assemble B_in, B_out if needed
1317   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1318     CeedInt num_qpts;
1319 
1320     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1321     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1322       CeedInt     num_nodes;
1323       CeedScalar *B_in = NULL, *identity = NULL;
1324       bool        has_eval_none = false;
1325 
1326       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1327       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1328 
1329       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1330         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1331       }
1332       if (has_eval_none) {
1333         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1334         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1335           identity[i * num_nodes + i] = 1.0;
1336         }
1337       }
1338 
1339       for (CeedInt q = 0; q < num_qpts; q++) {
1340         for (CeedInt n = 0; n < num_nodes; n++) {
1341           CeedInt      d_in              = 0, q_comp_in;
1342           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1343           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1344             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1345             const CeedScalar *B  = NULL;
1346             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B);
1347             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in));
1348             if (q_comp_in > 1) {
1349               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1350               else B = &B[(++d_in) * num_qpts * num_nodes];
1351             }
1352             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1353             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1354           }
1355         }
1356       }
1357       if (identity) CeedCall(CeedFree(identity));
1358       data->assembled_bases_in[b] = B_in;
1359     }
1360   }
1361 
1362   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1363     CeedInt num_qpts;
1364 
1365     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1366     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1367       CeedInt     num_nodes;
1368       bool        has_eval_none = false;
1369       CeedScalar *B_out = NULL, *identity = NULL;
1370 
1371       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1372       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1373 
1374       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1375         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1376       }
1377       if (has_eval_none) {
1378         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1379         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1380           identity[i * num_nodes + i] = 1.0;
1381         }
1382       }
1383 
1384       for (CeedInt q = 0; q < num_qpts; q++) {
1385         for (CeedInt n = 0; n < num_nodes; n++) {
1386           CeedInt      d_out              = 0, q_comp_out;
1387           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1388           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1389             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1390             const CeedScalar *B  = NULL;
1391             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B);
1392             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out));
1393             if (q_comp_out > 1) {
1394               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1395               else B = &B[(++d_out) * num_qpts * num_nodes];
1396             }
1397             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1398             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1399           }
1400         }
1401       }
1402       if (identity) CeedCall(CeedFree(identity));
1403       data->assembled_bases_out[b] = B_out;
1404     }
1405   }
1406 
1407   // Pass out assembled data
1408   if (active_bases) *active_bases = data->active_bases;
1409   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1410   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1411 
1412   return CEED_ERROR_SUCCESS;
1413 }
1414 
1415 /**
1416   @brief Get CeedOperator CeedBasis data for assembly.
1417 
1418   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1419 
1420   @param[in]  data                  CeedOperatorAssemblyData
1421   @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL
1422   @param[out] active_elem_rstrs     Pointer to hold active CeedElemRestrictions, or NULL
1423 
1424   @return An error code: 0 - success, otherwise - failure
1425 
1426   @ref Backend
1427 **/
1428 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs,
1429                                                 CeedElemRestriction **active_elem_rstrs) {
1430   if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases;
1431   if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs;
1432 
1433   return CEED_ERROR_SUCCESS;
1434 }
1435 
1436 /**
1437   @brief Destroy CeedOperatorAssemblyData
1438 
1439   @param[in,out] data CeedOperatorAssemblyData to destroy
1440 
1441   @return An error code: 0 - success, otherwise - failure
1442 
1443   @ref Backend
1444 **/
1445 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1446   if (!*data) {
1447     *data = NULL;
1448     return CEED_ERROR_SUCCESS;
1449   }
1450   CeedCall(CeedDestroy(&(*data)->ceed));
1451   for (CeedInt b = 0; b < (*data)->num_active_bases; b++) {
1452     CeedCall(CeedBasisDestroy(&(*data)->active_bases[b]));
1453     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b]));
1454     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1455     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1456     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1457     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1458     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1459     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1460   }
1461   CeedCall(CeedFree(&(*data)->active_bases));
1462   CeedCall(CeedFree(&(*data)->active_elem_rstrs));
1463   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1464   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1465   CeedCall(CeedFree(&(*data)->eval_modes_in));
1466   CeedCall(CeedFree(&(*data)->eval_modes_out));
1467   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1468   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1469   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1470   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1471 
1472   CeedCall(CeedFree(data));
1473   return CEED_ERROR_SUCCESS;
1474 }
1475 
1476 /// @}
1477 
1478 /// ----------------------------------------------------------------------------
1479 /// CeedOperator Public API
1480 /// ----------------------------------------------------------------------------
1481 /// @addtogroup CeedOperatorUser
1482 /// @{
1483 
1484 /**
1485   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1486 
1487   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1488   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices
1489 representing the action of the CeedQFunction for a corresponding quadrature point on an element.
1490 
1491   Inputs and outputs are in the order provided by the user when adding CeedOperator fields.
1492   For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction
1493 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1494 
1495   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1496 
1497   @param[in]  op        CeedOperator to assemble CeedQFunction
1498   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1499   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1500   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1501 
1502   @return An error code: 0 - success, otherwise - failure
1503 
1504   @ref User
1505 **/
1506 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1507   CeedCall(CeedOperatorCheckReady(op));
1508 
1509   if (op->LinearAssembleQFunction) {
1510     // Backend version
1511     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1512   } else {
1513     // Operator fallback
1514     CeedOperator op_fallback;
1515 
1516     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1517     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1518     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1519   }
1520   return CEED_ERROR_SUCCESS;
1521 }
1522 
1523 /**
1524   @brief Assemble CeedQFunction and store result internally.
1525 
1526   Return copied references of stored data to the caller.
1527   Caller is responsible for ownership and destruction of the copied references.
1528   See also @ref CeedOperatorLinearAssembleQFunction
1529 
1530   Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers.
1531         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
1532 
1533   @param[in]  op        CeedOperator to assemble CeedQFunction
1534   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1535   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1536   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1537 
1538   @return An error code: 0 - success, otherwise - failure
1539 
1540   @ref User
1541 **/
1542 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1543   int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL;
1544   CeedOperator op_assemble                                                                           = NULL;
1545 
1546   CeedCall(CeedOperatorCheckReady(op));
1547 
1548   // Determine if fallback parent or operator has implementation
1549   if (op->op_fallback_parent && op->op_fallback_parent->LinearAssembleQFunctionUpdate) {
1550     // -- Backend version for op fallback parent is faster, if it exists
1551     LinearAssembleQFunctionUpdate = op->op_fallback_parent->LinearAssembleQFunctionUpdate;
1552     op_assemble                   = op->op_fallback_parent;
1553   } else if (op->LinearAssembleQFunctionUpdate) {
1554     // -- Backend version for op
1555     LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate;
1556     op_assemble                   = op;
1557   }
1558 
1559   // Assemble QFunction
1560   if (LinearAssembleQFunctionUpdate) {
1561     // Backend or fallback parent version
1562     bool                qf_assembled_is_setup;
1563     CeedVector          assembled_vec  = NULL;
1564     CeedElemRestriction assembled_rstr = NULL;
1565 
1566     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1567     if (qf_assembled_is_setup) {
1568       bool update_needed;
1569 
1570       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1571       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1572       if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request));
1573     } else {
1574       CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request));
1575       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1576     }
1577     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1578 
1579     // Copy reference from internally held copy
1580     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1581     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1582     CeedCall(CeedVectorDestroy(&assembled_vec));
1583     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1584   } else {
1585     // Operator fallback
1586     CeedOperator op_fallback;
1587 
1588     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1589     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1590     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1591   }
1592 
1593   return CEED_ERROR_SUCCESS;
1594 }
1595 
1596 /**
1597   @brief Assemble the diagonal of a square linear CeedOperator
1598 
1599   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1600 
1601   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1602 
1603   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1604 
1605   @param[in]  op        CeedOperator to assemble CeedQFunction
1606   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1607   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1608 
1609   @return An error code: 0 - success, otherwise - failure
1610 
1611   @ref User
1612 **/
1613 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1614   bool is_composite;
1615   CeedCall(CeedOperatorCheckReady(op));
1616   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1617 
1618   CeedSize input_size = 0, output_size = 0;
1619   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1620   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1621 
1622   // Early exit for empty operator
1623   if (!is_composite) {
1624     CeedInt num_elem = 0;
1625 
1626     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1627     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1628   }
1629 
1630   if (op->LinearAssembleDiagonal) {
1631     // Backend version
1632     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1633     return CEED_ERROR_SUCCESS;
1634   } else if (op->LinearAssembleAddDiagonal) {
1635     // Backend version with zeroing first
1636     CeedCall(CeedVectorSetValue(assembled, 0.0));
1637     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1638     return CEED_ERROR_SUCCESS;
1639   } else {
1640     // Operator fallback
1641     CeedOperator op_fallback;
1642 
1643     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1644     if (op_fallback) {
1645       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1646       return CEED_ERROR_SUCCESS;
1647     }
1648   }
1649   // Default interface implementation
1650   CeedCall(CeedVectorSetValue(assembled, 0.0));
1651   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1652 
1653   return CEED_ERROR_SUCCESS;
1654 }
1655 
1656 /**
1657   @brief Assemble the diagonal of a square linear CeedOperator
1658 
1659   This sums into a CeedVector the diagonal of a linear CeedOperator.
1660 
1661   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1662 
1663   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1664 
1665   @param[in]  op        CeedOperator to assemble CeedQFunction
1666   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1667   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1668 
1669   @return An error code: 0 - success, otherwise - failure
1670 
1671   @ref User
1672 **/
1673 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1674   bool is_composite;
1675   CeedCall(CeedOperatorCheckReady(op));
1676   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1677 
1678   CeedSize input_size = 0, output_size = 0;
1679   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1680   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1681 
1682   // Early exit for empty operator
1683   if (!is_composite) {
1684     CeedInt num_elem = 0;
1685 
1686     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1687     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1688   }
1689 
1690   if (op->LinearAssembleAddDiagonal) {
1691     // Backend version
1692     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1693     return CEED_ERROR_SUCCESS;
1694   } else {
1695     // Operator fallback
1696     CeedOperator op_fallback;
1697 
1698     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1699     if (op_fallback) {
1700       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1701       return CEED_ERROR_SUCCESS;
1702     }
1703   }
1704   // Default interface implementation
1705   if (is_composite) {
1706     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1707   } else {
1708     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1709   }
1710 
1711   return CEED_ERROR_SUCCESS;
1712 }
1713 
1714 /**
1715   @brief Assemble the point block diagonal of a square linear CeedOperator
1716 
1717   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1718 
1719   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1720 
1721   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1722 
1723   @param[in]  op        CeedOperator to assemble CeedQFunction
1724   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1725 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1726 component in].
1727   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1728 
1729   @return An error code: 0 - success, otherwise - failure
1730 
1731   @ref User
1732 **/
1733 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1734   bool is_composite;
1735   CeedCall(CeedOperatorCheckReady(op));
1736   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1737 
1738   CeedSize input_size = 0, output_size = 0;
1739   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1740   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1741 
1742   // Early exit for empty operator
1743   if (!is_composite) {
1744     CeedInt num_elem = 0;
1745 
1746     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1747     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1748   }
1749 
1750   if (op->LinearAssemblePointBlockDiagonal) {
1751     // Backend version
1752     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1753     return CEED_ERROR_SUCCESS;
1754   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1755     // Backend version with zeroing first
1756     CeedCall(CeedVectorSetValue(assembled, 0.0));
1757     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1758     return CEED_ERROR_SUCCESS;
1759   } else {
1760     // Operator fallback
1761     CeedOperator op_fallback;
1762 
1763     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1764     if (op_fallback) {
1765       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1766       return CEED_ERROR_SUCCESS;
1767     }
1768   }
1769   // Default interface implementation
1770   CeedCall(CeedVectorSetValue(assembled, 0.0));
1771   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1772 
1773   return CEED_ERROR_SUCCESS;
1774 }
1775 
1776 /**
1777   @brief Assemble the point block diagonal of a square linear CeedOperator
1778 
1779   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1780 
1781   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1782 
1783   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1784 
1785   @param[in]  op        CeedOperator to assemble CeedQFunction
1786   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1787 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1788 component in].
1789   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1790 
1791   @return An error code: 0 - success, otherwise - failure
1792 
1793   @ref User
1794 **/
1795 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1796   bool is_composite;
1797   CeedCall(CeedOperatorCheckReady(op));
1798   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1799 
1800   CeedSize input_size = 0, output_size = 0;
1801   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1802   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1803 
1804   // Early exit for empty operator
1805   if (!is_composite) {
1806     CeedInt num_elem = 0;
1807 
1808     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1809     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1810   }
1811 
1812   if (op->LinearAssembleAddPointBlockDiagonal) {
1813     // Backend version
1814     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1815     return CEED_ERROR_SUCCESS;
1816   } else {
1817     // Operator fallback
1818     CeedOperator op_fallback;
1819 
1820     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1821     if (op_fallback) {
1822       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1823       return CEED_ERROR_SUCCESS;
1824     }
1825   }
1826   // Default interface implementation
1827   if (is_composite) {
1828     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1829   } else {
1830     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1831   }
1832 
1833   return CEED_ERROR_SUCCESS;
1834 }
1835 
1836 /**
1837    @brief Fully assemble the nonzero pattern of a linear operator.
1838 
1839    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1840 
1841    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1842 matrix in entry (i, j).
1843   Note that the (i, j) pairs are not unique and may repeat.
1844   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1845 
1846    This will generally be slow unless your operator is low-order.
1847 
1848    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1849 
1850    @param[in]  op          CeedOperator to assemble
1851    @param[out] num_entries Number of entries in coordinate nonzero pattern
1852    @param[out] rows        Row number for each entry
1853    @param[out] cols        Column number for each entry
1854 
1855    @ref User
1856 **/
1857 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1858   CeedInt       num_suboperators;
1859   CeedSize      single_entries;
1860   CeedOperator *sub_operators;
1861   bool          is_composite;
1862   CeedCall(CeedOperatorCheckReady(op));
1863   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1864 
1865   if (op->LinearAssembleSymbolic) {
1866     // Backend version
1867     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1868     return CEED_ERROR_SUCCESS;
1869   } else {
1870     // Operator fallback
1871     CeedOperator op_fallback;
1872 
1873     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1874     if (op_fallback) {
1875       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1876       return CEED_ERROR_SUCCESS;
1877     }
1878   }
1879 
1880   // Default interface implementation
1881 
1882   // count entries and allocate rows, cols arrays
1883   *num_entries = 0;
1884   if (is_composite) {
1885     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1886     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1887     for (CeedInt k = 0; k < num_suboperators; ++k) {
1888       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1889       *num_entries += single_entries;
1890     }
1891   } else {
1892     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1893     *num_entries += single_entries;
1894   }
1895   CeedCall(CeedCalloc(*num_entries, rows));
1896   CeedCall(CeedCalloc(*num_entries, cols));
1897 
1898   // assemble nonzero locations
1899   CeedInt offset = 0;
1900   if (is_composite) {
1901     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1902     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1903     for (CeedInt k = 0; k < num_suboperators; ++k) {
1904       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1905       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1906       offset += single_entries;
1907     }
1908   } else {
1909     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1910   }
1911 
1912   return CEED_ERROR_SUCCESS;
1913 }
1914 
1915 /**
1916    @brief Fully assemble the nonzero entries of a linear operator.
1917 
1918    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1919 
1920    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1921 matrix in entry (i, j).
1922   Note that the (i, j) pairs are not unique and may repeat.
1923   This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1924 
1925    This will generally be slow unless your operator is low-order.
1926 
1927    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1928 
1929    @param[in]  op     CeedOperator to assemble
1930    @param[out] values Values to assemble into matrix
1931 
1932    @ref User
1933 **/
1934 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1935   CeedInt       num_suboperators;
1936   CeedSize      single_entries = 0;
1937   CeedOperator *sub_operators;
1938   bool          is_composite;
1939   CeedCall(CeedOperatorCheckReady(op));
1940   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1941 
1942   // Early exit for empty operator
1943   if (!is_composite) {
1944     CeedInt num_elem = 0;
1945 
1946     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1947     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1948   }
1949 
1950   if (op->LinearAssemble) {
1951     // Backend version
1952     CeedCall(op->LinearAssemble(op, values));
1953     return CEED_ERROR_SUCCESS;
1954   } else {
1955     // Operator fallback
1956     CeedOperator op_fallback;
1957 
1958     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1959     if (op_fallback) {
1960       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
1961       return CEED_ERROR_SUCCESS;
1962     }
1963   }
1964 
1965   // Default interface implementation
1966   CeedInt offset = 0;
1967   CeedCall(CeedVectorSetValue(values, 0.0));
1968   if (is_composite) {
1969     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1970     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1971     for (CeedInt k = 0; k < num_suboperators; k++) {
1972       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
1973       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1974       offset += single_entries;
1975     }
1976   } else {
1977     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
1978   }
1979 
1980   return CEED_ERROR_SUCCESS;
1981 }
1982 
1983 /**
1984   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
1985 
1986   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1987 
1988   @param[in]  op               Composite CeedOperator
1989   @param[in]  num_skip_indices Number of suboperators to skip
1990   @param[in]  skip_indices     Array of indices of suboperators to skip
1991   @param[out] mult             Vector to store multiplicity (of size l_size)
1992 
1993   @return An error code: 0 - success, otherwise - failure
1994 
1995   @ref User
1996 **/
1997 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
1998   CeedCall(CeedOperatorCheckReady(op));
1999 
2000   Ceed                ceed;
2001   CeedInt             num_suboperators;
2002   CeedSize            l_vec_len;
2003   CeedScalar         *mult_array;
2004   CeedVector          ones_l_vec;
2005   CeedElemRestriction elem_rstr;
2006   CeedOperator       *sub_operators;
2007 
2008   CeedCall(CeedOperatorGetCeed(op, &ceed));
2009 
2010   // Zero mult vector
2011   CeedCall(CeedVectorSetValue(mult, 0.0));
2012 
2013   // Get suboperators
2014   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2015   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2016   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2017 
2018   // Work vector
2019   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2020   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2021   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2022   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2023 
2024   // Compute multiplicity across suboperators
2025   for (CeedInt i = 0; i < num_suboperators; i++) {
2026     const CeedScalar *sub_mult_array;
2027     CeedVector        sub_mult_l_vec, ones_e_vec;
2028 
2029     // -- Check for suboperator to skip
2030     for (CeedInt j = 0; j < num_skip_indices; j++) {
2031       if (skip_indices[j] == i) continue;
2032     }
2033 
2034     // -- Sub operator multiplicity
2035     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2036     CeedCall(CeedElemRestrictionCreateVector(elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2037     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2038     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2039     CeedCall(CeedElemRestrictionApply(elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2040     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2041     // ---- Flag every node present in the current suboperator
2042     for (CeedInt j = 0; j < l_vec_len; j++) {
2043       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2044     }
2045     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2046     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2047     CeedCall(CeedVectorDestroy(&ones_e_vec));
2048   }
2049   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2050   CeedCall(CeedVectorDestroy(&ones_l_vec));
2051 
2052   return CEED_ERROR_SUCCESS;
2053 }
2054 
2055 /**
2056   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2057 grid interpolation
2058 
2059   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2060 
2061   @param[in]  op_fine      Fine grid operator
2062   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2063   @param[in]  rstr_coarse  Coarse grid restriction
2064   @param[in]  basis_coarse Coarse grid active vector basis
2065   @param[out] op_coarse    Coarse grid operator
2066   @param[out] op_prolong   Coarse to fine operator, or NULL
2067   @param[out] op_restrict  Fine to coarse operator, or NULL
2068 
2069   @return An error code: 0 - success, otherwise - failure
2070 
2071   @ref User
2072 **/
2073 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2074                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2075   CeedCall(CeedOperatorCheckReady(op_fine));
2076 
2077   // Build prolongation matrix, if required
2078   CeedBasis basis_c_to_f = NULL;
2079   if (op_prolong || op_restrict) {
2080     CeedBasis basis_fine;
2081     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2082     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2083   }
2084 
2085   // Core code
2086   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2087 
2088   return CEED_ERROR_SUCCESS;
2089 }
2090 
2091 /**
2092   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2093 
2094   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2095 
2096   @param[in]  op_fine       Fine grid operator
2097   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2098   @param[in]  rstr_coarse   Coarse grid restriction
2099   @param[in]  basis_coarse  Coarse grid active vector basis
2100   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2101   @param[out] op_coarse     Coarse grid operator
2102   @param[out] op_prolong    Coarse to fine operator, or NULL
2103   @param[out] op_restrict   Fine to coarse operator, or NULL
2104 
2105   @return An error code: 0 - success, otherwise - failure
2106 
2107   @ref User
2108 **/
2109 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2110                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2111                                              CeedOperator *op_restrict) {
2112   CeedCall(CeedOperatorCheckReady(op_fine));
2113   Ceed ceed;
2114   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2115 
2116   // Check for compatible quadrature spaces
2117   CeedBasis basis_fine;
2118   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2119   CeedInt Q_f, Q_c;
2120   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2121   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2122   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2123 
2124   // Create coarse to fine basis, if required
2125   CeedBasis basis_c_to_f = NULL;
2126   if (op_prolong || op_restrict) {
2127     // Check if interpolation matrix is provided
2128     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2129               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2130     CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2131     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2132     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2133     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2134     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2135     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2136     CeedScalar *q_ref, *q_weight, *grad;
2137     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2138     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2139     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2140     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2141     CeedCall(CeedFree(&q_ref));
2142     CeedCall(CeedFree(&q_weight));
2143     CeedCall(CeedFree(&grad));
2144   }
2145 
2146   // Core code
2147   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2148   return CEED_ERROR_SUCCESS;
2149 }
2150 
2151 /**
2152   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2153 
2154   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2155 
2156   @param[in]  op_fine       Fine grid operator
2157   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2158   @param[in]  rstr_coarse   Coarse grid restriction
2159   @param[in]  basis_coarse  Coarse grid active vector basis
2160   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2161   @param[out] op_coarse     Coarse grid operator
2162   @param[out] op_prolong    Coarse to fine operator, or NULL
2163   @param[out] op_restrict   Fine to coarse operator, or NULL
2164 
2165   @return An error code: 0 - success, otherwise - failure
2166 
2167   @ref User
2168 **/
2169 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2170                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2171                                        CeedOperator *op_restrict) {
2172   CeedCall(CeedOperatorCheckReady(op_fine));
2173   Ceed ceed;
2174   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2175 
2176   // Check for compatible quadrature spaces
2177   CeedBasis basis_fine;
2178   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2179   CeedInt Q_f, Q_c;
2180   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2181   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2182   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2183 
2184   // Coarse to fine basis
2185   CeedBasis basis_c_to_f = NULL;
2186   if (op_prolong || op_restrict) {
2187     // Check if interpolation matrix is provided
2188     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2189               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2190     CeedElemTopology topo;
2191     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2192     CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2193     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2194     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2195     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2196     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2197     CeedScalar *q_ref, *q_weight, *grad;
2198     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2199     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2200     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2201     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2202     CeedCall(CeedFree(&q_ref));
2203     CeedCall(CeedFree(&q_weight));
2204     CeedCall(CeedFree(&grad));
2205   }
2206 
2207   // Core code
2208   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2209   return CEED_ERROR_SUCCESS;
2210 }
2211 
2212 /**
2213   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2214 
2215   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2216   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2217   The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T
2218 \hat S V\f$.
2219   The CeedOperator must be linear and non-composite.
2220   The associated CeedQFunction must therefore also be linear.
2221 
2222   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2223 
2224   @param[in]  op      CeedOperator to create element inverses
2225   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2226   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2227 
2228   @return An error code: 0 - success, otherwise - failure
2229 
2230   @ref User
2231 **/
2232 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2233   CeedCall(CeedOperatorCheckReady(op));
2234 
2235   if (op->CreateFDMElementInverse) {
2236     // Backend version
2237     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2238     return CEED_ERROR_SUCCESS;
2239   } else {
2240     // Operator fallback
2241     CeedOperator op_fallback;
2242 
2243     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2244     if (op_fallback) {
2245       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2246       return CEED_ERROR_SUCCESS;
2247     }
2248   }
2249 
2250   // Default interface implementation
2251   Ceed ceed, ceed_parent;
2252   CeedCall(CeedOperatorGetCeed(op, &ceed));
2253   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2254   ceed_parent = ceed_parent ? ceed_parent : ceed;
2255   CeedQFunction qf;
2256   CeedCall(CeedOperatorGetQFunction(op, &qf));
2257 
2258   // Determine active input basis
2259   bool                interp = false, grad = false;
2260   CeedBasis           basis = NULL;
2261   CeedElemRestriction rstr  = NULL;
2262   CeedOperatorField  *op_fields;
2263   CeedQFunctionField *qf_fields;
2264   CeedInt             num_input_fields;
2265   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2266   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2267   for (CeedInt i = 0; i < num_input_fields; i++) {
2268     CeedVector vec;
2269     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2270     if (vec == CEED_VECTOR_ACTIVE) {
2271       CeedEvalMode eval_mode;
2272       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2273       interp = interp || eval_mode == CEED_EVAL_INTERP;
2274       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2275       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2276       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2277     }
2278   }
2279   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2280   CeedSize l_size = 1;
2281   CeedInt  P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2282   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2283   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2284   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2285   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2286   CeedCall(CeedBasisGetDimension(basis, &dim));
2287   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2288   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2289   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2290 
2291   // Build and diagonalize 1D Mass and Laplacian
2292   bool is_tensor_basis;
2293   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2294   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2295   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2296   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2297   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2298   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2299   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2300   CeedCall(CeedCalloc(P_1d, &lambda));
2301   // -- Build matrices
2302   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2303   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2304   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2305   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2306   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2307 
2308   // -- Diagonalize
2309   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2310   CeedCall(CeedFree(&mass));
2311   CeedCall(CeedFree(&laplace));
2312   for (CeedInt i = 0; i < P_1d; i++) {
2313     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2314   }
2315   CeedCall(CeedFree(&x));
2316 
2317   // Assemble QFunction
2318   CeedVector          assembled = NULL;
2319   CeedElemRestriction rstr_qf   = NULL;
2320   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2321   CeedInt layout[3];
2322   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2323   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2324   CeedScalar max_norm = 0;
2325   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2326 
2327   // Calculate element averages
2328   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2329   CeedScalar       *elem_avg;
2330   const CeedScalar *assembled_array, *q_weight_array;
2331   CeedVector        q_weight;
2332   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2333   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2334   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2335   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2336   CeedCall(CeedCalloc(num_elem, &elem_avg));
2337   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2338   for (CeedInt e = 0; e < num_elem; e++) {
2339     CeedInt count = 0;
2340     for (CeedInt q = 0; q < num_qpts; q++) {
2341       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2342         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2343           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2344           count++;
2345         }
2346       }
2347     }
2348     if (count) {
2349       elem_avg[e] /= count;
2350     } else {
2351       elem_avg[e] = 1.0;
2352     }
2353   }
2354   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2355   CeedCall(CeedVectorDestroy(&assembled));
2356   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2357   CeedCall(CeedVectorDestroy(&q_weight));
2358 
2359   // Build FDM diagonal
2360   CeedVector  q_data;
2361   CeedScalar *q_data_array, *fdm_diagonal;
2362   CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2363   const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2364   for (CeedInt c = 0; c < num_comp; c++) {
2365     for (CeedInt n = 0; n < num_nodes; n++) {
2366       if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2367       if (grad) {
2368         for (CeedInt d = 0; d < dim; d++) {
2369           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2370           fdm_diagonal[c * num_nodes + n] += lambda[i];
2371         }
2372       }
2373       if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2374     }
2375   }
2376   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2377   CeedCall(CeedVectorSetValue(q_data, 0.0));
2378   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2379   for (CeedInt e = 0; e < num_elem; e++) {
2380     for (CeedInt c = 0; c < num_comp; c++) {
2381       for (CeedInt n = 0; n < num_nodes; n++) q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2382     }
2383   }
2384   CeedCall(CeedFree(&elem_avg));
2385   CeedCall(CeedFree(&fdm_diagonal));
2386   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2387 
2388   // Setup FDM operator
2389   // -- Basis
2390   CeedBasis   fdm_basis;
2391   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2392   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2393   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2394   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2395   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2396   CeedCall(CeedFree(&fdm_interp));
2397   CeedCall(CeedFree(&grad_dummy));
2398   CeedCall(CeedFree(&q_ref_dummy));
2399   CeedCall(CeedFree(&q_weight_dummy));
2400   CeedCall(CeedFree(&lambda));
2401 
2402   // -- Restriction
2403   CeedElemRestriction rstr_qd_i;
2404   CeedInt             strides[3] = {1, num_nodes, num_nodes * num_comp};
2405   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i));
2406   // -- QFunction
2407   CeedQFunction qf_fdm;
2408   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2409   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2410   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2411   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2412   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2413   // -- QFunction context
2414   CeedInt *num_comp_data;
2415   CeedCall(CeedCalloc(1, &num_comp_data));
2416   num_comp_data[0] = num_comp;
2417   CeedQFunctionContext ctx_fdm;
2418   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2419   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2420   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2421   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2422   // -- Operator
2423   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2424   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2425   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2426   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2427 
2428   // Cleanup
2429   CeedCall(CeedVectorDestroy(&q_data));
2430   CeedCall(CeedBasisDestroy(&fdm_basis));
2431   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2432   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2433 
2434   return CEED_ERROR_SUCCESS;
2435 }
2436 
2437 /// @}
2438