xref: /libCEED/interface/ceed-preconditioning.c (revision 2b730f8b5a9c809740a0b3b302db43a719c636b1)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <assert.h>
9 #include <ceed-impl.h>
10 #include <ceed/backend.h>
11 #include <ceed/ceed.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced
28          CeedOperator functionality
29 
30   @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction
31   @param[in] qf            CeedQFunction to create fallback for
32   @param[out] qf_fallback  fallback CeedQFunction
33 
34   @return An error code: 0 - success, otherwise - failure
35 
36   @ref Developer
37 **/
38 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
39   // Check if NULL qf passed in
40   if (!qf) return CEED_ERROR_SUCCESS;
41 
42   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
43   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
44 
45   char *source_path_with_name = "";
46   if (qf->source_path) {
47     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
48     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
49     memcpy(source_path_with_name, qf->source_path, path_len);
50     memcpy(&source_path_with_name[path_len], ":", 1);
51     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
52   } else {
53     CeedCall(CeedCalloc(1, &source_path_with_name));
54   }
55 
56   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
57   {
58     CeedQFunctionContext ctx;
59 
60     CeedCall(CeedQFunctionGetContext(qf, &ctx));
61     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
62   }
63   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
64     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
65   }
66   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
67     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
68   }
69   CeedCall(CeedFree(&source_path_with_name));
70 
71   return CEED_ERROR_SUCCESS;
72 }
73 
74 /**
75   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced
76          CeedOperator functionality
77 
78   @param op  CeedOperator to create fallback for
79 
80   @return An error code: 0 - success, otherwise - failure
81 
82   @ref Developer
83 **/
84 static int CeedOperatorCreateFallback(CeedOperator op) {
85   Ceed ceed_fallback;
86 
87   // Check not already created
88   if (op->op_fallback) return CEED_ERROR_SUCCESS;
89 
90   // Fallback Ceed
91   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
92   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
93 
94   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
95   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
96 
97   // Clone Op
98   CeedOperator op_fallback;
99   if (op->is_composite) {
100     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
101     for (CeedInt i = 0; i < op->num_suboperators; i++) {
102       CeedOperator op_sub_fallback;
103 
104       CeedCall(CeedOperatorGetFallback(op->sub_operators[i], &op_sub_fallback));
105       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
106     }
107   } else {
108     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
109     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
110     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
111     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
112     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
113     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
114       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_restr, op->input_fields[i]->basis,
115                                     op->input_fields[i]->vec));
116     }
117     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
118       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_restr, op->output_fields[i]->basis,
119                                     op->output_fields[i]->vec));
120     }
121     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
122     if (op_fallback->num_qpts == 0) {
123       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
124     }
125     // Cleanup
126     CeedCall(CeedQFunctionDestroy(&qf_fallback));
127     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
128     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
129   }
130   CeedCall(CeedOperatorSetName(op_fallback, op->name));
131   CeedCall(CeedOperatorCheckReady(op_fallback));
132   op->op_fallback = op_fallback;
133 
134   return CEED_ERROR_SUCCESS;
135 }
136 
137 /**
138   @brief Retreive fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
139 
140   @param[in] op            CeedOperator to retrieve fallback for
141   @param[out] op_fallback  Fallback CeedOperator
142 
143   @return An error code: 0 - success, otherwise - failure
144 
145   @ref Developer
146 **/
147 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
148   // Create if needed
149   if (!op->op_fallback) {
150     CeedCall(CeedOperatorCreateFallback(op));
151   }
152   if (op->op_fallback) {
153     bool is_debug;
154 
155     CeedCall(CeedIsDebug(op->ceed, &is_debug));
156     if (is_debug) {
157       Ceed        ceed_fallback;
158       const char *resource, *resource_fallback;
159 
160       CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
161       CeedCall(CeedGetResource(op->ceed, &resource));
162       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
163 
164       CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
165       CeedDebug(op->ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
166                 op->op_fallback);
167     }
168   }
169   *op_fallback = op->op_fallback;
170 
171   return CEED_ERROR_SUCCESS;
172 }
173 
174 /**
175   @brief Select correct basis matrix pointer based on CeedEvalMode
176 
177   @param[in] eval_mode   Current basis evaluation mode
178   @param[in] identity    Pointer to identity matrix
179   @param[in] interp      Pointer to interpolation matrix
180   @param[in] grad        Pointer to gradient matrix
181   @param[out] basis_ptr  Basis pointer to set
182 
183   @ref Developer
184 **/
185 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad,
186                                                const CeedScalar **basis_ptr) {
187   switch (eval_mode) {
188     case CEED_EVAL_NONE:
189       *basis_ptr = identity;
190       break;
191     case CEED_EVAL_INTERP:
192       *basis_ptr = interp;
193       break;
194     case CEED_EVAL_GRAD:
195       *basis_ptr = grad;
196       break;
197     case CEED_EVAL_WEIGHT:
198     case CEED_EVAL_DIV:
199     case CEED_EVAL_CURL:
200       break;  // Caught by QF Assembly
201   }
202   assert(*basis_ptr != NULL);
203 }
204 
205 /**
206   @brief Create point block restriction for active operator field
207 
208   @param[in] rstr              Original CeedElemRestriction for active field
209   @param[out] pointblock_rstr  Address of the variable where the newly created
210                                  CeedElemRestriction will be stored
211 
212   @return An error code: 0 - success, otherwise - failure
213 
214   @ref Developer
215 **/
216 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
217   Ceed ceed;
218   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
219   const CeedInt *offsets;
220   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
221 
222   // Expand offsets
223   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
224   CeedSize l_size;
225   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
226   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
227   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
228   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
229   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
230   CeedInt shift = num_comp;
231   if (comp_stride != 1) shift *= num_comp;
232   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
233   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
234     pointblock_offsets[i] = offsets[i] * shift;
235   }
236 
237   // Create new restriction
238   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
239                                      pointblock_offsets, pointblock_rstr));
240 
241   // Cleanup
242   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
243 
244   return CEED_ERROR_SUCCESS;
245 }
246 
247 /**
248   @brief Core logic for assembling operator diagonal or point block diagonal
249 
250   @param[in] op             CeedOperator to assemble point block diagonal
251   @param[in] request        Address of CeedRequest for non-blocking completion, else
252                               CEED_REQUEST_IMMEDIATE
253   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
254   @param[out] assembled     CeedVector to store assembled diagonal
255 
256   @return An error code: 0 - success, otherwise - failure
257 
258   @ref Developer
259 **/
260 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
261   Ceed ceed;
262   CeedCall(CeedOperatorGetCeed(op, &ceed));
263 
264   // Assemble QFunction
265   CeedQFunction qf;
266   CeedCall(CeedOperatorGetQFunction(op, &qf));
267   CeedInt num_input_fields, num_output_fields;
268   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
269   CeedVector          assembled_qf;
270   CeedElemRestriction rstr;
271   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr, request));
272   CeedInt layout[3];
273   CeedCall(CeedElemRestrictionGetELayout(rstr, &layout));
274   CeedCall(CeedElemRestrictionDestroy(&rstr));
275 
276   // Get assembly data
277   CeedOperatorAssemblyData data;
278   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
279   const CeedEvalMode *eval_mode_in, *eval_mode_out;
280   CeedInt             num_eval_mode_in, num_eval_mode_out;
281   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out));
282   CeedBasis basis_in, basis_out;
283   CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL));
284   CeedInt num_comp;
285   CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
286 
287   // Assemble point block diagonal restriction, if needed
288   CeedElemRestriction diag_rstr;
289   CeedCall(CeedOperatorGetActiveElemRestriction(op, &diag_rstr));
290   if (is_pointblock) {
291     CeedElemRestriction point_block_rstr;
292     CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_rstr, &point_block_rstr));
293     diag_rstr = point_block_rstr;
294   }
295 
296   // Create diagonal vector
297   CeedVector elem_diag;
298   CeedCall(CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag));
299 
300   // Assemble element operator diagonals
301   CeedScalar       *elem_diag_array;
302   const CeedScalar *assembled_qf_array;
303   CeedCall(CeedVectorSetValue(elem_diag, 0.0));
304   CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
305   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
306   CeedInt num_elem, num_nodes, num_qpts;
307   CeedCall(CeedElemRestrictionGetNumElements(diag_rstr, &num_elem));
308   CeedCall(CeedBasisGetNumNodes(basis_in, &num_nodes));
309   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
310 
311   // Basis matrices
312   const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
313   CeedScalar       *identity      = NULL;
314   bool              has_eval_none = false;
315   for (CeedInt i = 0; i < num_eval_mode_in; i++) {
316     has_eval_none = has_eval_none || (eval_mode_in[i] == CEED_EVAL_NONE);
317   }
318   for (CeedInt i = 0; i < num_eval_mode_out; i++) {
319     has_eval_none = has_eval_none || (eval_mode_out[i] == CEED_EVAL_NONE);
320   }
321   if (has_eval_none) {
322     CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
323     for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
324   }
325   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
326   CeedCall(CeedBasisGetInterp(basis_out, &interp_out));
327   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
328   CeedCall(CeedBasisGetGrad(basis_out, &grad_out));
329   // Compute the diagonal of B^T D B
330   // Each element
331   for (CeedInt e = 0; e < num_elem; e++) {
332     CeedInt d_out = -1;
333     // Each basis eval mode pair
334     for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
335       const CeedScalar *bt = NULL;
336       if (eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out += 1;
337       CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * num_nodes], &bt);
338       CeedInt d_in = -1;
339       for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
340         const CeedScalar *b = NULL;
341         if (eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in += 1;
342         CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * num_nodes], &b);
343         // Each component
344         for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
345           // Each qpoint/node pair
346           for (CeedInt q = 0; q < num_qpts; q++) {
347             if (is_pointblock) {
348               // Point Block Diagonal
349               for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
350                 const CeedScalar qf_value =
351                     assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_in) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] +
352                                        e * layout[2]];
353                 for (CeedInt n = 0; n < num_nodes; n++) {
354                   elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
355                       bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n];
356                 }
357               }
358             } else {
359               // Diagonal Only
360               const CeedScalar qf_value =
361                   assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_out) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] +
362                                      e * layout[2]];
363               for (CeedInt n = 0; n < num_nodes; n++) {
364                 elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n];
365               }
366             }
367           }
368         }
369       }
370     }
371   }
372   CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
373   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
374 
375   // Assemble local operator diagonal
376   CeedCall(CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
377 
378   // Cleanup
379   if (is_pointblock) {
380     CeedCall(CeedElemRestrictionDestroy(&diag_rstr));
381   }
382   CeedCall(CeedVectorDestroy(&assembled_qf));
383   CeedCall(CeedVectorDestroy(&elem_diag));
384   CeedCall(CeedFree(&identity));
385 
386   return CEED_ERROR_SUCCESS;
387 }
388 
389 /**
390   @brief Core logic for assembling composite operator diagonal
391 
392   @param[in] op             CeedOperator to assemble point block diagonal
393   @param[in] request        Address of CeedRequest for non-blocking completion, else
394                             CEED_REQUEST_IMMEDIATE
395   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
396   @param[out] assembled     CeedVector to store assembled diagonal
397 
398   @return An error code: 0 - success, otherwise - failure
399 
400   @ref Developer
401 **/
402 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
403                                                                  CeedVector assembled) {
404   CeedInt       num_sub;
405   CeedOperator *suboperators;
406   CeedCall(CeedOperatorGetNumSub(op, &num_sub));
407   CeedCall(CeedOperatorGetSubList(op, &suboperators));
408   for (CeedInt i = 0; i < num_sub; i++) {
409     if (is_pointblock) {
410       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
411     } else {
412       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
413     }
414   }
415   return CEED_ERROR_SUCCESS;
416 }
417 
418 /**
419   @brief Build nonzero pattern for non-composite operator
420 
421   Users should generally use CeedOperatorLinearAssembleSymbolic()
422 
423   @param[in] op      CeedOperator to assemble nonzero pattern
424   @param[in] offset  Offset for number of entries
425   @param[out] rows   Row number for each entry
426   @param[out] cols   Column number for each entry
427 
428   @return An error code: 0 - success, otherwise - failure
429 
430   @ref Developer
431 **/
432 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
433   Ceed ceed = op->ceed;
434   if (op->is_composite) {
435     // LCOV_EXCL_START
436     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
437     // LCOV_EXCL_STOP
438   }
439 
440   CeedSize num_nodes;
441   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
442   CeedElemRestriction rstr_in;
443   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
444   CeedInt num_elem, elem_size, num_comp;
445   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
446   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
447   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
448   CeedInt layout_er[3];
449   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
450 
451   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
452 
453   // Determine elem_dof relation
454   CeedVector index_vec;
455   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
456   CeedScalar *array;
457   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
458   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
459   CeedCall(CeedVectorRestoreArray(index_vec, &array));
460   CeedVector elem_dof;
461   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
462   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
463   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
464   const CeedScalar *elem_dof_a;
465   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
466   CeedCall(CeedVectorDestroy(&index_vec));
467 
468   // Determine i, j locations for element matrices
469   CeedInt count = 0;
470   for (CeedInt e = 0; e < num_elem; e++) {
471     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
472       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
473         for (CeedInt i = 0; i < elem_size; i++) {
474           for (CeedInt j = 0; j < elem_size; j++) {
475             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
476             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
477 
478             const CeedInt row = elem_dof_a[elem_dof_index_row];
479             const CeedInt col = elem_dof_a[elem_dof_index_col];
480 
481             rows[offset + count] = row;
482             cols[offset + count] = col;
483             count++;
484           }
485         }
486       }
487     }
488   }
489   if (count != local_num_entries) {
490     // LCOV_EXCL_START
491     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
492     // LCOV_EXCL_STOP
493   }
494   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
495   CeedCall(CeedVectorDestroy(&elem_dof));
496 
497   return CEED_ERROR_SUCCESS;
498 }
499 
500 /**
501   @brief Assemble nonzero entries for non-composite operator
502 
503   Users should generally use CeedOperatorLinearAssemble()
504 
505   @param[in] op       CeedOperator to assemble
506   @param[in] offset   Offest for number of entries
507   @param[out] values  Values to assemble into matrix
508 
509   @return An error code: 0 - success, otherwise - failure
510 
511   @ref Developer
512 **/
513 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
514   Ceed ceed = op->ceed;
515   if (op->is_composite) {
516     // LCOV_EXCL_START
517     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
518     // LCOV_EXCL_STOP
519   }
520   if (op->num_elem == 0) return CEED_ERROR_SUCCESS;
521 
522   if (op->LinearAssembleSingle) {
523     // Backend version
524     CeedCall(op->LinearAssembleSingle(op, offset, values));
525     return CEED_ERROR_SUCCESS;
526   } else {
527     // Operator fallback
528     CeedOperator op_fallback;
529 
530     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
531     if (op_fallback) {
532       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
533       return CEED_ERROR_SUCCESS;
534     }
535   }
536 
537   // Assemble QFunction
538   CeedQFunction qf;
539   CeedCall(CeedOperatorGetQFunction(op, &qf));
540   CeedVector          assembled_qf;
541   CeedElemRestriction rstr_q;
542   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
543   CeedSize qf_length;
544   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
545 
546   CeedInt            num_input_fields, num_output_fields;
547   CeedOperatorField *input_fields;
548   CeedOperatorField *output_fields;
549   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
550 
551   // Get assembly data
552   CeedOperatorAssemblyData data;
553   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
554   const CeedEvalMode *eval_mode_in, *eval_mode_out;
555   CeedInt             num_eval_mode_in, num_eval_mode_out;
556   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out));
557   CeedBasis basis_in, basis_out;
558   CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL));
559 
560   if (num_eval_mode_in == 0 || num_eval_mode_out == 0) {
561     // LCOV_EXCL_START
562     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
563     // LCOV_EXCL_STOP
564   }
565 
566   CeedElemRestriction active_rstr;
567   CeedInt             num_elem, elem_size, num_qpts, num_comp;
568   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
569   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
570   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
571   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
572   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
573 
574   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
575 
576   // loop over elements and put in data structure
577   const CeedScalar *interp_in, *grad_in;
578   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
579   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
580 
581   const CeedScalar *assembled_qf_array;
582   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
583 
584   CeedInt layout_qf[3];
585   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
586   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
587 
588   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
589   const CeedScalar *B_mat_in, *B_mat_out;
590   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &B_mat_in, NULL, &B_mat_out));
591   CeedScalar  BTD_mat[elem_size * num_qpts * num_eval_mode_in];
592   CeedScalar  elem_mat[elem_size * elem_size];
593   CeedInt     count = 0;
594   CeedScalar *vals;
595   CeedCall(CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals));
596   for (CeedInt e = 0; e < num_elem; e++) {
597     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
598       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
599         // Compute B^T*D
600         for (CeedInt n = 0; n < elem_size; n++) {
601           for (CeedInt q = 0; q < num_qpts; q++) {
602             for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
603               const CeedInt btd_index = n * (num_qpts * num_eval_mode_in) + (num_eval_mode_in * q + e_in);
604               CeedScalar    sum       = 0.0;
605               for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
606                 const CeedInt b_out_index     = (num_eval_mode_out * q + e_out) * elem_size + n;
607                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_mode_out + e_out) * num_comp + comp_out;
608                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
609                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
610               }
611               BTD_mat[btd_index] = sum;
612             }
613           }
614         }
615         // form element matrix itself (for each block component)
616         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_mode_in));
617 
618         // put element matrix in coordinate data structure
619         for (CeedInt i = 0; i < elem_size; i++) {
620           for (CeedInt j = 0; j < elem_size; j++) {
621             vals[offset + count] = elem_mat[i * elem_size + j];
622             count++;
623           }
624         }
625       }
626     }
627   }
628   if (count != local_num_entries) {
629     // LCOV_EXCL_START
630     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
631     // LCOV_EXCL_STOP
632   }
633   CeedCall(CeedVectorRestoreArray(values, &vals));
634 
635   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
636   CeedCall(CeedVectorDestroy(&assembled_qf));
637 
638   return CEED_ERROR_SUCCESS;
639 }
640 
641 /**
642   @brief Count number of entries for assembled CeedOperator
643 
644   @param[in] op            CeedOperator to assemble
645   @param[out] num_entries  Number of entries in assembled representation
646 
647   @return An error code: 0 - success, otherwise - failure
648 
649   @ref Utility
650 **/
651 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
652   CeedElemRestriction rstr;
653   CeedInt             num_elem, elem_size, num_comp;
654 
655   if (op->is_composite) {
656     // LCOV_EXCL_START
657     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
658     // LCOV_EXCL_STOP
659   }
660   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
661   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
662   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
663   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
664   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
665 
666   return CEED_ERROR_SUCCESS;
667 }
668 
669 /**
670   @brief Common code for creating a multigrid coarse operator and level
671            transfer operators for a CeedOperator
672 
673   @param[in] op_fine       Fine grid operator
674   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
675   @param[in] rstr_coarse   Coarse grid restriction
676   @param[in] basis_coarse  Coarse grid active vector basis
677   @param[in] basis_c_to_f  Basis for coarse to fine interpolation
678   @param[out] op_coarse    Coarse grid operator
679   @param[out] op_prolong   Coarse to fine operator
680   @param[out] op_restrict  Fine to coarse operator
681 
682   @return An error code: 0 - success, otherwise - failure
683 
684   @ref Developer
685 **/
686 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
687                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
688   Ceed ceed;
689   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
690 
691   // Check for composite operator
692   bool is_composite;
693   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
694   if (is_composite) {
695     // LCOV_EXCL_START
696     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
697     // LCOV_EXCL_STOP
698   }
699 
700   // Coarse Grid
701   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
702   CeedElemRestriction rstr_fine = NULL;
703   // -- Clone input fields
704   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
705     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
706       rstr_fine = op_fine->input_fields[i]->elem_restr;
707       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
708     } else {
709       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_restr,
710                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
711     }
712   }
713   // -- Clone output fields
714   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
715     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
716       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
717     } else {
718       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_restr,
719                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
720     }
721   }
722   // -- Clone QFunctionAssemblyData
723   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
724 
725   // Multiplicity vector
726   CeedVector mult_vec, mult_e_vec;
727   CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
728   CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
729   CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
730   CeedCall(CeedVectorSetValue(mult_vec, 0.0));
731   CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
732   CeedCall(CeedVectorDestroy(&mult_e_vec));
733   CeedCall(CeedVectorReciprocal(mult_vec));
734 
735   // Restriction
736   CeedInt num_comp;
737   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
738   CeedQFunction qf_restrict;
739   CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
740   CeedInt *num_comp_r_data;
741   CeedCall(CeedCalloc(1, &num_comp_r_data));
742   num_comp_r_data[0] = num_comp;
743   CeedQFunctionContext ctx_r;
744   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
745   CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
746   CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
747   CeedCall(CeedQFunctionContextDestroy(&ctx_r));
748   CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
749   CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
750   CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
751   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
752 
753   CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
754   CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
755   CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
756   CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
757 
758   // Prolongation
759   CeedQFunction qf_prolong;
760   CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
761   CeedInt *num_comp_p_data;
762   CeedCall(CeedCalloc(1, &num_comp_p_data));
763   num_comp_p_data[0] = num_comp;
764   CeedQFunctionContext ctx_p;
765   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
766   CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
767   CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
768   CeedCall(CeedQFunctionContextDestroy(&ctx_p));
769   CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
770   CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
771   CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
772   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
773 
774   CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
775   CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
776   CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
777   CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
778 
779   // Clone name
780   bool   has_name = op_fine->name;
781   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
782   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
783   {
784     char *prolongation_name;
785     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
786     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
787     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
788     CeedCall(CeedFree(&prolongation_name));
789   }
790   {
791     char *restriction_name;
792     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
793     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
794     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
795     CeedCall(CeedFree(&restriction_name));
796   }
797 
798   // Cleanup
799   CeedCall(CeedVectorDestroy(&mult_vec));
800   CeedCall(CeedBasisDestroy(&basis_c_to_f));
801   CeedCall(CeedQFunctionDestroy(&qf_restrict));
802   CeedCall(CeedQFunctionDestroy(&qf_prolong));
803 
804   return CEED_ERROR_SUCCESS;
805 }
806 
807 /**
808   @brief Build 1D mass matrix and Laplacian with perturbation
809 
810   @param[in] interp_1d    Interpolation matrix in one dimension
811   @param[in] grad_1d      Gradient matrix in one dimension
812   @param[in] q_weight_1d  Quadrature weights in one dimension
813   @param[in] P_1d         Number of basis nodes in one dimension
814   @param[in] Q_1d         Number of quadrature points in one dimension
815   @param[in] dim          Dimension of basis
816   @param[out] mass        Assembled mass matrix in one dimension
817   @param[out] laplace     Assembled perturbed Laplacian in one dimension
818 
819   @return An error code: 0 - success, otherwise - failure
820 
821   @ref Developer
822 **/
823 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d,
824                                                       CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
825   for (CeedInt i = 0; i < P_1d; i++) {
826     for (CeedInt j = 0; j < P_1d; j++) {
827       CeedScalar sum = 0.0;
828       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
829       mass[i + j * P_1d] = sum;
830     }
831   }
832   // -- Laplacian
833   for (CeedInt i = 0; i < P_1d; i++) {
834     for (CeedInt j = 0; j < P_1d; j++) {
835       CeedScalar sum = 0.0;
836       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
837       laplace[i + j * P_1d] = sum;
838     }
839   }
840   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
841   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
842   return CEED_ERROR_SUCCESS;
843 }
844 CeedPragmaOptimizeOn
845 
846     /// @}
847 
848     /// ----------------------------------------------------------------------------
849     /// CeedOperator Backend API
850     /// ----------------------------------------------------------------------------
851     /// @addtogroup CeedOperatorBackend
852     /// @{
853 
854     /**
855       @brief Create object holding CeedQFunction assembly data for CeedOperator
856 
857       @param[in] ceed  A Ceed object where the CeedQFunctionAssemblyData will be created
858       @param[out] data Address of the variable where the newly created
859                          CeedQFunctionAssemblyData will be stored
860 
861       @return An error code: 0 - success, otherwise - failure
862 
863       @ref Backend
864     **/
865     int
866     CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
867   CeedCall(CeedCalloc(1, data));
868   (*data)->ref_count = 1;
869   (*data)->ceed      = ceed;
870   CeedCall(CeedReference(ceed));
871 
872   return CEED_ERROR_SUCCESS;
873 }
874 
875 /**
876   @brief Increment the reference counter for a CeedQFunctionAssemblyData
877 
878   @param data  CeedQFunctionAssemblyData to increment the reference counter
879 
880   @return An error code: 0 - success, otherwise - failure
881 
882   @ref Backend
883 **/
884 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
885   data->ref_count++;
886   return CEED_ERROR_SUCCESS;
887 }
888 
889 /**
890   @brief Set re-use of CeedQFunctionAssemblyData
891 
892   @param data       CeedQFunctionAssemblyData to mark for reuse
893   @param reuse_data Boolean flag indicating data re-use
894 
895   @return An error code: 0 - success, otherwise - failure
896 
897   @ref Backend
898 **/
899 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
900   data->reuse_data        = reuse_data;
901   data->needs_data_update = true;
902   return CEED_ERROR_SUCCESS;
903 }
904 
905 /**
906   @brief Mark QFunctionAssemblyData as stale
907 
908   @param data              CeedQFunctionAssemblyData to mark as stale
909   @param needs_data_update Boolean flag indicating if update is needed or completed
910 
911   @return An error code: 0 - success, otherwise - failure
912 
913   @ref Backend
914 **/
915 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
916   data->needs_data_update = needs_data_update;
917   return CEED_ERROR_SUCCESS;
918 }
919 
920 /**
921   @brief Determine if QFunctionAssemblyData needs update
922 
923   @param[in] data              CeedQFunctionAssemblyData to mark as stale
924   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
925 
926   @return An error code: 0 - success, otherwise - failure
927 
928   @ref Backend
929 **/
930 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
931   *is_update_needed = !data->reuse_data || data->needs_data_update;
932   return CEED_ERROR_SUCCESS;
933 }
934 
935 /**
936   @brief Copy the pointer to a CeedQFunctionAssemblyData. Both pointers should
937            be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`;
938            Note: If `*data_copy` is non-NULL, then it is assumed that
939            `*data_copy` is a pointer to a CeedQFunctionAssemblyData. This
940            CeedQFunctionAssemblyData will be destroyed if `*data_copy` is
941            the only reference to this CeedQFunctionAssemblyData.
942 
943   @param data            CeedQFunctionAssemblyData to copy reference to
944   @param[out] data_copy  Variable to store copied reference
945 
946   @return An error code: 0 - success, otherwise - failure
947 
948   @ref Backend
949 **/
950 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
951   CeedCall(CeedQFunctionAssemblyDataReference(data));
952   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
953   *data_copy = data;
954   return CEED_ERROR_SUCCESS;
955 }
956 
957 /**
958   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
959 
960   @param[in] data      CeedQFunctionAssemblyData to retreive status
961   @param[out] is_setup Boolean flag for setup status
962 
963   @return An error code: 0 - success, otherwise - failure
964 
965   @ref Backend
966 **/
967 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
968   *is_setup = data->is_setup;
969   return CEED_ERROR_SUCCESS;
970 }
971 
972 /**
973   @brief Set internal objects for CeedQFunctionAssemblyData
974 
975   @param[in] data  CeedQFunctionAssemblyData to set objects
976   @param[in] vec   CeedVector to store assembled CeedQFunction at quadrature points
977   @param[in] rstr  CeedElemRestriction for CeedVector containing assembled CeedQFunction
978 
979   @return An error code: 0 - success, otherwise - failure
980 
981   @ref Backend
982 **/
983 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
984   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
985   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
986 
987   data->is_setup = true;
988   return CEED_ERROR_SUCCESS;
989 }
990 
991 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
992   if (!data->is_setup) {
993     // LCOV_EXCL_START
994     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
995     // LCOV_EXCL_STOP
996   }
997 
998   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
999   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1000 
1001   return CEED_ERROR_SUCCESS;
1002 }
1003 
1004 /**
1005   @brief Destroy CeedQFunctionAssemblyData
1006 
1007   @param[out] data  CeedQFunctionAssemblyData to destroy
1008 
1009   @return An error code: 0 - success, otherwise - failure
1010 
1011   @ref Backend
1012 **/
1013 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1014   if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;
1015 
1016   CeedCall(CeedDestroy(&(*data)->ceed));
1017   CeedCall(CeedVectorDestroy(&(*data)->vec));
1018   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1019 
1020   CeedCall(CeedFree(data));
1021   return CEED_ERROR_SUCCESS;
1022 }
1023 
1024 /**
1025   @brief Get CeedOperatorAssemblyData
1026 
1027   @param[in] op     CeedOperator to assemble
1028   @param[out] data  CeedQFunctionAssemblyData
1029 
1030   @return An error code: 0 - success, otherwise - failure
1031 
1032   @ref Backend
1033 **/
1034 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1035   if (!op->op_assembled) {
1036     CeedOperatorAssemblyData data;
1037 
1038     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1039     op->op_assembled = data;
1040   }
1041   *data = op->op_assembled;
1042 
1043   return CEED_ERROR_SUCCESS;
1044 }
1045 
1046 /**
1047   @brief Create object holding CeedOperator assembly data
1048 
1049   @param[in] ceed   A Ceed object where the CeedOperatorAssemblyData will be created
1050   @param[in] op     CeedOperator to be assembled
1051   @param[out] data  Address of the variable where the newly created
1052                       CeedOperatorAssemblyData will be stored
1053 
1054   @return An error code: 0 - success, otherwise - failure
1055 
1056   @ref Backend
1057 **/
1058 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1059   CeedCall(CeedCalloc(1, data));
1060   (*data)->ceed = ceed;
1061   CeedCall(CeedReference(ceed));
1062 
1063   // Build OperatorAssembly data
1064   CeedQFunction       qf;
1065   CeedQFunctionField *qf_fields;
1066   CeedOperatorField  *op_fields;
1067   CeedInt             num_input_fields;
1068   CeedCall(CeedOperatorGetQFunction(op, &qf));
1069   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1070   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1071 
1072   // Determine active input basis
1073   CeedInt       num_eval_mode_in = 0, dim = 1;
1074   CeedEvalMode *eval_mode_in = NULL;
1075   CeedBasis     basis_in     = NULL;
1076   for (CeedInt i = 0; i < num_input_fields; i++) {
1077     CeedVector vec;
1078     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1079     if (vec == CEED_VECTOR_ACTIVE) {
1080       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1081       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1082       CeedEvalMode eval_mode;
1083       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1084       switch (eval_mode) {
1085         case CEED_EVAL_NONE:
1086         case CEED_EVAL_INTERP:
1087           CeedCall(CeedRealloc(num_eval_mode_in + 1, &eval_mode_in));
1088           eval_mode_in[num_eval_mode_in] = eval_mode;
1089           num_eval_mode_in += 1;
1090           break;
1091         case CEED_EVAL_GRAD:
1092           CeedCall(CeedRealloc(num_eval_mode_in + dim, &eval_mode_in));
1093           for (CeedInt d = 0; d < dim; d++) {
1094             eval_mode_in[num_eval_mode_in + d] = eval_mode;
1095           }
1096           num_eval_mode_in += dim;
1097           break;
1098         case CEED_EVAL_WEIGHT:
1099         case CEED_EVAL_DIV:
1100         case CEED_EVAL_CURL:
1101           break;  // Caught by QF Assembly
1102       }
1103     }
1104   }
1105   (*data)->num_eval_mode_in = num_eval_mode_in;
1106   (*data)->eval_mode_in     = eval_mode_in;
1107   CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->basis_in));
1108 
1109   // Determine active output basis
1110   CeedInt num_output_fields;
1111   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1112   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1113   CeedInt       num_eval_mode_out = 0;
1114   CeedEvalMode *eval_mode_out     = NULL;
1115   CeedBasis     basis_out         = NULL;
1116   for (CeedInt i = 0; i < num_output_fields; i++) {
1117     CeedVector vec;
1118     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1119     if (vec == CEED_VECTOR_ACTIVE) {
1120       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1121       CeedEvalMode eval_mode;
1122       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1123       switch (eval_mode) {
1124         case CEED_EVAL_NONE:
1125         case CEED_EVAL_INTERP:
1126           CeedCall(CeedRealloc(num_eval_mode_out + 1, &eval_mode_out));
1127           eval_mode_out[num_eval_mode_out] = eval_mode;
1128           num_eval_mode_out += 1;
1129           break;
1130         case CEED_EVAL_GRAD:
1131           CeedCall(CeedRealloc(num_eval_mode_out + dim, &eval_mode_out));
1132           for (CeedInt d = 0; d < dim; d++) {
1133             eval_mode_out[num_eval_mode_out + d] = eval_mode;
1134           }
1135           num_eval_mode_out += dim;
1136           break;
1137         case CEED_EVAL_WEIGHT:
1138         case CEED_EVAL_DIV:
1139         case CEED_EVAL_CURL:
1140           break;  // Caught by QF Assembly
1141       }
1142     }
1143   }
1144   (*data)->num_eval_mode_out = num_eval_mode_out;
1145   (*data)->eval_mode_out     = eval_mode_out;
1146   CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->basis_out));
1147 
1148   return CEED_ERROR_SUCCESS;
1149 }
1150 
1151 /**
1152   @brief Get CeedOperator CeedEvalModes for assembly
1153 
1154   @param[in] data                CeedOperatorAssemblyData
1155   @param[out] num_eval_mode_in   Pointer to hold number of input CeedEvalModes, or NULL
1156   @param[out] eval_mode_in       Pointer to hold input CeedEvalModes, or NULL
1157   @param[out] num_eval_mode_out  Pointer to hold number of output CeedEvalModes, or NULL
1158   @param[out] eval_mode_out      Pointer to hold output CeedEvalModes, or NULL
1159 
1160   @return An error code: 0 - success, otherwise - failure
1161 
1162   @ref Backend
1163 **/
1164 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_eval_mode_in, const CeedEvalMode **eval_mode_in,
1165                                          CeedInt *num_eval_mode_out, const CeedEvalMode **eval_mode_out) {
1166   if (num_eval_mode_in) *num_eval_mode_in = data->num_eval_mode_in;
1167   if (eval_mode_in) *eval_mode_in = data->eval_mode_in;
1168   if (num_eval_mode_out) *num_eval_mode_out = data->num_eval_mode_out;
1169   if (eval_mode_out) *eval_mode_out = data->eval_mode_out;
1170 
1171   return CEED_ERROR_SUCCESS;
1172 }
1173 
1174 /**
1175   @brief Get CeedOperator CeedBasis data for assembly
1176 
1177   @param[in] data        CeedOperatorAssemblyData
1178   @param[out] basis_in   Pointer to hold active input CeedBasis, or NULL
1179   @param[out] B_in       Pointer to hold assembled active input B, or NULL
1180   @param[out] basis_out  Pointer to hold active output CeedBasis, or NULL
1181   @param[out] B_out      Pointer to hold assembled active output B, or NULL
1182 
1183   @return An error code: 0 - success, otherwise - failure
1184 
1185   @ref Backend
1186 **/
1187 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedBasis *basis_in, const CeedScalar **B_in, CeedBasis *basis_out,
1188                                      const CeedScalar **B_out) {
1189   // Assemble B_in, B_out if needed
1190   if (B_in && !data->B_in) {
1191     CeedInt           num_qpts, elem_size;
1192     CeedScalar       *B_in, *identity = NULL;
1193     const CeedScalar *interp_in, *grad_in;
1194     bool              has_eval_none = false;
1195 
1196     CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_in, &num_qpts));
1197     CeedCall(CeedBasisGetNumNodes(data->basis_in, &elem_size));
1198     CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_in, &B_in));
1199 
1200     for (CeedInt i = 0; i < data->num_eval_mode_in; i++) {
1201       has_eval_none = has_eval_none || (data->eval_mode_in[i] == CEED_EVAL_NONE);
1202     }
1203     if (has_eval_none) {
1204       CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1205       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1206         identity[i * elem_size + i] = 1.0;
1207       }
1208     }
1209     CeedCall(CeedBasisGetInterp(data->basis_in, &interp_in));
1210     CeedCall(CeedBasisGetGrad(data->basis_in, &grad_in));
1211 
1212     for (CeedInt q = 0; q < num_qpts; q++) {
1213       for (CeedInt n = 0; n < elem_size; n++) {
1214         CeedInt d_in = -1;
1215         for (CeedInt e_in = 0; e_in < data->num_eval_mode_in; e_in++) {
1216           const CeedInt     qq = data->num_eval_mode_in * q;
1217           const CeedScalar *b  = NULL;
1218 
1219           if (data->eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in++;
1220           CeedOperatorGetBasisPointer(data->eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &b);
1221           B_in[(qq + e_in) * elem_size + n] = b[q * elem_size + n];
1222         }
1223       }
1224     }
1225     data->B_in = B_in;
1226   }
1227 
1228   if (B_out && !data->B_out) {
1229     CeedInt           num_qpts, elem_size;
1230     CeedScalar       *B_out, *identity = NULL;
1231     const CeedScalar *interp_out, *grad_out;
1232     bool              has_eval_none = false;
1233 
1234     CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_out, &num_qpts));
1235     CeedCall(CeedBasisGetNumNodes(data->basis_out, &elem_size));
1236     CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_out, &B_out));
1237 
1238     for (CeedInt i = 0; i < data->num_eval_mode_out; i++) {
1239       has_eval_none = has_eval_none || (data->eval_mode_out[i] == CEED_EVAL_NONE);
1240     }
1241     if (has_eval_none) {
1242       CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1243       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1244         identity[i * elem_size + i] = 1.0;
1245       }
1246     }
1247     CeedCall(CeedBasisGetInterp(data->basis_out, &interp_out));
1248     CeedCall(CeedBasisGetGrad(data->basis_out, &grad_out));
1249 
1250     for (CeedInt q = 0; q < num_qpts; q++) {
1251       for (CeedInt n = 0; n < elem_size; n++) {
1252         CeedInt d_out = -1;
1253         for (CeedInt e_out = 0; e_out < data->num_eval_mode_out; e_out++) {
1254           const CeedInt     qq = data->num_eval_mode_out * q;
1255           const CeedScalar *b  = NULL;
1256 
1257           if (data->eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out++;
1258           CeedOperatorGetBasisPointer(data->eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &b);
1259           B_out[(qq + e_out) * elem_size + n] = b[q * elem_size + n];
1260         }
1261       }
1262     }
1263     data->B_out = B_out;
1264   }
1265 
1266   if (basis_in) *basis_in = data->basis_in;
1267   if (B_in) *B_in = data->B_in;
1268   if (basis_out) *basis_out = data->basis_out;
1269   if (B_out) *B_out = data->B_out;
1270 
1271   return CEED_ERROR_SUCCESS;
1272 }
1273 
1274 /**
1275   @brief Destroy CeedOperatorAssemblyData
1276 
1277   @param[out] data  CeedOperatorAssemblyData to destroy
1278 
1279   @return An error code: 0 - success, otherwise - failure
1280 
1281   @ref Backend
1282 **/
1283 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1284   if (!*data) return CEED_ERROR_SUCCESS;
1285 
1286   CeedCall(CeedDestroy(&(*data)->ceed));
1287   CeedCall(CeedBasisDestroy(&(*data)->basis_in));
1288   CeedCall(CeedBasisDestroy(&(*data)->basis_out));
1289   CeedCall(CeedFree(&(*data)->eval_mode_in));
1290   CeedCall(CeedFree(&(*data)->eval_mode_out));
1291   CeedCall(CeedFree(&(*data)->B_in));
1292   CeedCall(CeedFree(&(*data)->B_out));
1293 
1294   CeedCall(CeedFree(data));
1295   return CEED_ERROR_SUCCESS;
1296 }
1297 
1298 /// @}
1299 
1300 /// ----------------------------------------------------------------------------
1301 /// CeedOperator Public API
1302 /// ----------------------------------------------------------------------------
1303 /// @addtogroup CeedOperatorUser
1304 /// @{
1305 
1306 /**
1307   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1308 
1309   This returns a CeedVector containing a matrix at each quadrature point
1310     providing the action of the CeedQFunction associated with the CeedOperator.
1311     The vector 'assembled' is of shape
1312       [num_elements, num_input_fields, num_output_fields, num_quad_points]
1313     and contains column-major matrices representing the action of the
1314     CeedQFunction for a corresponding quadrature point on an element. Inputs and
1315     outputs are in the order provided by the user when adding CeedOperator fields.
1316     For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
1317     'v', provided in that order, would result in an assembled QFunction that
1318     consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
1319     on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1320 
1321   Note: Calling this function asserts that setup is complete
1322           and sets the CeedOperator as immutable.
1323 
1324   @param op              CeedOperator to assemble CeedQFunction
1325   @param[out] assembled  CeedVector to store assembled CeedQFunction at
1326                            quadrature points
1327   @param[out] rstr       CeedElemRestriction for CeedVector containing assembled
1328                            CeedQFunction
1329   @param request         Address of CeedRequest for non-blocking completion, else
1330                            @ref CEED_REQUEST_IMMEDIATE
1331 
1332   @return An error code: 0 - success, otherwise - failure
1333 
1334   @ref User
1335 **/
1336 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1337   CeedCall(CeedOperatorCheckReady(op));
1338 
1339   if (op->LinearAssembleQFunction) {
1340     // Backend version
1341     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1342   } else {
1343     // Operator fallback
1344     CeedOperator op_fallback;
1345 
1346     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1347     if (op_fallback) {
1348       CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1349     } else {
1350       // LCOV_EXCL_START
1351       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1352       // LCOV_EXCL_STOP
1353     }
1354   }
1355   return CEED_ERROR_SUCCESS;
1356 }
1357 
1358 /**
1359   @brief Assemble CeedQFunction and store result internall. Return copied
1360            references of stored data to the caller. Caller is responsible for
1361            ownership and destruction of the copied references. See also
1362            @ref CeedOperatorLinearAssembleQFunction
1363 
1364   @param op              CeedOperator to assemble CeedQFunction
1365   @param assembled       CeedVector to store assembled CeedQFunction at
1366                            quadrature points
1367   @param rstr            CeedElemRestriction for CeedVector containing assembled
1368                            CeedQFunction
1369   @param request         Address of CeedRequest for non-blocking completion, else
1370                            @ref CEED_REQUEST_IMMEDIATE
1371 
1372   @return An error code: 0 - success, otherwise - failure
1373 
1374   @ref User
1375 **/
1376 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1377   CeedCall(CeedOperatorCheckReady(op));
1378 
1379   if (op->LinearAssembleQFunctionUpdate) {
1380     // Backend version
1381     bool                qf_assembled_is_setup;
1382     CeedVector          assembled_vec  = NULL;
1383     CeedElemRestriction assembled_rstr = NULL;
1384 
1385     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1386     if (qf_assembled_is_setup) {
1387       bool update_needed;
1388 
1389       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1390       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1391       if (update_needed) {
1392         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1393       }
1394     } else {
1395       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1396       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1397     }
1398     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1399 
1400     // Copy reference from internally held copy
1401     *assembled = NULL;
1402     *rstr      = NULL;
1403     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1404     CeedCall(CeedVectorDestroy(&assembled_vec));
1405     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1406     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1407   } else {
1408     // Operator fallback
1409     CeedOperator op_fallback;
1410 
1411     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1412     if (op_fallback) {
1413       CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1414     } else {
1415       // LCOV_EXCL_START
1416       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1417       // LCOV_EXCL_STOP
1418     }
1419   }
1420 
1421   return CEED_ERROR_SUCCESS;
1422 }
1423 
1424 /**
1425   @brief Assemble the diagonal of a square linear CeedOperator
1426 
1427   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1428 
1429   Note: Currently only non-composite CeedOperators with a single field and
1430           composite CeedOperators with single field sub-operators are supported.
1431 
1432   Note: Calling this function asserts that setup is complete
1433           and sets the CeedOperator as immutable.
1434 
1435   @param op              CeedOperator to assemble CeedQFunction
1436   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1437   @param request         Address of CeedRequest for non-blocking completion, else
1438                            @ref CEED_REQUEST_IMMEDIATE
1439 
1440   @return An error code: 0 - success, otherwise - failure
1441 
1442   @ref User
1443 **/
1444 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1445   CeedCall(CeedOperatorCheckReady(op));
1446 
1447   CeedSize input_size = 0, output_size = 0;
1448   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1449   if (input_size != output_size) {
1450     // LCOV_EXCL_START
1451     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1452     // LCOV_EXCL_STOP
1453   }
1454 
1455   if (op->LinearAssembleDiagonal) {
1456     // Backend version
1457     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1458     return CEED_ERROR_SUCCESS;
1459   } else if (op->LinearAssembleAddDiagonal) {
1460     // Backend version with zeroing first
1461     CeedCall(CeedVectorSetValue(assembled, 0.0));
1462     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1463     return CEED_ERROR_SUCCESS;
1464   } else {
1465     // Operator fallback
1466     CeedOperator op_fallback;
1467 
1468     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1469     if (op_fallback) {
1470       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1471       return CEED_ERROR_SUCCESS;
1472     }
1473   }
1474   // Default interface implementation
1475   CeedCall(CeedVectorSetValue(assembled, 0.0));
1476   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1477 
1478   return CEED_ERROR_SUCCESS;
1479 }
1480 
1481 /**
1482   @brief Assemble the diagonal of a square linear CeedOperator
1483 
1484   This sums into a CeedVector the diagonal of a linear CeedOperator.
1485 
1486   Note: Currently only non-composite CeedOperators with a single field and
1487           composite CeedOperators with single field sub-operators are supported.
1488 
1489   Note: Calling this function asserts that setup is complete
1490           and sets the CeedOperator as immutable.
1491 
1492   @param op              CeedOperator to assemble CeedQFunction
1493   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1494   @param request         Address of CeedRequest for non-blocking completion, else
1495                            @ref CEED_REQUEST_IMMEDIATE
1496 
1497   @return An error code: 0 - success, otherwise - failure
1498 
1499   @ref User
1500 **/
1501 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1502   CeedCall(CeedOperatorCheckReady(op));
1503 
1504   CeedSize input_size = 0, output_size = 0;
1505   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1506   if (input_size != output_size) {
1507     // LCOV_EXCL_START
1508     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1509     // LCOV_EXCL_STOP
1510   }
1511 
1512   if (op->LinearAssembleAddDiagonal) {
1513     // Backend version
1514     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1515     return CEED_ERROR_SUCCESS;
1516   } else {
1517     // Operator fallback
1518     CeedOperator op_fallback;
1519 
1520     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1521     if (op_fallback) {
1522       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1523       return CEED_ERROR_SUCCESS;
1524     }
1525   }
1526   // Default interface implementation
1527   bool is_composite;
1528   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1529   if (is_composite) {
1530     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1531   } else {
1532     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1533   }
1534 
1535   return CEED_ERROR_SUCCESS;
1536 }
1537 
1538 /**
1539   @brief Assemble the point block diagonal of a square linear CeedOperator
1540 
1541   This overwrites a CeedVector with the point block diagonal of a linear
1542     CeedOperator.
1543 
1544   Note: Currently only non-composite CeedOperators with a single field and
1545           composite CeedOperators with single field sub-operators are supported.
1546 
1547   Note: Calling this function asserts that setup is complete
1548           and sets the CeedOperator as immutable.
1549 
1550   @param op              CeedOperator to assemble CeedQFunction
1551   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1552                            diagonal, provided in row-major form with an
1553                            @a num_comp * @a num_comp block at each node. The dimensions
1554                            of this vector are derived from the active vector
1555                            for the CeedOperator. The array has shape
1556                            [nodes, component out, component in].
1557   @param request         Address of CeedRequest for non-blocking completion, else
1558                            CEED_REQUEST_IMMEDIATE
1559 
1560   @return An error code: 0 - success, otherwise - failure
1561 
1562   @ref User
1563 **/
1564 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1565   CeedCall(CeedOperatorCheckReady(op));
1566 
1567   CeedSize input_size = 0, output_size = 0;
1568   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1569   if (input_size != output_size) {
1570     // LCOV_EXCL_START
1571     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1572     // LCOV_EXCL_STOP
1573   }
1574 
1575   if (op->LinearAssemblePointBlockDiagonal) {
1576     // Backend version
1577     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1578     return CEED_ERROR_SUCCESS;
1579   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1580     // Backend version with zeroing first
1581     CeedCall(CeedVectorSetValue(assembled, 0.0));
1582     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1583     return CEED_ERROR_SUCCESS;
1584   } else {
1585     // Operator fallback
1586     CeedOperator op_fallback;
1587 
1588     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1589     if (op_fallback) {
1590       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1591       return CEED_ERROR_SUCCESS;
1592     }
1593   }
1594   // Default interface implementation
1595   CeedCall(CeedVectorSetValue(assembled, 0.0));
1596   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1597 
1598   return CEED_ERROR_SUCCESS;
1599 }
1600 
1601 /**
1602   @brief Assemble the point block diagonal of a square linear CeedOperator
1603 
1604   This sums into a CeedVector with the point block diagonal of a linear
1605     CeedOperator.
1606 
1607   Note: Currently only non-composite CeedOperators with a single field and
1608           composite CeedOperators with single field sub-operators are supported.
1609 
1610   Note: Calling this function asserts that setup is complete
1611           and sets the CeedOperator as immutable.
1612 
1613   @param op              CeedOperator to assemble CeedQFunction
1614   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1615                            diagonal, provided in row-major form with an
1616                            @a num_comp * @a num_comp block at each node. The dimensions
1617                            of this vector are derived from the active vector
1618                            for the CeedOperator. The array has shape
1619                            [nodes, component out, component in].
1620   @param request         Address of CeedRequest for non-blocking completion, else
1621                            CEED_REQUEST_IMMEDIATE
1622 
1623   @return An error code: 0 - success, otherwise - failure
1624 
1625   @ref User
1626 **/
1627 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1628   CeedCall(CeedOperatorCheckReady(op));
1629 
1630   CeedSize input_size = 0, output_size = 0;
1631   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1632   if (input_size != output_size) {
1633     // LCOV_EXCL_START
1634     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1635     // LCOV_EXCL_STOP
1636   }
1637 
1638   if (op->LinearAssembleAddPointBlockDiagonal) {
1639     // Backend version
1640     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1641     return CEED_ERROR_SUCCESS;
1642   } else {
1643     // Operator fallback
1644     CeedOperator op_fallback;
1645 
1646     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1647     if (op_fallback) {
1648       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1649       return CEED_ERROR_SUCCESS;
1650     }
1651   }
1652   // Default interface implemenation
1653   bool is_composite;
1654   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1655   if (is_composite) {
1656     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1657   } else {
1658     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1659   }
1660 
1661   return CEED_ERROR_SUCCESS;
1662 }
1663 
1664 /**
1665    @brief Fully assemble the nonzero pattern of a linear operator.
1666 
1667    Expected to be used in conjunction with CeedOperatorLinearAssemble()
1668 
1669    The assembly routines use coordinate format, with num_entries tuples of the
1670    form (i, j, value) which indicate that value should be added to the matrix
1671    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1672    This function returns the number of entries and their (i, j) locations,
1673    while CeedOperatorLinearAssemble() provides the values in the same
1674    ordering.
1675 
1676    This will generally be slow unless your operator is low-order.
1677 
1678   Note: Calling this function asserts that setup is complete
1679           and sets the CeedOperator as immutable.
1680 
1681    @param[in]  op           CeedOperator to assemble
1682    @param[out] num_entries  Number of entries in coordinate nonzero pattern
1683    @param[out] rows         Row number for each entry
1684    @param[out] cols         Column number for each entry
1685 
1686    @ref User
1687 **/
1688 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1689   CeedInt       num_suboperators, single_entries;
1690   CeedOperator *sub_operators;
1691   bool          is_composite;
1692   CeedCall(CeedOperatorCheckReady(op));
1693 
1694   if (op->LinearAssembleSymbolic) {
1695     // Backend version
1696     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1697     return CEED_ERROR_SUCCESS;
1698   } else {
1699     // Operator fallback
1700     CeedOperator op_fallback;
1701 
1702     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1703     if (op_fallback) {
1704       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1705       return CEED_ERROR_SUCCESS;
1706     }
1707   }
1708 
1709   // Default interface implementation
1710 
1711   // count entries and allocate rows, cols arrays
1712   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1713   *num_entries = 0;
1714   if (is_composite) {
1715     CeedCall(CeedOperatorGetNumSub(op, &num_suboperators));
1716     CeedCall(CeedOperatorGetSubList(op, &sub_operators));
1717     for (CeedInt k = 0; k < num_suboperators; ++k) {
1718       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1719       *num_entries += single_entries;
1720     }
1721   } else {
1722     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1723     *num_entries += single_entries;
1724   }
1725   CeedCall(CeedCalloc(*num_entries, rows));
1726   CeedCall(CeedCalloc(*num_entries, cols));
1727 
1728   // assemble nonzero locations
1729   CeedInt offset = 0;
1730   if (is_composite) {
1731     CeedCall(CeedOperatorGetNumSub(op, &num_suboperators));
1732     CeedCall(CeedOperatorGetSubList(op, &sub_operators));
1733     for (CeedInt k = 0; k < num_suboperators; ++k) {
1734       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1735       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1736       offset += single_entries;
1737     }
1738   } else {
1739     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1740   }
1741 
1742   return CEED_ERROR_SUCCESS;
1743 }
1744 
1745 /**
1746    @brief Fully assemble the nonzero entries of a linear operator.
1747 
1748    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()
1749 
1750    The assembly routines use coordinate format, with num_entries tuples of the
1751    form (i, j, value) which indicate that value should be added to the matrix
1752    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1753    This function returns the values of the nonzero entries to be added, their
1754    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1755 
1756    This will generally be slow unless your operator is low-order.
1757 
1758   Note: Calling this function asserts that setup is complete
1759           and sets the CeedOperator as immutable.
1760 
1761    @param[in]  op      CeedOperator to assemble
1762    @param[out] values  Values to assemble into matrix
1763 
1764    @ref User
1765 **/
1766 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1767   CeedInt       num_suboperators, single_entries = 0;
1768   CeedOperator *sub_operators;
1769   CeedCall(CeedOperatorCheckReady(op));
1770 
1771   if (op->LinearAssemble) {
1772     // Backend version
1773     CeedCall(op->LinearAssemble(op, values));
1774     return CEED_ERROR_SUCCESS;
1775   } else {
1776     // Operator fallback
1777     CeedOperator op_fallback;
1778 
1779     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1780     if (op_fallback) {
1781       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
1782       return CEED_ERROR_SUCCESS;
1783     }
1784   }
1785 
1786   // Default interface implementation
1787   bool is_composite;
1788   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1789 
1790   CeedInt offset = 0;
1791   if (is_composite) {
1792     CeedCall(CeedOperatorGetNumSub(op, &num_suboperators));
1793     CeedCall(CeedOperatorGetSubList(op, &sub_operators));
1794     for (CeedInt k = 0; k < num_suboperators; k++) {
1795       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
1796       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1797       offset += single_entries;
1798     }
1799   } else {
1800     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
1801   }
1802 
1803   return CEED_ERROR_SUCCESS;
1804 }
1805 
1806 /**
1807   @brief Create a multigrid coarse operator and level transfer operators
1808            for a CeedOperator, creating the prolongation basis from the
1809            fine and coarse grid interpolation
1810 
1811   Note: Calling this function asserts that setup is complete
1812           and sets the CeedOperator as immutable.
1813 
1814   @param[in] op_fine       Fine grid operator
1815   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
1816   @param[in] rstr_coarse   Coarse grid restriction
1817   @param[in] basis_coarse  Coarse grid active vector basis
1818   @param[out] op_coarse    Coarse grid operator
1819   @param[out] op_prolong   Coarse to fine operator
1820   @param[out] op_restrict  Fine to coarse operator
1821 
1822   @return An error code: 0 - success, otherwise - failure
1823 
1824   @ref User
1825 **/
1826 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1827                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
1828   CeedCall(CeedOperatorCheckReady(op_fine));
1829 
1830   // Build prolongation matrix
1831   CeedBasis basis_fine, basis_c_to_f;
1832   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1833   CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
1834 
1835   // Core code
1836   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1837 
1838   return CEED_ERROR_SUCCESS;
1839 }
1840 
1841 /**
1842   @brief Create a multigrid coarse operator and level transfer operators
1843            for a CeedOperator with a tensor basis for the active basis
1844 
1845   Note: Calling this function asserts that setup is complete
1846           and sets the CeedOperator as immutable.
1847 
1848   @param[in] op_fine        Fine grid operator
1849   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
1850   @param[in] rstr_coarse    Coarse grid restriction
1851   @param[in] basis_coarse   Coarse grid active vector basis
1852   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
1853   @param[out] op_coarse     Coarse grid operator
1854   @param[out] op_prolong    Coarse to fine operator
1855   @param[out] op_restrict   Fine to coarse operator
1856 
1857   @return An error code: 0 - success, otherwise - failure
1858 
1859   @ref User
1860 **/
1861 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1862                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
1863                                              CeedOperator *op_restrict) {
1864   CeedCall(CeedOperatorCheckReady(op_fine));
1865   Ceed ceed;
1866   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
1867 
1868   // Check for compatible quadrature spaces
1869   CeedBasis basis_fine;
1870   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1871   CeedInt Q_f, Q_c;
1872   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
1873   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
1874   if (Q_f != Q_c) {
1875     // LCOV_EXCL_START
1876     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
1877     // LCOV_EXCL_STOP
1878   }
1879 
1880   // Coarse to fine basis
1881   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
1882   CeedCall(CeedBasisGetDimension(basis_fine, &dim));
1883   CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
1884   CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
1885   CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
1886   P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
1887   CeedScalar *q_ref, *q_weight, *grad;
1888   CeedCall(CeedCalloc(P_1d_f, &q_ref));
1889   CeedCall(CeedCalloc(P_1d_f, &q_weight));
1890   CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
1891   CeedBasis basis_c_to_f;
1892   CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
1893   CeedCall(CeedFree(&q_ref));
1894   CeedCall(CeedFree(&q_weight));
1895   CeedCall(CeedFree(&grad));
1896 
1897   // Core code
1898   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1899   return CEED_ERROR_SUCCESS;
1900 }
1901 
1902 /**
1903   @brief Create a multigrid coarse operator and level transfer operators
1904            for a CeedOperator with a non-tensor basis for the active vector
1905 
1906   Note: Calling this function asserts that setup is complete
1907           and sets the CeedOperator as immutable.
1908 
1909   @param[in] op_fine        Fine grid operator
1910   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
1911   @param[in] rstr_coarse    Coarse grid restriction
1912   @param[in] basis_coarse   Coarse grid active vector basis
1913   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
1914   @param[out] op_coarse     Coarse grid operator
1915   @param[out] op_prolong    Coarse to fine operator
1916   @param[out] op_restrict   Fine to coarse operator
1917 
1918   @return An error code: 0 - success, otherwise - failure
1919 
1920   @ref User
1921 **/
1922 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1923                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
1924                                        CeedOperator *op_restrict) {
1925   CeedCall(CeedOperatorCheckReady(op_fine));
1926   Ceed ceed;
1927   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
1928 
1929   // Check for compatible quadrature spaces
1930   CeedBasis basis_fine;
1931   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1932   CeedInt Q_f, Q_c;
1933   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
1934   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
1935   if (Q_f != Q_c) {
1936     // LCOV_EXCL_START
1937     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
1938     // LCOV_EXCL_STOP
1939   }
1940 
1941   // Coarse to fine basis
1942   CeedElemTopology topo;
1943   CeedCall(CeedBasisGetTopology(basis_fine, &topo));
1944   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
1945   CeedCall(CeedBasisGetDimension(basis_fine, &dim));
1946   CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
1947   CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
1948   CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
1949   CeedScalar *q_ref, *q_weight, *grad;
1950   CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
1951   CeedCall(CeedCalloc(num_nodes_f, &q_weight));
1952   CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
1953   CeedBasis basis_c_to_f;
1954   CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
1955   CeedCall(CeedFree(&q_ref));
1956   CeedCall(CeedFree(&q_weight));
1957   CeedCall(CeedFree(&grad));
1958 
1959   // Core code
1960   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1961   return CEED_ERROR_SUCCESS;
1962 }
1963 
1964 /**
1965   @brief Build a FDM based approximate inverse for each element for a
1966            CeedOperator
1967 
1968   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization
1969     Method based approximate inverse. This function obtains the simultaneous
1970     diagonalization for the 1D mass and Laplacian operators,
1971       M = V^T V, K = V^T S V.
1972     The assembled QFunction is used to modify the eigenvalues from simultaneous
1973     diagonalization and obtain an approximate inverse of the form
1974       V^T S^hat V. The CeedOperator must be linear and non-composite. The
1975     associated CeedQFunction must therefore also be linear.
1976 
1977   Note: Calling this function asserts that setup is complete
1978           and sets the CeedOperator as immutable.
1979 
1980   @param op            CeedOperator to create element inverses
1981   @param[out] fdm_inv  CeedOperator to apply the action of a FDM based inverse
1982                          for each element
1983   @param request       Address of CeedRequest for non-blocking completion, else
1984                          @ref CEED_REQUEST_IMMEDIATE
1985 
1986   @return An error code: 0 - success, otherwise - failure
1987 
1988   @ref User
1989 **/
1990 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
1991   CeedCall(CeedOperatorCheckReady(op));
1992 
1993   if (op->CreateFDMElementInverse) {
1994     // Backend version
1995     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
1996     return CEED_ERROR_SUCCESS;
1997   } else {
1998     // Operator fallback
1999     CeedOperator op_fallback;
2000 
2001     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2002     if (op_fallback) {
2003       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2004       return CEED_ERROR_SUCCESS;
2005     }
2006   }
2007 
2008   // Default interface implementation
2009   Ceed ceed, ceed_parent;
2010   CeedCall(CeedOperatorGetCeed(op, &ceed));
2011   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2012   ceed_parent = ceed_parent ? ceed_parent : ceed;
2013   CeedQFunction qf;
2014   CeedCall(CeedOperatorGetQFunction(op, &qf));
2015 
2016   // Determine active input basis
2017   bool                interp = false, grad = false;
2018   CeedBasis           basis = NULL;
2019   CeedElemRestriction rstr  = NULL;
2020   CeedOperatorField  *op_fields;
2021   CeedQFunctionField *qf_fields;
2022   CeedInt             num_input_fields;
2023   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2024   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2025   for (CeedInt i = 0; i < num_input_fields; i++) {
2026     CeedVector vec;
2027     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2028     if (vec == CEED_VECTOR_ACTIVE) {
2029       CeedEvalMode eval_mode;
2030       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2031       interp = interp || eval_mode == CEED_EVAL_INTERP;
2032       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2033       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2034       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2035     }
2036   }
2037   if (!basis) {
2038     // LCOV_EXCL_START
2039     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2040     // LCOV_EXCL_STOP
2041   }
2042   CeedSize l_size = 1;
2043   CeedInt  P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
2044   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2045   CeedCall(CeedBasisGetNumNodes(basis, &elem_size));
2046   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2047   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2048   CeedCall(CeedBasisGetDimension(basis, &dim));
2049   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2050   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2051   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2052 
2053   // Build and diagonalize 1D Mass and Laplacian
2054   bool tensor_basis;
2055   CeedCall(CeedBasisIsTensor(basis, &tensor_basis));
2056   if (!tensor_basis) {
2057     // LCOV_EXCL_START
2058     return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2059     // LCOV_EXCL_STOP
2060   }
2061   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2062   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2063   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2064   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2065   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2066   CeedCall(CeedCalloc(P_1d, &lambda));
2067   // -- Build matrices
2068   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2069   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2070   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2071   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2072   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2073 
2074   // -- Diagonalize
2075   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2076   CeedCall(CeedFree(&mass));
2077   CeedCall(CeedFree(&laplace));
2078   for (CeedInt i = 0; i < P_1d; i++) {
2079     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2080   }
2081   CeedCall(CeedFree(&x));
2082 
2083   // Assemble QFunction
2084   CeedVector          assembled;
2085   CeedElemRestriction rstr_qf;
2086   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2087   CeedInt layout[3];
2088   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2089   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2090   CeedScalar max_norm = 0;
2091   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2092 
2093   // Calculate element averages
2094   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2095   CeedScalar       *elem_avg;
2096   const CeedScalar *assembled_array, *q_weight_array;
2097   CeedVector        q_weight;
2098   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2099   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2100   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2101   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2102   CeedCall(CeedCalloc(num_elem, &elem_avg));
2103   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2104   for (CeedInt e = 0; e < num_elem; e++) {
2105     CeedInt count = 0;
2106     for (CeedInt q = 0; q < num_qpts; q++) {
2107       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2108         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2109           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2110           count++;
2111         }
2112       }
2113     }
2114     if (count) {
2115       elem_avg[e] /= count;
2116     } else {
2117       elem_avg[e] = 1.0;
2118     }
2119   }
2120   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2121   CeedCall(CeedVectorDestroy(&assembled));
2122   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2123   CeedCall(CeedVectorDestroy(&q_weight));
2124 
2125   // Build FDM diagonal
2126   CeedVector  q_data;
2127   CeedScalar *q_data_array, *fdm_diagonal;
2128   CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal));
2129   const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON;
2130   for (CeedInt c = 0; c < num_comp; c++) {
2131     for (CeedInt n = 0; n < elem_size; n++) {
2132       if (interp) fdm_diagonal[c * elem_size + n] = 1.0;
2133       if (grad) {
2134         for (CeedInt d = 0; d < dim; d++) {
2135           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2136           fdm_diagonal[c * elem_size + n] += lambda[i];
2137         }
2138       }
2139       if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound;
2140     }
2141   }
2142   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data));
2143   CeedCall(CeedVectorSetValue(q_data, 0.0));
2144   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2145   for (CeedInt e = 0; e < num_elem; e++) {
2146     for (CeedInt c = 0; c < num_comp; c++) {
2147       for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]);
2148     }
2149   }
2150   CeedCall(CeedFree(&elem_avg));
2151   CeedCall(CeedFree(&fdm_diagonal));
2152   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2153 
2154   // Setup FDM operator
2155   // -- Basis
2156   CeedBasis   fdm_basis;
2157   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2158   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2159   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2160   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2161   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2162   CeedCall(CeedFree(&fdm_interp));
2163   CeedCall(CeedFree(&grad_dummy));
2164   CeedCall(CeedFree(&q_ref_dummy));
2165   CeedCall(CeedFree(&q_weight_dummy));
2166   CeedCall(CeedFree(&lambda));
2167 
2168   // -- Restriction
2169   CeedElemRestriction rstr_qd_i;
2170   CeedInt             strides[3] = {1, elem_size, elem_size * num_comp};
2171   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i));
2172   // -- QFunction
2173   CeedQFunction qf_fdm;
2174   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2175   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2176   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2177   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2178   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2179   // -- QFunction context
2180   CeedInt *num_comp_data;
2181   CeedCall(CeedCalloc(1, &num_comp_data));
2182   num_comp_data[0] = num_comp;
2183   CeedQFunctionContext ctx_fdm;
2184   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2185   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2186   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2187   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2188   // -- Operator
2189   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2190   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2191   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2192   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2193 
2194   // Cleanup
2195   CeedCall(CeedVectorDestroy(&q_data));
2196   CeedCall(CeedBasisDestroy(&fdm_basis));
2197   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2198   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2199 
2200   return CEED_ERROR_SUCCESS;
2201 }
2202 
2203 /// @}
2204