xref: /libCEED/interface/ceed-preconditioning.c (revision f3d47e3642aeddb363a2ee1bc484555cc57cbb59)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <assert.h>
9 #include <ceed-impl.h>
10 #include <ceed/backend.h>
11 #include <ceed/ceed.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   Ceed ceed_fallback;
84 
85   // Check not already created
86   if (op->op_fallback) return CEED_ERROR_SUCCESS;
87 
88   // Fallback Ceed
89   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
90   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
91 
92   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
93   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
94 
95   // Clone Op
96   CeedOperator op_fallback;
97   if (op->is_composite) {
98     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
99     for (CeedInt i = 0; i < op->num_suboperators; i++) {
100       CeedOperator op_sub_fallback;
101 
102       CeedCall(CeedOperatorGetFallback(op->sub_operators[i], &op_sub_fallback));
103       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
104     }
105   } else {
106     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
107     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
108     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
109     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
110     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
111     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
112       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_restr, op->input_fields[i]->basis,
113                                     op->input_fields[i]->vec));
114     }
115     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
116       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_restr, op->output_fields[i]->basis,
117                                     op->output_fields[i]->vec));
118     }
119     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
120     if (op_fallback->num_qpts == 0) {
121       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
122     }
123     // Cleanup
124     CeedCall(CeedQFunctionDestroy(&qf_fallback));
125     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
126     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
127   }
128   CeedCall(CeedOperatorSetName(op_fallback, op->name));
129   CeedCall(CeedOperatorCheckReady(op_fallback));
130   op->op_fallback = op_fallback;
131 
132   return CEED_ERROR_SUCCESS;
133 }
134 
135 /**
136   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
137 
138   @param[in]  op          CeedOperator to retrieve fallback for
139   @param[out] op_fallback Fallback CeedOperator
140 
141   @return An error code: 0 - success, otherwise - failure
142 
143   @ref Developer
144 **/
145 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
146   // Create if needed
147   if (!op->op_fallback) {
148     CeedCall(CeedOperatorCreateFallback(op));
149   }
150   if (op->op_fallback) {
151     bool is_debug;
152 
153     CeedCall(CeedIsDebug(op->ceed, &is_debug));
154     if (is_debug) {
155       Ceed        ceed_fallback;
156       const char *resource, *resource_fallback;
157 
158       CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
159       CeedCall(CeedGetResource(op->ceed, &resource));
160       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
161 
162       CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
163       CeedDebug(op->ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
164                 op->op_fallback);
165     }
166   }
167   *op_fallback = op->op_fallback;
168 
169   return CEED_ERROR_SUCCESS;
170 }
171 
172 /**
173   @brief Select correct basis matrix pointer based on CeedEvalMode
174 
175   @param[in]  eval_mode Current basis evaluation mode
176   @param[in]  identity  Pointer to identity matrix
177   @param[in]  interp    Pointer to interpolation matrix
178   @param[in]  grad      Pointer to gradient matrix
179   @param[out] basis_ptr Basis pointer to set
180 
181   @ref Developer
182 **/
183 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad,
184                                                const CeedScalar **basis_ptr) {
185   switch (eval_mode) {
186     case CEED_EVAL_NONE:
187       *basis_ptr = identity;
188       break;
189     case CEED_EVAL_INTERP:
190       *basis_ptr = interp;
191       break;
192     case CEED_EVAL_GRAD:
193       *basis_ptr = grad;
194       break;
195     case CEED_EVAL_WEIGHT:
196     case CEED_EVAL_DIV:
197     case CEED_EVAL_CURL:
198       break;  // Caught by QF Assembly
199   }
200   assert(*basis_ptr != NULL);
201 }
202 
203 /**
204   @brief Create point block restriction for active operator field
205 
206   @param[in]  rstr            Original CeedElemRestriction for active field
207   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
208 
209   @return An error code: 0 - success, otherwise - failure
210 
211   @ref Developer
212 **/
213 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
214   Ceed ceed;
215   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
216   const CeedInt *offsets;
217   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
218 
219   // Expand offsets
220   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
221   CeedSize l_size;
222   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
223   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
224   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
225   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
226   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
227   CeedInt shift = num_comp;
228   if (comp_stride != 1) shift *= num_comp;
229   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
230   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
231     pointblock_offsets[i] = offsets[i] * shift;
232   }
233 
234   // Create new restriction
235   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
236                                      pointblock_offsets, pointblock_rstr));
237 
238   // Cleanup
239   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
240 
241   return CEED_ERROR_SUCCESS;
242 }
243 
244 /**
245   @brief Core logic for assembling operator diagonal or point block diagonal
246 
247   @param[in]  op            CeedOperator to assemble point block diagonal
248   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
249   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
250   @param[out] assembled     CeedVector to store assembled diagonal
251 
252   @return An error code: 0 - success, otherwise - failure
253 
254   @ref Developer
255 **/
256 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
257   Ceed ceed;
258   CeedCall(CeedOperatorGetCeed(op, &ceed));
259 
260   // Assemble QFunction
261   CeedQFunction qf;
262   CeedCall(CeedOperatorGetQFunction(op, &qf));
263   CeedInt num_input_fields, num_output_fields;
264   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
265   CeedVector          assembled_qf;
266   CeedElemRestriction rstr;
267   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr, request));
268   CeedInt layout[3];
269   CeedCall(CeedElemRestrictionGetELayout(rstr, &layout));
270   CeedCall(CeedElemRestrictionDestroy(&rstr));
271 
272   // Get assembly data
273   CeedOperatorAssemblyData data;
274   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
275   const CeedEvalMode *eval_mode_in, *eval_mode_out;
276   CeedInt             num_eval_mode_in, num_eval_mode_out;
277   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out));
278   CeedBasis basis_in, basis_out;
279   CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL));
280   CeedInt num_comp;
281   CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
282 
283   // Assemble point block diagonal restriction, if needed
284   CeedElemRestriction diag_rstr;
285   CeedCall(CeedOperatorGetActiveElemRestriction(op, &diag_rstr));
286   if (is_pointblock) {
287     CeedElemRestriction point_block_rstr;
288     CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_rstr, &point_block_rstr));
289     diag_rstr = point_block_rstr;
290   }
291 
292   // Create diagonal vector
293   CeedVector elem_diag;
294   CeedCall(CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag));
295 
296   // Assemble element operator diagonals
297   CeedScalar       *elem_diag_array;
298   const CeedScalar *assembled_qf_array;
299   CeedCall(CeedVectorSetValue(elem_diag, 0.0));
300   CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
301   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
302   CeedInt num_elem, num_nodes, num_qpts;
303   CeedCall(CeedElemRestrictionGetNumElements(diag_rstr, &num_elem));
304   CeedCall(CeedBasisGetNumNodes(basis_in, &num_nodes));
305   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
306 
307   // Basis matrices
308   const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
309   CeedScalar       *identity      = NULL;
310   bool              has_eval_none = false;
311   for (CeedInt i = 0; i < num_eval_mode_in; i++) {
312     has_eval_none = has_eval_none || (eval_mode_in[i] == CEED_EVAL_NONE);
313   }
314   for (CeedInt i = 0; i < num_eval_mode_out; i++) {
315     has_eval_none = has_eval_none || (eval_mode_out[i] == CEED_EVAL_NONE);
316   }
317   if (has_eval_none) {
318     CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
319     for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
320   }
321   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
322   CeedCall(CeedBasisGetInterp(basis_out, &interp_out));
323   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
324   CeedCall(CeedBasisGetGrad(basis_out, &grad_out));
325   // Compute the diagonal of B^T D B
326   // Each element
327   for (CeedInt e = 0; e < num_elem; e++) {
328     CeedInt d_out = -1;
329     // Each basis eval mode pair
330     for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
331       const CeedScalar *bt = NULL;
332       if (eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out += 1;
333       CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * num_nodes], &bt);
334       CeedInt d_in = -1;
335       for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
336         const CeedScalar *b = NULL;
337         if (eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in += 1;
338         CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * num_nodes], &b);
339         // Each component
340         for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
341           // Each qpoint/node pair
342           for (CeedInt q = 0; q < num_qpts; q++) {
343             if (is_pointblock) {
344               // Point Block Diagonal
345               for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
346                 const CeedScalar qf_value =
347                     assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_in) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] +
348                                        e * layout[2]];
349                 for (CeedInt n = 0; n < num_nodes; n++) {
350                   elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
351                       bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n];
352                 }
353               }
354             } else {
355               // Diagonal Only
356               const CeedScalar qf_value =
357                   assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_out) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] +
358                                      e * layout[2]];
359               for (CeedInt n = 0; n < num_nodes; n++) {
360                 elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n];
361               }
362             }
363           }
364         }
365       }
366     }
367   }
368   CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
369   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
370 
371   // Assemble local operator diagonal
372   CeedCall(CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
373 
374   // Cleanup
375   if (is_pointblock) {
376     CeedCall(CeedElemRestrictionDestroy(&diag_rstr));
377   }
378   CeedCall(CeedVectorDestroy(&assembled_qf));
379   CeedCall(CeedVectorDestroy(&elem_diag));
380   CeedCall(CeedFree(&identity));
381 
382   return CEED_ERROR_SUCCESS;
383 }
384 
385 /**
386   @brief Core logic for assembling composite operator diagonal
387 
388   @param[in]  op            CeedOperator to assemble point block diagonal
389   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
390   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
391   @param[out] assembled     CeedVector to store assembled diagonal
392 
393   @return An error code: 0 - success, otherwise - failure
394 
395   @ref Developer
396 **/
397 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
398                                                                  CeedVector assembled) {
399   CeedInt       num_sub;
400   CeedOperator *suboperators;
401   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
402   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
403   for (CeedInt i = 0; i < num_sub; i++) {
404     if (is_pointblock) {
405       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
406     } else {
407       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
408     }
409   }
410   return CEED_ERROR_SUCCESS;
411 }
412 
413 /**
414   @brief Build nonzero pattern for non-composite operator
415 
416   Users should generally use CeedOperatorLinearAssembleSymbolic()
417 
418   @param[in]  op     CeedOperator to assemble nonzero pattern
419   @param[in]  offset Offset for number of entries
420   @param[out] rows   Row number for each entry
421   @param[out] cols   Column number for each entry
422 
423   @return An error code: 0 - success, otherwise - failure
424 
425   @ref Developer
426 **/
427 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
428   Ceed ceed;
429   bool is_composite;
430   CeedCall(CeedOperatorGetCeed(op, &ceed));
431   CeedCall(CeedOperatorIsComposite(op, &is_composite));
432 
433   if (op->is_composite) {
434     // LCOV_EXCL_START
435     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
436     // LCOV_EXCL_STOP
437   }
438 
439   CeedSize num_nodes;
440   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
441   CeedElemRestriction rstr_in;
442   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
443   CeedInt num_elem, elem_size, num_comp;
444   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
445   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
446   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
447   CeedInt layout_er[3];
448   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
449 
450   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
451 
452   // Determine elem_dof relation
453   CeedVector index_vec;
454   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
455   CeedScalar *array;
456   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
457   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
458   CeedCall(CeedVectorRestoreArray(index_vec, &array));
459   CeedVector elem_dof;
460   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
461   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
462   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
463   const CeedScalar *elem_dof_a;
464   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
465   CeedCall(CeedVectorDestroy(&index_vec));
466 
467   // Determine i, j locations for element matrices
468   CeedInt count = 0;
469   for (CeedInt e = 0; e < num_elem; e++) {
470     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
471       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
472         for (CeedInt i = 0; i < elem_size; i++) {
473           for (CeedInt j = 0; j < elem_size; j++) {
474             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
475             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
476 
477             const CeedInt row = elem_dof_a[elem_dof_index_row];
478             const CeedInt col = elem_dof_a[elem_dof_index_col];
479 
480             rows[offset + count] = row;
481             cols[offset + count] = col;
482             count++;
483           }
484         }
485       }
486     }
487   }
488   if (count != local_num_entries) {
489     // LCOV_EXCL_START
490     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
491     // LCOV_EXCL_STOP
492   }
493   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
494   CeedCall(CeedVectorDestroy(&elem_dof));
495 
496   return CEED_ERROR_SUCCESS;
497 }
498 
499 /**
500   @brief Assemble nonzero entries for non-composite operator
501 
502   Users should generally use CeedOperatorLinearAssemble()
503 
504   @param[in]  op     CeedOperator to assemble
505   @param[in]  offset Offset for number of entries
506   @param[out] values Values to assemble into matrix
507 
508   @return An error code: 0 - success, otherwise - failure
509 
510   @ref Developer
511 **/
512 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
513   Ceed ceed;
514   bool is_composite;
515   CeedCall(CeedOperatorGetCeed(op, &ceed));
516   CeedCall(CeedOperatorIsComposite(op, &is_composite));
517 
518   if (is_composite) {
519     // LCOV_EXCL_START
520     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
521     // LCOV_EXCL_STOP
522   }
523 
524   // Early exit for empty operator
525   {
526     CeedInt num_elem = 0;
527 
528     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
529     if (num_elem == 0) return CEED_ERROR_SUCCESS;
530   }
531 
532   if (op->LinearAssembleSingle) {
533     // Backend version
534     CeedCall(op->LinearAssembleSingle(op, offset, values));
535     return CEED_ERROR_SUCCESS;
536   } else {
537     // Operator fallback
538     CeedOperator op_fallback;
539 
540     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
541     if (op_fallback) {
542       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
543       return CEED_ERROR_SUCCESS;
544     }
545   }
546 
547   // Assemble QFunction
548   CeedQFunction qf;
549   CeedCall(CeedOperatorGetQFunction(op, &qf));
550   CeedVector          assembled_qf;
551   CeedElemRestriction rstr_q;
552   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
553   CeedSize qf_length;
554   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
555 
556   CeedInt            num_input_fields, num_output_fields;
557   CeedOperatorField *input_fields;
558   CeedOperatorField *output_fields;
559   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
560 
561   // Get assembly data
562   CeedOperatorAssemblyData data;
563   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
564   const CeedEvalMode *eval_mode_in, *eval_mode_out;
565   CeedInt             num_eval_mode_in, num_eval_mode_out;
566   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out));
567   CeedBasis basis_in, basis_out;
568   CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL));
569 
570   if (num_eval_mode_in == 0 || num_eval_mode_out == 0) {
571     // LCOV_EXCL_START
572     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
573     // LCOV_EXCL_STOP
574   }
575 
576   CeedElemRestriction active_rstr;
577   CeedInt             num_elem, elem_size, num_qpts, num_comp;
578   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
579   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
580   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
581   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
582   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
583 
584   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
585 
586   // loop over elements and put in data structure
587   const CeedScalar *interp_in, *grad_in;
588   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
589   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
590 
591   const CeedScalar *assembled_qf_array;
592   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
593 
594   CeedInt layout_qf[3];
595   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
596   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
597 
598   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
599   const CeedScalar *B_mat_in, *B_mat_out;
600   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &B_mat_in, NULL, &B_mat_out));
601   CeedScalar  BTD_mat[elem_size * num_qpts * num_eval_mode_in];
602   CeedScalar  elem_mat[elem_size * elem_size];
603   CeedInt     count = 0;
604   CeedScalar *vals;
605   CeedCall(CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals));
606   for (CeedInt e = 0; e < num_elem; e++) {
607     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
608       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
609         // Compute B^T*D
610         for (CeedInt n = 0; n < elem_size; n++) {
611           for (CeedInt q = 0; q < num_qpts; q++) {
612             for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
613               const CeedInt btd_index = n * (num_qpts * num_eval_mode_in) + (num_eval_mode_in * q + e_in);
614               CeedScalar    sum       = 0.0;
615               for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
616                 const CeedInt b_out_index     = (num_eval_mode_out * q + e_out) * elem_size + n;
617                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_mode_out + e_out) * num_comp + comp_out;
618                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
619                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
620               }
621               BTD_mat[btd_index] = sum;
622             }
623           }
624         }
625         // form element matrix itself (for each block component)
626         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_mode_in));
627 
628         // put element matrix in coordinate data structure
629         for (CeedInt i = 0; i < elem_size; i++) {
630           for (CeedInt j = 0; j < elem_size; j++) {
631             vals[offset + count] = elem_mat[i * elem_size + j];
632             count++;
633           }
634         }
635       }
636     }
637   }
638   if (count != local_num_entries) {
639     // LCOV_EXCL_START
640     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
641     // LCOV_EXCL_STOP
642   }
643   CeedCall(CeedVectorRestoreArray(values, &vals));
644 
645   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
646   CeedCall(CeedVectorDestroy(&assembled_qf));
647 
648   return CEED_ERROR_SUCCESS;
649 }
650 
651 /**
652   @brief Count number of entries for assembled CeedOperator
653 
654   @param[in]  op          CeedOperator to assemble
655   @param[out] num_entries Number of entries in assembled representation
656 
657   @return An error code: 0 - success, otherwise - failure
658 
659   @ref Utility
660 **/
661 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
662   CeedElemRestriction rstr;
663   CeedInt             num_elem, elem_size, num_comp;
664 
665   if (op->is_composite) {
666     // LCOV_EXCL_START
667     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
668     // LCOV_EXCL_STOP
669   }
670   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
671   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
672   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
673   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
674   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
675 
676   return CEED_ERROR_SUCCESS;
677 }
678 
679 /**
680   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
681 
682   @param[in]  op_fine      Fine grid operator
683   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter
684   @param[in]  rstr_coarse  Coarse grid restriction
685   @param[in]  basis_coarse Coarse grid active vector basis
686   @param[in]  basis_c_to_f Basis for coarse to fine interpolation
687   @param[out] op_coarse    Coarse grid operator
688   @param[out] op_prolong   Coarse to fine operator
689   @param[out] op_restrict  Fine to coarse operator
690 
691   @return An error code: 0 - success, otherwise - failure
692 
693   @ref Developer
694 **/
695 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
696                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
697   Ceed ceed;
698   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
699 
700   // Check for composite operator
701   bool is_composite;
702   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
703   if (is_composite) {
704     // LCOV_EXCL_START
705     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
706     // LCOV_EXCL_STOP
707   }
708 
709   // Coarse Grid
710   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
711   CeedElemRestriction rstr_fine = NULL;
712   // -- Clone input fields
713   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
714     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
715       rstr_fine = op_fine->input_fields[i]->elem_restr;
716       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
717     } else {
718       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_restr,
719                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
720     }
721   }
722   // -- Clone output fields
723   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
724     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
725       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
726     } else {
727       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_restr,
728                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
729     }
730   }
731   // -- Clone QFunctionAssemblyData
732   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
733 
734   // Multiplicity vector
735   CeedVector mult_vec, mult_e_vec;
736   CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
737   CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
738   CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
739   CeedCall(CeedVectorSetValue(mult_vec, 0.0));
740   CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
741   CeedCall(CeedVectorDestroy(&mult_e_vec));
742   CeedCall(CeedVectorReciprocal(mult_vec));
743 
744   // Restriction
745   CeedInt num_comp;
746   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
747   CeedQFunction qf_restrict;
748   CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
749   CeedInt *num_comp_r_data;
750   CeedCall(CeedCalloc(1, &num_comp_r_data));
751   num_comp_r_data[0] = num_comp;
752   CeedQFunctionContext ctx_r;
753   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
754   CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
755   CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
756   CeedCall(CeedQFunctionContextDestroy(&ctx_r));
757   CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
758   CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
759   CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
760   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
761 
762   CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
763   CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
764   CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
765   CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
766 
767   // Prolongation
768   CeedQFunction qf_prolong;
769   CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
770   CeedInt *num_comp_p_data;
771   CeedCall(CeedCalloc(1, &num_comp_p_data));
772   num_comp_p_data[0] = num_comp;
773   CeedQFunctionContext ctx_p;
774   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
775   CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
776   CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
777   CeedCall(CeedQFunctionContextDestroy(&ctx_p));
778   CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
779   CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
780   CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
781   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
782 
783   CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
784   CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
785   CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
786   CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
787 
788   // Clone name
789   bool   has_name = op_fine->name;
790   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
791   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
792   {
793     char *prolongation_name;
794     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
795     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
796     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
797     CeedCall(CeedFree(&prolongation_name));
798   }
799   {
800     char *restriction_name;
801     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
802     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
803     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
804     CeedCall(CeedFree(&restriction_name));
805   }
806 
807   // Check
808   CeedCall(CeedOperatorCheckReady(*op_coarse));
809   CeedCall(CeedOperatorCheckReady(*op_prolong));
810   CeedCall(CeedOperatorCheckReady(*op_restrict));
811 
812   // Cleanup
813   CeedCall(CeedVectorDestroy(&mult_vec));
814   CeedCall(CeedBasisDestroy(&basis_c_to_f));
815   CeedCall(CeedQFunctionDestroy(&qf_restrict));
816   CeedCall(CeedQFunctionDestroy(&qf_prolong));
817 
818   return CEED_ERROR_SUCCESS;
819 }
820 
821 /**
822   @brief Build 1D mass matrix and Laplacian with perturbation
823 
824   @param[in]  interp_1d   Interpolation matrix in one dimension
825   @param[in]  grad_1d     Gradient matrix in one dimension
826   @param[in]  q_weight_1d Quadrature weights in one dimension
827   @param[in]  P_1d        Number of basis nodes in one dimension
828   @param[in]  Q_1d        Number of quadrature points in one dimension
829   @param[in]  dim         Dimension of basis
830   @param[out] mass        Assembled mass matrix in one dimension
831   @param[out] laplace     Assembled perturbed Laplacian in one dimension
832 
833   @return An error code: 0 - success, otherwise - failure
834 
835   @ref Developer
836 **/
837 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d,
838                                                       CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
839   for (CeedInt i = 0; i < P_1d; i++) {
840     for (CeedInt j = 0; j < P_1d; j++) {
841       CeedScalar sum = 0.0;
842       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
843       mass[i + j * P_1d] = sum;
844     }
845   }
846   // -- Laplacian
847   for (CeedInt i = 0; i < P_1d; i++) {
848     for (CeedInt j = 0; j < P_1d; j++) {
849       CeedScalar sum = 0.0;
850       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
851       laplace[i + j * P_1d] = sum;
852     }
853   }
854   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
855   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
856   return CEED_ERROR_SUCCESS;
857 }
858 CeedPragmaOptimizeOn;
859 
860 /// @}
861 
862 /// ----------------------------------------------------------------------------
863 /// CeedOperator Backend API
864 /// ----------------------------------------------------------------------------
865 /// @addtogroup CeedOperatorBackend
866 /// @{
867 
868 /**
869   @brief Create object holding CeedQFunction assembly data for CeedOperator
870 
871   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
872   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
873 
874   @return An error code: 0 - success, otherwise - failure
875 
876   @ref Backend
877 **/
878 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
879   CeedCall(CeedCalloc(1, data));
880   (*data)->ref_count = 1;
881   (*data)->ceed      = ceed;
882   CeedCall(CeedReference(ceed));
883 
884   return CEED_ERROR_SUCCESS;
885 }
886 
887 /**
888   @brief Increment the reference counter for a CeedQFunctionAssemblyData
889 
890   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
891 
892   @return An error code: 0 - success, otherwise - failure
893 
894   @ref Backend
895 **/
896 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
897   data->ref_count++;
898   return CEED_ERROR_SUCCESS;
899 }
900 
901 /**
902   @brief Set re-use of CeedQFunctionAssemblyData
903 
904   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
905   @param[in]     reuse_data Boolean flag indicating data re-use
906 
907   @return An error code: 0 - success, otherwise - failure
908 
909   @ref Backend
910 **/
911 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
912   data->reuse_data        = reuse_data;
913   data->needs_data_update = true;
914   return CEED_ERROR_SUCCESS;
915 }
916 
917 /**
918   @brief Mark QFunctionAssemblyData as stale
919 
920   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
921   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
922 
923   @return An error code: 0 - success, otherwise - failure
924 
925   @ref Backend
926 **/
927 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
928   data->needs_data_update = needs_data_update;
929   return CEED_ERROR_SUCCESS;
930 }
931 
932 /**
933   @brief Determine if QFunctionAssemblyData needs update
934 
935   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
936   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
937 
938   @return An error code: 0 - success, otherwise - failure
939 
940   @ref Backend
941 **/
942 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
943   *is_update_needed = !data->reuse_data || data->needs_data_update;
944   return CEED_ERROR_SUCCESS;
945 }
946 
947 /**
948   @brief Copy the pointer to a CeedQFunctionAssemblyData.
949            Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
950            Note: If `*data_copy` is non-NULL, then it is assumed that `*data_copy` is a pointer to a CeedQFunctionAssemblyData.
951              This CeedQFunctionAssemblyData will be destroyed if `*data_copy` is the only reference to this CeedQFunctionAssemblyData.
952 
953   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
954   @param[in,out] data_copy Variable to store copied reference
955 
956   @return An error code: 0 - success, otherwise - failure
957 
958   @ref Backend
959 **/
960 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
961   CeedCall(CeedQFunctionAssemblyDataReference(data));
962   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
963   *data_copy = data;
964   return CEED_ERROR_SUCCESS;
965 }
966 
967 /**
968   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
969 
970   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
971   @param[out] is_setup Boolean flag for setup status
972 
973   @return An error code: 0 - success, otherwise - failure
974 
975   @ref Backend
976 **/
977 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
978   *is_setup = data->is_setup;
979   return CEED_ERROR_SUCCESS;
980 }
981 
982 /**
983   @brief Set internal objects for CeedQFunctionAssemblyData
984 
985   @param[in,out] data CeedQFunctionAssemblyData to set objects
986   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
987   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
988 
989   @return An error code: 0 - success, otherwise - failure
990 
991   @ref Backend
992 **/
993 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
994   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
995   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
996 
997   data->is_setup = true;
998   return CEED_ERROR_SUCCESS;
999 }
1000 
1001 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1002   if (!data->is_setup) {
1003     // LCOV_EXCL_START
1004     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1005     // LCOV_EXCL_STOP
1006   }
1007 
1008   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1009   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1010 
1011   return CEED_ERROR_SUCCESS;
1012 }
1013 
1014 /**
1015   @brief Destroy CeedQFunctionAssemblyData
1016 
1017   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1018 
1019   @return An error code: 0 - success, otherwise - failure
1020 
1021   @ref Backend
1022 **/
1023 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1024   if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;
1025 
1026   CeedCall(CeedDestroy(&(*data)->ceed));
1027   CeedCall(CeedVectorDestroy(&(*data)->vec));
1028   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1029 
1030   CeedCall(CeedFree(data));
1031   return CEED_ERROR_SUCCESS;
1032 }
1033 
1034 /**
1035   @brief Get CeedOperatorAssemblyData
1036 
1037   @param[in]  op   CeedOperator to assemble
1038   @param[out] data CeedQFunctionAssemblyData
1039 
1040   @return An error code: 0 - success, otherwise - failure
1041 
1042   @ref Backend
1043 **/
1044 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1045   if (!op->op_assembled) {
1046     CeedOperatorAssemblyData data;
1047 
1048     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1049     op->op_assembled = data;
1050   }
1051   *data = op->op_assembled;
1052 
1053   return CEED_ERROR_SUCCESS;
1054 }
1055 
1056 /**
1057   @brief Create object holding CeedOperator assembly data
1058 
1059   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1060   @param[in]  op   CeedOperator to be assembled
1061   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1062 
1063   @return An error code: 0 - success, otherwise - failure
1064 
1065   @ref Backend
1066 **/
1067 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1068   CeedCall(CeedCalloc(1, data));
1069   (*data)->ceed = ceed;
1070   CeedCall(CeedReference(ceed));
1071 
1072   // Build OperatorAssembly data
1073   CeedQFunction       qf;
1074   CeedQFunctionField *qf_fields;
1075   CeedOperatorField  *op_fields;
1076   CeedInt             num_input_fields;
1077   CeedCall(CeedOperatorGetQFunction(op, &qf));
1078   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1079   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1080 
1081   // Determine active input basis
1082   CeedInt       num_eval_mode_in = 0, dim = 1;
1083   CeedEvalMode *eval_mode_in = NULL;
1084   CeedBasis     basis_in     = NULL;
1085   for (CeedInt i = 0; i < num_input_fields; i++) {
1086     CeedVector vec;
1087     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1088     if (vec == CEED_VECTOR_ACTIVE) {
1089       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1090       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1091       CeedEvalMode eval_mode;
1092       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1093       switch (eval_mode) {
1094         case CEED_EVAL_NONE:
1095         case CEED_EVAL_INTERP:
1096           CeedCall(CeedRealloc(num_eval_mode_in + 1, &eval_mode_in));
1097           eval_mode_in[num_eval_mode_in] = eval_mode;
1098           num_eval_mode_in += 1;
1099           break;
1100         case CEED_EVAL_GRAD:
1101           CeedCall(CeedRealloc(num_eval_mode_in + dim, &eval_mode_in));
1102           for (CeedInt d = 0; d < dim; d++) {
1103             eval_mode_in[num_eval_mode_in + d] = eval_mode;
1104           }
1105           num_eval_mode_in += dim;
1106           break;
1107         case CEED_EVAL_WEIGHT:
1108         case CEED_EVAL_DIV:
1109         case CEED_EVAL_CURL:
1110           break;  // Caught by QF Assembly
1111       }
1112     }
1113   }
1114   (*data)->num_eval_mode_in = num_eval_mode_in;
1115   (*data)->eval_mode_in     = eval_mode_in;
1116   CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->basis_in));
1117 
1118   // Determine active output basis
1119   CeedInt num_output_fields;
1120   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1121   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1122   CeedInt       num_eval_mode_out = 0;
1123   CeedEvalMode *eval_mode_out     = NULL;
1124   CeedBasis     basis_out         = NULL;
1125   for (CeedInt i = 0; i < num_output_fields; i++) {
1126     CeedVector vec;
1127     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1128     if (vec == CEED_VECTOR_ACTIVE) {
1129       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1130       CeedEvalMode eval_mode;
1131       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1132       switch (eval_mode) {
1133         case CEED_EVAL_NONE:
1134         case CEED_EVAL_INTERP:
1135           CeedCall(CeedRealloc(num_eval_mode_out + 1, &eval_mode_out));
1136           eval_mode_out[num_eval_mode_out] = eval_mode;
1137           num_eval_mode_out += 1;
1138           break;
1139         case CEED_EVAL_GRAD:
1140           CeedCall(CeedRealloc(num_eval_mode_out + dim, &eval_mode_out));
1141           for (CeedInt d = 0; d < dim; d++) {
1142             eval_mode_out[num_eval_mode_out + d] = eval_mode;
1143           }
1144           num_eval_mode_out += dim;
1145           break;
1146         case CEED_EVAL_WEIGHT:
1147         case CEED_EVAL_DIV:
1148         case CEED_EVAL_CURL:
1149           break;  // Caught by QF Assembly
1150       }
1151     }
1152   }
1153   (*data)->num_eval_mode_out = num_eval_mode_out;
1154   (*data)->eval_mode_out     = eval_mode_out;
1155   CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->basis_out));
1156 
1157   return CEED_ERROR_SUCCESS;
1158 }
1159 
1160 /**
1161   @brief Get CeedOperator CeedEvalModes for assembly
1162 
1163   @param[in]  data              CeedOperatorAssemblyData
1164   @param[out] num_eval_mode_in  Pointer to hold number of input CeedEvalModes, or NULL
1165   @param[out] eval_mode_in      Pointer to hold input CeedEvalModes, or NULL
1166   @param[out] num_eval_mode_out Pointer to hold number of output CeedEvalModes, or NULL
1167   @param[out] eval_mode_out     Pointer to hold output CeedEvalModes, or NULL
1168 
1169   @return An error code: 0 - success, otherwise - failure
1170 
1171   @ref Backend
1172 **/
1173 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_eval_mode_in, const CeedEvalMode **eval_mode_in,
1174                                          CeedInt *num_eval_mode_out, const CeedEvalMode **eval_mode_out) {
1175   if (num_eval_mode_in) *num_eval_mode_in = data->num_eval_mode_in;
1176   if (eval_mode_in) *eval_mode_in = data->eval_mode_in;
1177   if (num_eval_mode_out) *num_eval_mode_out = data->num_eval_mode_out;
1178   if (eval_mode_out) *eval_mode_out = data->eval_mode_out;
1179 
1180   return CEED_ERROR_SUCCESS;
1181 }
1182 
1183 /**
1184   @brief Get CeedOperator CeedBasis data for assembly
1185 
1186   @param[in]  data      CeedOperatorAssemblyData
1187   @param[out] basis_in  Pointer to hold active input CeedBasis, or NULL
1188   @param[out] B_in      Pointer to hold assembled active input B, or NULL
1189   @param[out] basis_out Pointer to hold active output CeedBasis, or NULL
1190   @param[out] B_out     Pointer to hold assembled active output B, or NULL
1191 
1192   @return An error code: 0 - success, otherwise - failure
1193 
1194   @ref Backend
1195 **/
1196 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedBasis *basis_in, const CeedScalar **B_in, CeedBasis *basis_out,
1197                                      const CeedScalar **B_out) {
1198   // Assemble B_in, B_out if needed
1199   if (B_in && !data->B_in) {
1200     CeedInt           num_qpts, elem_size;
1201     CeedScalar       *B_in, *identity = NULL;
1202     const CeedScalar *interp_in, *grad_in;
1203     bool              has_eval_none = false;
1204 
1205     CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_in, &num_qpts));
1206     CeedCall(CeedBasisGetNumNodes(data->basis_in, &elem_size));
1207     CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_in, &B_in));
1208 
1209     for (CeedInt i = 0; i < data->num_eval_mode_in; i++) {
1210       has_eval_none = has_eval_none || (data->eval_mode_in[i] == CEED_EVAL_NONE);
1211     }
1212     if (has_eval_none) {
1213       CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1214       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1215         identity[i * elem_size + i] = 1.0;
1216       }
1217     }
1218     CeedCall(CeedBasisGetInterp(data->basis_in, &interp_in));
1219     CeedCall(CeedBasisGetGrad(data->basis_in, &grad_in));
1220 
1221     for (CeedInt q = 0; q < num_qpts; q++) {
1222       for (CeedInt n = 0; n < elem_size; n++) {
1223         CeedInt d_in = -1;
1224         for (CeedInt e_in = 0; e_in < data->num_eval_mode_in; e_in++) {
1225           const CeedInt     qq = data->num_eval_mode_in * q;
1226           const CeedScalar *b  = NULL;
1227 
1228           if (data->eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in++;
1229           CeedOperatorGetBasisPointer(data->eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &b);
1230           B_in[(qq + e_in) * elem_size + n] = b[q * elem_size + n];
1231         }
1232       }
1233     }
1234     data->B_in = B_in;
1235   }
1236 
1237   if (B_out && !data->B_out) {
1238     CeedInt           num_qpts, elem_size;
1239     CeedScalar       *B_out, *identity = NULL;
1240     const CeedScalar *interp_out, *grad_out;
1241     bool              has_eval_none = false;
1242 
1243     CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_out, &num_qpts));
1244     CeedCall(CeedBasisGetNumNodes(data->basis_out, &elem_size));
1245     CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_out, &B_out));
1246 
1247     for (CeedInt i = 0; i < data->num_eval_mode_out; i++) {
1248       has_eval_none = has_eval_none || (data->eval_mode_out[i] == CEED_EVAL_NONE);
1249     }
1250     if (has_eval_none) {
1251       CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1252       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1253         identity[i * elem_size + i] = 1.0;
1254       }
1255     }
1256     CeedCall(CeedBasisGetInterp(data->basis_out, &interp_out));
1257     CeedCall(CeedBasisGetGrad(data->basis_out, &grad_out));
1258 
1259     for (CeedInt q = 0; q < num_qpts; q++) {
1260       for (CeedInt n = 0; n < elem_size; n++) {
1261         CeedInt d_out = -1;
1262         for (CeedInt e_out = 0; e_out < data->num_eval_mode_out; e_out++) {
1263           const CeedInt     qq = data->num_eval_mode_out * q;
1264           const CeedScalar *b  = NULL;
1265 
1266           if (data->eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out++;
1267           CeedOperatorGetBasisPointer(data->eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &b);
1268           B_out[(qq + e_out) * elem_size + n] = b[q * elem_size + n];
1269         }
1270       }
1271     }
1272     data->B_out = B_out;
1273   }
1274 
1275   if (basis_in) *basis_in = data->basis_in;
1276   if (B_in) *B_in = data->B_in;
1277   if (basis_out) *basis_out = data->basis_out;
1278   if (B_out) *B_out = data->B_out;
1279 
1280   return CEED_ERROR_SUCCESS;
1281 }
1282 
1283 /**
1284   @brief Destroy CeedOperatorAssemblyData
1285 
1286   @param[in,out] data CeedOperatorAssemblyData to destroy
1287 
1288   @return An error code: 0 - success, otherwise - failure
1289 
1290   @ref Backend
1291 **/
1292 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1293   if (!*data) return CEED_ERROR_SUCCESS;
1294 
1295   CeedCall(CeedDestroy(&(*data)->ceed));
1296   CeedCall(CeedBasisDestroy(&(*data)->basis_in));
1297   CeedCall(CeedBasisDestroy(&(*data)->basis_out));
1298   CeedCall(CeedFree(&(*data)->eval_mode_in));
1299   CeedCall(CeedFree(&(*data)->eval_mode_out));
1300   CeedCall(CeedFree(&(*data)->B_in));
1301   CeedCall(CeedFree(&(*data)->B_out));
1302 
1303   CeedCall(CeedFree(data));
1304   return CEED_ERROR_SUCCESS;
1305 }
1306 
1307 /// @}
1308 
1309 /// ----------------------------------------------------------------------------
1310 /// CeedOperator Public API
1311 /// ----------------------------------------------------------------------------
1312 /// @addtogroup CeedOperatorUser
1313 /// @{
1314 
1315 /**
1316   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1317 
1318   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1319     The vector 'assembled' is of shape [num_elements, num_input_fields, num_output_fields, num_quad_points] and contains column-major matrices
1320 representing the action of the CeedQFunction for a corresponding quadrature point on an element. Inputs and outputs are in the order provided by the
1321 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order,
1322 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1]
1323 and producing the output [dv_0, dv_1, v].
1324 
1325   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1326 
1327   @param[in]  op        CeedOperator to assemble CeedQFunction
1328   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1329   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1330   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1331 
1332   @return An error code: 0 - success, otherwise - failure
1333 
1334   @ref User
1335 **/
1336 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1337   CeedCall(CeedOperatorCheckReady(op));
1338 
1339   if (op->LinearAssembleQFunction) {
1340     // Backend version
1341     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1342   } else {
1343     // Operator fallback
1344     CeedOperator op_fallback;
1345 
1346     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1347     if (op_fallback) {
1348       CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1349     } else {
1350       // LCOV_EXCL_START
1351       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1352       // LCOV_EXCL_STOP
1353     }
1354   }
1355   return CEED_ERROR_SUCCESS;
1356 }
1357 
1358 /**
1359   @brief Assemble CeedQFunction and store result internally.
1360            Return copied references of stored data to the caller.
1361            Caller is responsible for ownership and destruction of the copied references.
1362            See also @ref CeedOperatorLinearAssembleQFunction
1363 
1364   @param[in]  op        CeedOperator to assemble CeedQFunction
1365   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1366   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1367   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1368 
1369   @return An error code: 0 - success, otherwise - failure
1370 
1371   @ref User
1372 **/
1373 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1374   CeedCall(CeedOperatorCheckReady(op));
1375 
1376   if (op->LinearAssembleQFunctionUpdate) {
1377     // Backend version
1378     bool                qf_assembled_is_setup;
1379     CeedVector          assembled_vec  = NULL;
1380     CeedElemRestriction assembled_rstr = NULL;
1381 
1382     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1383     if (qf_assembled_is_setup) {
1384       bool update_needed;
1385 
1386       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1387       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1388       if (update_needed) {
1389         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1390       }
1391     } else {
1392       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1393       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1394     }
1395     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1396 
1397     // Copy reference from internally held copy
1398     *assembled = NULL;
1399     *rstr      = NULL;
1400     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1401     CeedCall(CeedVectorDestroy(&assembled_vec));
1402     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1403     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1404   } else {
1405     // Operator fallback
1406     CeedOperator op_fallback;
1407 
1408     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1409     if (op_fallback) {
1410       CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1411     } else {
1412       // LCOV_EXCL_START
1413       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1414       // LCOV_EXCL_STOP
1415     }
1416   }
1417 
1418   return CEED_ERROR_SUCCESS;
1419 }
1420 
1421 /**
1422   @brief Assemble the diagonal of a square linear CeedOperator
1423 
1424   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1425 
1426   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1427 
1428   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1429 
1430   @param[in]  op        CeedOperator to assemble CeedQFunction
1431   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1432   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1433 
1434   @return An error code: 0 - success, otherwise - failure
1435 
1436   @ref User
1437 **/
1438 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1439   bool is_composite;
1440   CeedCall(CeedOperatorCheckReady(op));
1441   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1442 
1443   CeedSize input_size = 0, output_size = 0;
1444   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1445   if (input_size != output_size) {
1446     // LCOV_EXCL_START
1447     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1448     // LCOV_EXCL_STOP
1449   }
1450 
1451   // Early exit for empty operator
1452   if (!is_composite) {
1453     CeedInt num_elem = 0;
1454 
1455     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1456     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1457   }
1458 
1459   if (op->LinearAssembleDiagonal) {
1460     // Backend version
1461     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1462     return CEED_ERROR_SUCCESS;
1463   } else if (op->LinearAssembleAddDiagonal) {
1464     // Backend version with zeroing first
1465     CeedCall(CeedVectorSetValue(assembled, 0.0));
1466     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1467     return CEED_ERROR_SUCCESS;
1468   } else {
1469     // Operator fallback
1470     CeedOperator op_fallback;
1471 
1472     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1473     if (op_fallback) {
1474       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1475       return CEED_ERROR_SUCCESS;
1476     }
1477   }
1478   // Default interface implementation
1479   CeedCall(CeedVectorSetValue(assembled, 0.0));
1480   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1481 
1482   return CEED_ERROR_SUCCESS;
1483 }
1484 
1485 /**
1486   @brief Assemble the diagonal of a square linear CeedOperator
1487 
1488   This sums into a CeedVector the diagonal of a linear CeedOperator.
1489 
1490   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1491 
1492   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1493 
1494   @param[in]  op        CeedOperator to assemble CeedQFunction
1495   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1496   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1497 
1498   @return An error code: 0 - success, otherwise - failure
1499 
1500   @ref User
1501 **/
1502 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1503   bool is_composite;
1504   CeedCall(CeedOperatorCheckReady(op));
1505   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1506 
1507   CeedSize input_size = 0, output_size = 0;
1508   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1509   if (input_size != output_size) {
1510     // LCOV_EXCL_START
1511     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1512     // LCOV_EXCL_STOP
1513   }
1514 
1515   // Early exit for empty operator
1516   if (!is_composite) {
1517     CeedInt num_elem = 0;
1518 
1519     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1520     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1521   }
1522 
1523   if (op->LinearAssembleAddDiagonal) {
1524     // Backend version
1525     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1526     return CEED_ERROR_SUCCESS;
1527   } else {
1528     // Operator fallback
1529     CeedOperator op_fallback;
1530 
1531     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1532     if (op_fallback) {
1533       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1534       return CEED_ERROR_SUCCESS;
1535     }
1536   }
1537   // Default interface implementation
1538   if (is_composite) {
1539     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1540   } else {
1541     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1542   }
1543 
1544   return CEED_ERROR_SUCCESS;
1545 }
1546 
1547 /**
1548   @brief Assemble the point block diagonal of a square linear CeedOperator
1549 
1550   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1551 
1552   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1553 
1554   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1555 
1556   @param[in]  op        CeedOperator to assemble CeedQFunction
1557   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1558 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1559 component in].
1560   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1561 
1562   @return An error code: 0 - success, otherwise - failure
1563 
1564   @ref User
1565 **/
1566 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1567   bool is_composite;
1568   CeedCall(CeedOperatorCheckReady(op));
1569   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1570 
1571   CeedSize input_size = 0, output_size = 0;
1572   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1573   if (input_size != output_size) {
1574     // LCOV_EXCL_START
1575     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1576     // LCOV_EXCL_STOP
1577   }
1578 
1579   // Early exit for empty operator
1580   if (!is_composite) {
1581     CeedInt num_elem = 0;
1582 
1583     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1584     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1585   }
1586 
1587   if (op->LinearAssemblePointBlockDiagonal) {
1588     // Backend version
1589     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1590     return CEED_ERROR_SUCCESS;
1591   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1592     // Backend version with zeroing first
1593     CeedCall(CeedVectorSetValue(assembled, 0.0));
1594     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1595     return CEED_ERROR_SUCCESS;
1596   } else {
1597     // Operator fallback
1598     CeedOperator op_fallback;
1599 
1600     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1601     if (op_fallback) {
1602       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1603       return CEED_ERROR_SUCCESS;
1604     }
1605   }
1606   // Default interface implementation
1607   CeedCall(CeedVectorSetValue(assembled, 0.0));
1608   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1609 
1610   return CEED_ERROR_SUCCESS;
1611 }
1612 
1613 /**
1614   @brief Assemble the point block diagonal of a square linear CeedOperator
1615 
1616   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1617 
1618   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1619 
1620   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1621 
1622   @param[in]  op        CeedOperator to assemble CeedQFunction
1623   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1624 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1625 component in].
1626   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1627 
1628   @return An error code: 0 - success, otherwise - failure
1629 
1630   @ref User
1631 **/
1632 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1633   bool is_composite;
1634   CeedCall(CeedOperatorCheckReady(op));
1635   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1636 
1637   CeedSize input_size = 0, output_size = 0;
1638   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1639   if (input_size != output_size) {
1640     // LCOV_EXCL_START
1641     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1642     // LCOV_EXCL_STOP
1643   }
1644 
1645   // Early exit for empty operator
1646   if (!is_composite) {
1647     CeedInt num_elem = 0;
1648 
1649     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1650     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1651   }
1652 
1653   if (op->LinearAssembleAddPointBlockDiagonal) {
1654     // Backend version
1655     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1656     return CEED_ERROR_SUCCESS;
1657   } else {
1658     // Operator fallback
1659     CeedOperator op_fallback;
1660 
1661     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1662     if (op_fallback) {
1663       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1664       return CEED_ERROR_SUCCESS;
1665     }
1666   }
1667   // Default interface implementation
1668   if (is_composite) {
1669     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1670   } else {
1671     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1672   }
1673 
1674   return CEED_ERROR_SUCCESS;
1675 }
1676 
1677 /**
1678    @brief Fully assemble the nonzero pattern of a linear operator.
1679 
1680    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1681 
1682    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1683 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j)
1684 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1685 
1686    This will generally be slow unless your operator is low-order.
1687 
1688    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1689 
1690    @param[in]  op          CeedOperator to assemble
1691    @param[out] num_entries Number of entries in coordinate nonzero pattern
1692    @param[out] rows        Row number for each entry
1693    @param[out] cols        Column number for each entry
1694 
1695    @ref User
1696 **/
1697 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1698   CeedInt       num_suboperators, single_entries;
1699   CeedOperator *sub_operators;
1700   bool          is_composite;
1701   CeedCall(CeedOperatorCheckReady(op));
1702   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1703 
1704   if (op->LinearAssembleSymbolic) {
1705     // Backend version
1706     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1707     return CEED_ERROR_SUCCESS;
1708   } else {
1709     // Operator fallback
1710     CeedOperator op_fallback;
1711 
1712     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1713     if (op_fallback) {
1714       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1715       return CEED_ERROR_SUCCESS;
1716     }
1717   }
1718 
1719   // Default interface implementation
1720 
1721   // count entries and allocate rows, cols arrays
1722   *num_entries = 0;
1723   if (is_composite) {
1724     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1725     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1726     for (CeedInt k = 0; k < num_suboperators; ++k) {
1727       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1728       *num_entries += single_entries;
1729     }
1730   } else {
1731     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1732     *num_entries += single_entries;
1733   }
1734   CeedCall(CeedCalloc(*num_entries, rows));
1735   CeedCall(CeedCalloc(*num_entries, cols));
1736 
1737   // assemble nonzero locations
1738   CeedInt offset = 0;
1739   if (is_composite) {
1740     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1741     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1742     for (CeedInt k = 0; k < num_suboperators; ++k) {
1743       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1744       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1745       offset += single_entries;
1746     }
1747   } else {
1748     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1749   }
1750 
1751   return CEED_ERROR_SUCCESS;
1752 }
1753 
1754 /**
1755    @brief Fully assemble the nonzero entries of a linear operator.
1756 
1757    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1758 
1759    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1760 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added,
1761 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1762 
1763    This will generally be slow unless your operator is low-order.
1764 
1765    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1766 
1767    @param[in]  op     CeedOperator to assemble
1768    @param[out] values Values to assemble into matrix
1769 
1770    @ref User
1771 **/
1772 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1773   CeedInt       num_suboperators, single_entries = 0;
1774   CeedOperator *sub_operators;
1775   bool          is_composite;
1776   CeedCall(CeedOperatorCheckReady(op));
1777   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1778 
1779   // Early exit for empty operator
1780   if (!is_composite) {
1781     CeedInt num_elem = 0;
1782 
1783     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1784     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1785   }
1786 
1787   if (op->LinearAssemble) {
1788     // Backend version
1789     CeedCall(op->LinearAssemble(op, values));
1790     return CEED_ERROR_SUCCESS;
1791   } else {
1792     // Operator fallback
1793     CeedOperator op_fallback;
1794 
1795     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1796     if (op_fallback) {
1797       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
1798       return CEED_ERROR_SUCCESS;
1799     }
1800   }
1801 
1802   // Default interface implementation
1803   CeedInt offset = 0;
1804   if (is_composite) {
1805     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1806     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1807     for (CeedInt k = 0; k < num_suboperators; k++) {
1808       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
1809       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1810       offset += single_entries;
1811     }
1812   } else {
1813     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
1814   }
1815 
1816   return CEED_ERROR_SUCCESS;
1817 }
1818 
1819 /**
1820   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
1821 
1822   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1823 
1824   @param[in]  op               Composite CeedOperator
1825   @param[in]  num_skip_indices Number of suboperators to skip
1826   @param[in]  skip_indices     Array of indices of suboperators to skip
1827   @param[out] mult             Vector to store multiplicity (of size l_size)
1828 
1829   @return An error code: 0 - success, otherwise - failure
1830 
1831   @ref User
1832 **/
1833 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
1834   CeedCall(CeedOperatorCheckReady(op));
1835 
1836   Ceed                ceed;
1837   CeedInt             num_sub_ops;
1838   CeedSize            l_vec_len;
1839   CeedScalar         *mult_array;
1840   CeedVector          ones_l_vec;
1841   CeedElemRestriction elem_restr;
1842   CeedOperator       *sub_ops;
1843 
1844   CeedCall(CeedOperatorGetCeed(op, &ceed));
1845 
1846   // Zero mult vector
1847   CeedCall(CeedVectorSetValue(mult, 0.0));
1848 
1849   // Get suboperators
1850   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_ops));
1851   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_ops));
1852   if (num_sub_ops == 0) return CEED_ERROR_SUCCESS;
1853 
1854   // Work vector
1855   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
1856   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
1857   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
1858   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
1859 
1860   // Compute multiplicity across suboperators
1861   for (CeedInt i = 0; i < num_sub_ops; i++) {
1862     const CeedScalar *sub_mult_array;
1863     CeedVector        sub_mult_l_vec, ones_e_vec;
1864 
1865     // -- Check for suboperator to skip
1866     for (CeedInt j = 0; j < num_skip_indices; j++) {
1867       if (skip_indices[j] == i) continue;
1868     }
1869 
1870     // -- Sub operator multiplicity
1871     CeedCall(CeedOperatorGetActiveElemRestriction(sub_ops[i], &elem_restr));
1872     CeedCall(CeedElemRestrictionCreateVector(elem_restr, &sub_mult_l_vec, &ones_e_vec));
1873     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
1874     CeedCall(CeedElemRestrictionApply(elem_restr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
1875     CeedCall(CeedElemRestrictionApply(elem_restr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
1876     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
1877     // ---- Flag every node present in the current suboperator
1878     for (CeedInt j = 0; j < l_vec_len; j++) {
1879       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
1880     }
1881     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
1882     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
1883     CeedCall(CeedVectorDestroy(&ones_e_vec));
1884   }
1885   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
1886   CeedCall(CeedVectorDestroy(&ones_l_vec));
1887 
1888   return CEED_ERROR_SUCCESS;
1889 }
1890 
1891 /**
1892   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
1893 grid interpolation
1894 
1895   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1896 
1897   @param[in]  op_fine      Fine grid operator
1898   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter
1899   @param[in]  rstr_coarse  Coarse grid restriction
1900   @param[in]  basis_coarse Coarse grid active vector basis
1901   @param[out] op_coarse    Coarse grid operator
1902   @param[out] op_prolong   Coarse to fine operator
1903   @param[out] op_restrict  Fine to coarse operator
1904 
1905   @return An error code: 0 - success, otherwise - failure
1906 
1907   @ref User
1908 **/
1909 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1910                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
1911   CeedCall(CeedOperatorCheckReady(op_fine));
1912 
1913   // Build prolongation matrix
1914   CeedBasis basis_fine, basis_c_to_f;
1915   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1916   CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
1917 
1918   // Core code
1919   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1920 
1921   return CEED_ERROR_SUCCESS;
1922 }
1923 
1924 /**
1925   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
1926 
1927   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1928 
1929   @param[in]  op_fine       Fine grid operator
1930   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
1931   @param[in]  rstr_coarse   Coarse grid restriction
1932   @param[in]  basis_coarse  Coarse grid active vector basis
1933   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation
1934   @param[out] op_coarse     Coarse grid operator
1935   @param[out] op_prolong    Coarse to fine operator
1936   @param[out] op_restrict   Fine to coarse operator
1937 
1938   @return An error code: 0 - success, otherwise - failure
1939 
1940   @ref User
1941 **/
1942 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1943                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
1944                                              CeedOperator *op_restrict) {
1945   CeedCall(CeedOperatorCheckReady(op_fine));
1946   Ceed ceed;
1947   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
1948 
1949   // Check for compatible quadrature spaces
1950   CeedBasis basis_fine;
1951   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1952   CeedInt Q_f, Q_c;
1953   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
1954   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
1955   if (Q_f != Q_c) {
1956     // LCOV_EXCL_START
1957     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
1958     // LCOV_EXCL_STOP
1959   }
1960 
1961   // Coarse to fine basis
1962   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
1963   CeedCall(CeedBasisGetDimension(basis_fine, &dim));
1964   CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
1965   CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
1966   CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
1967   P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
1968   CeedScalar *q_ref, *q_weight, *grad;
1969   CeedCall(CeedCalloc(P_1d_f, &q_ref));
1970   CeedCall(CeedCalloc(P_1d_f, &q_weight));
1971   CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
1972   CeedBasis basis_c_to_f;
1973   CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
1974   CeedCall(CeedFree(&q_ref));
1975   CeedCall(CeedFree(&q_weight));
1976   CeedCall(CeedFree(&grad));
1977 
1978   // Core code
1979   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1980   return CEED_ERROR_SUCCESS;
1981 }
1982 
1983 /**
1984   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
1985 
1986   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1987 
1988   @param[in]  op_fine       Fine grid operator
1989   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
1990   @param[in]  rstr_coarse   Coarse grid restriction
1991   @param[in]  basis_coarse  Coarse grid active vector basis
1992   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation
1993   @param[out] op_coarse     Coarse grid operator
1994   @param[out] op_prolong    Coarse to fine operator
1995   @param[out] op_restrict   Fine to coarse operator
1996 
1997   @return An error code: 0 - success, otherwise - failure
1998 
1999   @ref User
2000 **/
2001 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2002                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2003                                        CeedOperator *op_restrict) {
2004   CeedCall(CeedOperatorCheckReady(op_fine));
2005   Ceed ceed;
2006   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2007 
2008   // Check for compatible quadrature spaces
2009   CeedBasis basis_fine;
2010   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2011   CeedInt Q_f, Q_c;
2012   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2013   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2014   if (Q_f != Q_c) {
2015     // LCOV_EXCL_START
2016     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2017     // LCOV_EXCL_STOP
2018   }
2019 
2020   // Coarse to fine basis
2021   CeedElemTopology topo;
2022   CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2023   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2024   CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2025   CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2026   CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2027   CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2028   CeedScalar *q_ref, *q_weight, *grad;
2029   CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2030   CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2031   CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2032   CeedBasis basis_c_to_f;
2033   CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2034   CeedCall(CeedFree(&q_ref));
2035   CeedCall(CeedFree(&q_weight));
2036   CeedCall(CeedFree(&grad));
2037 
2038   // Core code
2039   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2040   return CEED_ERROR_SUCCESS;
2041 }
2042 
2043 /**
2044   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2045 
2046   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2047     This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, M = V^T V, K = V^T S V.
2048     The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form V^T
2049 S^hat V. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear.
2050 
2051   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2052 
2053   @param[in]  op      CeedOperator to create element inverses
2054   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2055   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2056 
2057   @return An error code: 0 - success, otherwise - failure
2058 
2059   @ref User
2060 **/
2061 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2062   CeedCall(CeedOperatorCheckReady(op));
2063 
2064   if (op->CreateFDMElementInverse) {
2065     // Backend version
2066     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2067     return CEED_ERROR_SUCCESS;
2068   } else {
2069     // Operator fallback
2070     CeedOperator op_fallback;
2071 
2072     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2073     if (op_fallback) {
2074       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2075       return CEED_ERROR_SUCCESS;
2076     }
2077   }
2078 
2079   // Default interface implementation
2080   Ceed ceed, ceed_parent;
2081   CeedCall(CeedOperatorGetCeed(op, &ceed));
2082   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2083   ceed_parent = ceed_parent ? ceed_parent : ceed;
2084   CeedQFunction qf;
2085   CeedCall(CeedOperatorGetQFunction(op, &qf));
2086 
2087   // Determine active input basis
2088   bool                interp = false, grad = false;
2089   CeedBasis           basis = NULL;
2090   CeedElemRestriction rstr  = NULL;
2091   CeedOperatorField  *op_fields;
2092   CeedQFunctionField *qf_fields;
2093   CeedInt             num_input_fields;
2094   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2095   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2096   for (CeedInt i = 0; i < num_input_fields; i++) {
2097     CeedVector vec;
2098     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2099     if (vec == CEED_VECTOR_ACTIVE) {
2100       CeedEvalMode eval_mode;
2101       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2102       interp = interp || eval_mode == CEED_EVAL_INTERP;
2103       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2104       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2105       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2106     }
2107   }
2108   if (!basis) {
2109     // LCOV_EXCL_START
2110     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2111     // LCOV_EXCL_STOP
2112   }
2113   CeedSize l_size = 1;
2114   CeedInt  P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
2115   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2116   CeedCall(CeedBasisGetNumNodes(basis, &elem_size));
2117   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2118   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2119   CeedCall(CeedBasisGetDimension(basis, &dim));
2120   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2121   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2122   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2123 
2124   // Build and diagonalize 1D Mass and Laplacian
2125   bool tensor_basis;
2126   CeedCall(CeedBasisIsTensor(basis, &tensor_basis));
2127   if (!tensor_basis) {
2128     // LCOV_EXCL_START
2129     return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2130     // LCOV_EXCL_STOP
2131   }
2132   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2133   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2134   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2135   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2136   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2137   CeedCall(CeedCalloc(P_1d, &lambda));
2138   // -- Build matrices
2139   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2140   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2141   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2142   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2143   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2144 
2145   // -- Diagonalize
2146   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2147   CeedCall(CeedFree(&mass));
2148   CeedCall(CeedFree(&laplace));
2149   for (CeedInt i = 0; i < P_1d; i++) {
2150     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2151   }
2152   CeedCall(CeedFree(&x));
2153 
2154   // Assemble QFunction
2155   CeedVector          assembled;
2156   CeedElemRestriction rstr_qf;
2157   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2158   CeedInt layout[3];
2159   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2160   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2161   CeedScalar max_norm = 0;
2162   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2163 
2164   // Calculate element averages
2165   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2166   CeedScalar       *elem_avg;
2167   const CeedScalar *assembled_array, *q_weight_array;
2168   CeedVector        q_weight;
2169   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2170   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2171   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2172   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2173   CeedCall(CeedCalloc(num_elem, &elem_avg));
2174   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2175   for (CeedInt e = 0; e < num_elem; e++) {
2176     CeedInt count = 0;
2177     for (CeedInt q = 0; q < num_qpts; q++) {
2178       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2179         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2180           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2181           count++;
2182         }
2183       }
2184     }
2185     if (count) {
2186       elem_avg[e] /= count;
2187     } else {
2188       elem_avg[e] = 1.0;
2189     }
2190   }
2191   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2192   CeedCall(CeedVectorDestroy(&assembled));
2193   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2194   CeedCall(CeedVectorDestroy(&q_weight));
2195 
2196   // Build FDM diagonal
2197   CeedVector  q_data;
2198   CeedScalar *q_data_array, *fdm_diagonal;
2199   CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal));
2200   const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON;
2201   for (CeedInt c = 0; c < num_comp; c++) {
2202     for (CeedInt n = 0; n < elem_size; n++) {
2203       if (interp) fdm_diagonal[c * elem_size + n] = 1.0;
2204       if (grad) {
2205         for (CeedInt d = 0; d < dim; d++) {
2206           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2207           fdm_diagonal[c * elem_size + n] += lambda[i];
2208         }
2209       }
2210       if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound;
2211     }
2212   }
2213   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data));
2214   CeedCall(CeedVectorSetValue(q_data, 0.0));
2215   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2216   for (CeedInt e = 0; e < num_elem; e++) {
2217     for (CeedInt c = 0; c < num_comp; c++) {
2218       for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]);
2219     }
2220   }
2221   CeedCall(CeedFree(&elem_avg));
2222   CeedCall(CeedFree(&fdm_diagonal));
2223   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2224 
2225   // Setup FDM operator
2226   // -- Basis
2227   CeedBasis   fdm_basis;
2228   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2229   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2230   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2231   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2232   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2233   CeedCall(CeedFree(&fdm_interp));
2234   CeedCall(CeedFree(&grad_dummy));
2235   CeedCall(CeedFree(&q_ref_dummy));
2236   CeedCall(CeedFree(&q_weight_dummy));
2237   CeedCall(CeedFree(&lambda));
2238 
2239   // -- Restriction
2240   CeedElemRestriction rstr_qd_i;
2241   CeedInt             strides[3] = {1, elem_size, elem_size * num_comp};
2242   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i));
2243   // -- QFunction
2244   CeedQFunction qf_fdm;
2245   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2246   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2247   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2248   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2249   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2250   // -- QFunction context
2251   CeedInt *num_comp_data;
2252   CeedCall(CeedCalloc(1, &num_comp_data));
2253   num_comp_data[0] = num_comp;
2254   CeedQFunctionContext ctx_fdm;
2255   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2256   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2257   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2258   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2259   // -- Operator
2260   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2261   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2262   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2263   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2264 
2265   // Cleanup
2266   CeedCall(CeedVectorDestroy(&q_data));
2267   CeedCall(CeedBasisDestroy(&fdm_basis));
2268   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2269   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2270 
2271   return CEED_ERROR_SUCCESS;
2272 }
2273 
2274 /// @}
2275