xref: /libCEED/interface/ceed-preconditioning.c (revision b34efc53a325cecbb83f0505800999d3997d23e3)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <assert.h>
9 #include <ceed-impl.h>
10 #include <ceed/backend.h>
11 #include <ceed/ceed.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_restr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_restr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) {
128       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     }
130     // Cleanup
131     CeedCall(CeedQFunctionDestroy(&qf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
133     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
134   }
135   CeedCall(CeedOperatorSetName(op_fallback, op->name));
136   CeedCall(CeedOperatorCheckReady(op_fallback));
137   op->op_fallback = op_fallback;
138 
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
144 
145   @param[in]  op          CeedOperator to retrieve fallback for
146   @param[out] op_fallback Fallback CeedOperator
147 
148   @return An error code: 0 - success, otherwise - failure
149 
150   @ref Developer
151 **/
152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
153   // Create if needed
154   if (!op->op_fallback) {
155     CeedCall(CeedOperatorCreateFallback(op));
156   }
157   if (op->op_fallback) {
158     bool is_debug;
159 
160     CeedCall(CeedIsDebug(op->ceed, &is_debug));
161     if (is_debug) {
162       Ceed        ceed, ceed_fallback;
163       const char *resource, *resource_fallback;
164 
165       CeedCall(CeedOperatorGetCeed(op, &ceed));
166       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
167       CeedCall(CeedGetResource(ceed, &resource));
168       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
169 
170       CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n");
171       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
172                 op->op_fallback);
173     }
174   }
175   *op_fallback = op->op_fallback;
176 
177   return CEED_ERROR_SUCCESS;
178 }
179 
180 /**
181   @brief Select correct basis matrix pointer based on CeedEvalMode
182 
183   @param[in]  eval_mode Current basis evaluation mode
184   @param[in]  identity  Pointer to identity matrix
185   @param[in]  interp    Pointer to interpolation matrix
186   @param[in]  grad      Pointer to gradient matrix
187   @param[out] basis_ptr Basis pointer to set
188 
189   @ref Developer
190 **/
191 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad,
192                                                const CeedScalar **basis_ptr) {
193   switch (eval_mode) {
194     case CEED_EVAL_NONE:
195       *basis_ptr = identity;
196       break;
197     case CEED_EVAL_INTERP:
198       *basis_ptr = interp;
199       break;
200     case CEED_EVAL_GRAD:
201       *basis_ptr = grad;
202       break;
203     case CEED_EVAL_WEIGHT:
204     case CEED_EVAL_DIV:
205     case CEED_EVAL_CURL:
206       break;  // Caught by QF Assembly
207   }
208   assert(*basis_ptr != NULL);
209 }
210 
211 /**
212   @brief Create point block restriction for active operator field
213 
214   @param[in]  rstr            Original CeedElemRestriction for active field
215   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
216 
217   @return An error code: 0 - success, otherwise - failure
218 
219   @ref Developer
220 **/
221 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
222   Ceed ceed;
223   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
224   const CeedInt *offsets;
225   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
226 
227   // Expand offsets
228   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
229   CeedSize l_size;
230   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
231   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
232   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
233   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
234   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
235   CeedInt shift = num_comp;
236   if (comp_stride != 1) shift *= num_comp;
237   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
238   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
239     pointblock_offsets[i] = offsets[i] * shift;
240   }
241 
242   // Create new restriction
243   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
244                                      pointblock_offsets, pointblock_rstr));
245 
246   // Cleanup
247   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
248 
249   return CEED_ERROR_SUCCESS;
250 }
251 
252 /**
253   @brief Core logic for assembling operator diagonal or point block diagonal
254 
255   @param[in]  op            CeedOperator to assemble point block diagonal
256   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
257   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
258   @param[out] assembled     CeedVector to store assembled diagonal
259 
260   @return An error code: 0 - success, otherwise - failure
261 
262   @ref Developer
263 **/
264 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
265   Ceed ceed;
266   CeedCall(CeedOperatorGetCeed(op, &ceed));
267 
268   // Assemble QFunction
269   CeedQFunction qf;
270   CeedCall(CeedOperatorGetQFunction(op, &qf));
271   CeedInt num_input_fields, num_output_fields;
272   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
273   CeedVector          assembled_qf;
274   CeedElemRestriction rstr;
275   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr, request));
276   CeedInt layout[3];
277   CeedCall(CeedElemRestrictionGetELayout(rstr, &layout));
278   CeedCall(CeedElemRestrictionDestroy(&rstr));
279 
280   // Get assembly data
281   CeedOperatorAssemblyData data;
282   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
283   const CeedEvalMode *eval_mode_in, *eval_mode_out;
284   CeedInt             num_eval_mode_in, num_eval_mode_out;
285   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out));
286   CeedBasis basis_in, basis_out;
287   CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL));
288   CeedInt num_comp;
289   CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
290 
291   // Assemble point block diagonal restriction, if needed
292   CeedElemRestriction diag_rstr;
293   CeedCall(CeedOperatorGetActiveElemRestriction(op, &diag_rstr));
294   if (is_pointblock) {
295     CeedElemRestriction point_block_rstr;
296     CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_rstr, &point_block_rstr));
297     diag_rstr = point_block_rstr;
298   }
299 
300   // Create diagonal vector
301   CeedVector elem_diag;
302   CeedCall(CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag));
303 
304   // Assemble element operator diagonals
305   CeedScalar       *elem_diag_array;
306   const CeedScalar *assembled_qf_array;
307   CeedCall(CeedVectorSetValue(elem_diag, 0.0));
308   CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
309   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
310   CeedInt num_elem, num_nodes, num_qpts;
311   CeedCall(CeedElemRestrictionGetNumElements(diag_rstr, &num_elem));
312   CeedCall(CeedBasisGetNumNodes(basis_in, &num_nodes));
313   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
314 
315   // Basis matrices
316   const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
317   CeedScalar       *identity      = NULL;
318   bool              has_eval_none = false;
319   for (CeedInt i = 0; i < num_eval_mode_in; i++) {
320     has_eval_none = has_eval_none || (eval_mode_in[i] == CEED_EVAL_NONE);
321   }
322   for (CeedInt i = 0; i < num_eval_mode_out; i++) {
323     has_eval_none = has_eval_none || (eval_mode_out[i] == CEED_EVAL_NONE);
324   }
325   if (has_eval_none) {
326     CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
327     for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
328   }
329   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
330   CeedCall(CeedBasisGetInterp(basis_out, &interp_out));
331   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
332   CeedCall(CeedBasisGetGrad(basis_out, &grad_out));
333   // Compute the diagonal of B^T D B
334   // Each element
335   for (CeedInt e = 0; e < num_elem; e++) {
336     CeedInt d_out = -1;
337     // Each basis eval mode pair
338     for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
339       const CeedScalar *bt = NULL;
340       if (eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out += 1;
341       CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * num_nodes], &bt);
342       CeedInt d_in = -1;
343       for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
344         const CeedScalar *b = NULL;
345         if (eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in += 1;
346         CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * num_nodes], &b);
347         // Each component
348         for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
349           // Each qpoint/node pair
350           for (CeedInt q = 0; q < num_qpts; q++) {
351             if (is_pointblock) {
352               // Point Block Diagonal
353               for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
354                 const CeedScalar qf_value =
355                     assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_in) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] +
356                                        e * layout[2]];
357                 for (CeedInt n = 0; n < num_nodes; n++) {
358                   elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
359                       bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n];
360                 }
361               }
362             } else {
363               // Diagonal Only
364               const CeedScalar qf_value =
365                   assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_out) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] +
366                                      e * layout[2]];
367               for (CeedInt n = 0; n < num_nodes; n++) {
368                 elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n];
369               }
370             }
371           }
372         }
373       }
374     }
375   }
376   CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
377   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
378 
379   // Assemble local operator diagonal
380   CeedCall(CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
381 
382   // Cleanup
383   if (is_pointblock) {
384     CeedCall(CeedElemRestrictionDestroy(&diag_rstr));
385   }
386   CeedCall(CeedVectorDestroy(&assembled_qf));
387   CeedCall(CeedVectorDestroy(&elem_diag));
388   CeedCall(CeedFree(&identity));
389 
390   return CEED_ERROR_SUCCESS;
391 }
392 
393 /**
394   @brief Core logic for assembling composite operator diagonal
395 
396   @param[in]  op            CeedOperator to assemble point block diagonal
397   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
398   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
399   @param[out] assembled     CeedVector to store assembled diagonal
400 
401   @return An error code: 0 - success, otherwise - failure
402 
403   @ref Developer
404 **/
405 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
406                                                                  CeedVector assembled) {
407   CeedInt       num_sub;
408   CeedOperator *suboperators;
409   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
410   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
411   for (CeedInt i = 0; i < num_sub; i++) {
412     if (is_pointblock) {
413       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
414     } else {
415       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
416     }
417   }
418   return CEED_ERROR_SUCCESS;
419 }
420 
421 /**
422   @brief Build nonzero pattern for non-composite operator
423 
424   Users should generally use CeedOperatorLinearAssembleSymbolic()
425 
426   @param[in]  op     CeedOperator to assemble nonzero pattern
427   @param[in]  offset Offset for number of entries
428   @param[out] rows   Row number for each entry
429   @param[out] cols   Column number for each entry
430 
431   @return An error code: 0 - success, otherwise - failure
432 
433   @ref Developer
434 **/
435 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
436   Ceed ceed;
437   bool is_composite;
438   CeedCall(CeedOperatorGetCeed(op, &ceed));
439   CeedCall(CeedOperatorIsComposite(op, &is_composite));
440 
441   if (is_composite) {
442     // LCOV_EXCL_START
443     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
444     // LCOV_EXCL_STOP
445   }
446 
447   CeedSize num_nodes;
448   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
449   CeedElemRestriction rstr_in;
450   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
451   CeedInt num_elem, elem_size, num_comp;
452   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
453   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
454   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
455   CeedInt layout_er[3];
456   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
457 
458   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
459 
460   // Determine elem_dof relation
461   CeedVector index_vec;
462   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
463   CeedScalar *array;
464   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
465   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
466   CeedCall(CeedVectorRestoreArray(index_vec, &array));
467   CeedVector elem_dof;
468   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
469   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
470   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
471   const CeedScalar *elem_dof_a;
472   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
473   CeedCall(CeedVectorDestroy(&index_vec));
474 
475   // Determine i, j locations for element matrices
476   CeedInt count = 0;
477   for (CeedInt e = 0; e < num_elem; e++) {
478     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
479       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
480         for (CeedInt i = 0; i < elem_size; i++) {
481           for (CeedInt j = 0; j < elem_size; j++) {
482             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
483             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
484 
485             const CeedInt row = elem_dof_a[elem_dof_index_row];
486             const CeedInt col = elem_dof_a[elem_dof_index_col];
487 
488             rows[offset + count] = row;
489             cols[offset + count] = col;
490             count++;
491           }
492         }
493       }
494     }
495   }
496   if (count != local_num_entries) {
497     // LCOV_EXCL_START
498     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
499     // LCOV_EXCL_STOP
500   }
501   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
502   CeedCall(CeedVectorDestroy(&elem_dof));
503 
504   return CEED_ERROR_SUCCESS;
505 }
506 
507 /**
508   @brief Assemble nonzero entries for non-composite operator
509 
510   Users should generally use CeedOperatorLinearAssemble()
511 
512   @param[in]  op     CeedOperator to assemble
513   @param[in]  offset Offset for number of entries
514   @param[out] values Values to assemble into matrix
515 
516   @return An error code: 0 - success, otherwise - failure
517 
518   @ref Developer
519 **/
520 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
521   Ceed ceed;
522   bool is_composite;
523   CeedCall(CeedOperatorGetCeed(op, &ceed));
524   CeedCall(CeedOperatorIsComposite(op, &is_composite));
525 
526   if (is_composite) {
527     // LCOV_EXCL_START
528     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
529     // LCOV_EXCL_STOP
530   }
531 
532   // Early exit for empty operator
533   {
534     CeedInt num_elem = 0;
535 
536     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
537     if (num_elem == 0) return CEED_ERROR_SUCCESS;
538   }
539 
540   if (op->LinearAssembleSingle) {
541     // Backend version
542     CeedCall(op->LinearAssembleSingle(op, offset, values));
543     return CEED_ERROR_SUCCESS;
544   } else {
545     // Operator fallback
546     CeedOperator op_fallback;
547 
548     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
549     if (op_fallback) {
550       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
551       return CEED_ERROR_SUCCESS;
552     }
553   }
554 
555   // Assemble QFunction
556   CeedQFunction qf;
557   CeedCall(CeedOperatorGetQFunction(op, &qf));
558   CeedVector          assembled_qf;
559   CeedElemRestriction rstr_q;
560   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
561   CeedSize qf_length;
562   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
563 
564   CeedInt            num_input_fields, num_output_fields;
565   CeedOperatorField *input_fields;
566   CeedOperatorField *output_fields;
567   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
568 
569   // Get assembly data
570   CeedOperatorAssemblyData data;
571   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
572   const CeedEvalMode *eval_mode_in, *eval_mode_out;
573   CeedInt             num_eval_mode_in, num_eval_mode_out;
574   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out));
575   CeedBasis basis_in, basis_out;
576   CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL));
577 
578   if (num_eval_mode_in == 0 || num_eval_mode_out == 0) {
579     // LCOV_EXCL_START
580     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
581     // LCOV_EXCL_STOP
582   }
583 
584   CeedElemRestriction active_rstr;
585   CeedInt             num_elem, elem_size, num_qpts, num_comp;
586   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
587   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
588   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
589   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
590   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
591 
592   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
593 
594   // loop over elements and put in data structure
595   const CeedScalar *interp_in, *grad_in;
596   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
597   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
598 
599   const CeedScalar *assembled_qf_array;
600   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
601 
602   CeedInt layout_qf[3];
603   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
604   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
605 
606   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
607   const CeedScalar *B_mat_in, *B_mat_out;
608   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &B_mat_in, NULL, &B_mat_out));
609   CeedScalar  BTD_mat[elem_size * num_qpts * num_eval_mode_in];
610   CeedScalar  elem_mat[elem_size * elem_size];
611   CeedInt     count = 0;
612   CeedScalar *vals;
613   CeedCall(CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals));
614   for (CeedInt e = 0; e < num_elem; e++) {
615     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
616       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
617         // Compute B^T*D
618         for (CeedInt n = 0; n < elem_size; n++) {
619           for (CeedInt q = 0; q < num_qpts; q++) {
620             for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
621               const CeedInt btd_index = n * (num_qpts * num_eval_mode_in) + (num_eval_mode_in * q + e_in);
622               CeedScalar    sum       = 0.0;
623               for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
624                 const CeedInt b_out_index     = (num_eval_mode_out * q + e_out) * elem_size + n;
625                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_mode_out + e_out) * num_comp + comp_out;
626                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
627                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
628               }
629               BTD_mat[btd_index] = sum;
630             }
631           }
632         }
633         // form element matrix itself (for each block component)
634         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_mode_in));
635 
636         // put element matrix in coordinate data structure
637         for (CeedInt i = 0; i < elem_size; i++) {
638           for (CeedInt j = 0; j < elem_size; j++) {
639             vals[offset + count] = elem_mat[i * elem_size + j];
640             count++;
641           }
642         }
643       }
644     }
645   }
646   if (count != local_num_entries) {
647     // LCOV_EXCL_START
648     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
649     // LCOV_EXCL_STOP
650   }
651   CeedCall(CeedVectorRestoreArray(values, &vals));
652 
653   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
654   CeedCall(CeedVectorDestroy(&assembled_qf));
655 
656   return CEED_ERROR_SUCCESS;
657 }
658 
659 /**
660   @brief Count number of entries for assembled CeedOperator
661 
662   @param[in]  op          CeedOperator to assemble
663   @param[out] num_entries Number of entries in assembled representation
664 
665   @return An error code: 0 - success, otherwise - failure
666 
667   @ref Utility
668 **/
669 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
670   bool                is_composite;
671   CeedElemRestriction rstr;
672   CeedInt             num_elem, elem_size, num_comp;
673 
674   CeedCall(CeedOperatorIsComposite(op, &is_composite));
675   if (is_composite) {
676     // LCOV_EXCL_START
677     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
678     // LCOV_EXCL_STOP
679   }
680   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
681   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
682   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
683   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
684   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
685 
686   return CEED_ERROR_SUCCESS;
687 }
688 
689 /**
690   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
691 
692   @param[in]  op_fine      Fine grid operator
693   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter
694   @param[in]  rstr_coarse  Coarse grid restriction
695   @param[in]  basis_coarse Coarse grid active vector basis
696   @param[in]  basis_c_to_f Basis for coarse to fine interpolation
697   @param[out] op_coarse    Coarse grid operator
698   @param[out] op_prolong   Coarse to fine operator
699   @param[out] op_restrict  Fine to coarse operator
700 
701   @return An error code: 0 - success, otherwise - failure
702 
703   @ref Developer
704 **/
705 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
706                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
707   Ceed ceed;
708   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
709 
710   // Check for composite operator
711   bool is_composite;
712   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
713   if (is_composite) {
714     // LCOV_EXCL_START
715     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
716     // LCOV_EXCL_STOP
717   }
718 
719   // Coarse Grid
720   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
721   CeedElemRestriction rstr_fine = NULL;
722   // -- Clone input fields
723   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
724     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
725       rstr_fine = op_fine->input_fields[i]->elem_restr;
726       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
727     } else {
728       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_restr,
729                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
730     }
731   }
732   // -- Clone output fields
733   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
734     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
735       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
736     } else {
737       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_restr,
738                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
739     }
740   }
741   // -- Clone QFunctionAssemblyData
742   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
743 
744   // Multiplicity vector
745   CeedVector mult_vec, mult_e_vec;
746   CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
747   CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
748   CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
749   CeedCall(CeedVectorSetValue(mult_vec, 0.0));
750   CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
751   CeedCall(CeedVectorDestroy(&mult_e_vec));
752   CeedCall(CeedVectorReciprocal(mult_vec));
753 
754   // Restriction
755   CeedInt num_comp;
756   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
757   CeedQFunction qf_restrict;
758   CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
759   CeedInt *num_comp_r_data;
760   CeedCall(CeedCalloc(1, &num_comp_r_data));
761   num_comp_r_data[0] = num_comp;
762   CeedQFunctionContext ctx_r;
763   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
764   CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
765   CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
766   CeedCall(CeedQFunctionContextDestroy(&ctx_r));
767   CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
768   CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
769   CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
770   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
771 
772   CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
773   CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
774   CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
775   CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
776 
777   // Prolongation
778   CeedQFunction qf_prolong;
779   CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
780   CeedInt *num_comp_p_data;
781   CeedCall(CeedCalloc(1, &num_comp_p_data));
782   num_comp_p_data[0] = num_comp;
783   CeedQFunctionContext ctx_p;
784   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
785   CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
786   CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
787   CeedCall(CeedQFunctionContextDestroy(&ctx_p));
788   CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
789   CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
790   CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
791   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
792 
793   CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
794   CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
795   CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
796   CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
797 
798   // Clone name
799   bool   has_name = op_fine->name;
800   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
801   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
802   {
803     char *prolongation_name;
804     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
805     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
806     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
807     CeedCall(CeedFree(&prolongation_name));
808   }
809   {
810     char *restriction_name;
811     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
812     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
813     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
814     CeedCall(CeedFree(&restriction_name));
815   }
816 
817   // Check
818   CeedCall(CeedOperatorCheckReady(*op_coarse));
819   CeedCall(CeedOperatorCheckReady(*op_prolong));
820   CeedCall(CeedOperatorCheckReady(*op_restrict));
821 
822   // Cleanup
823   CeedCall(CeedVectorDestroy(&mult_vec));
824   CeedCall(CeedBasisDestroy(&basis_c_to_f));
825   CeedCall(CeedQFunctionDestroy(&qf_restrict));
826   CeedCall(CeedQFunctionDestroy(&qf_prolong));
827 
828   return CEED_ERROR_SUCCESS;
829 }
830 
831 /**
832   @brief Build 1D mass matrix and Laplacian with perturbation
833 
834   @param[in]  interp_1d   Interpolation matrix in one dimension
835   @param[in]  grad_1d     Gradient matrix in one dimension
836   @param[in]  q_weight_1d Quadrature weights in one dimension
837   @param[in]  P_1d        Number of basis nodes in one dimension
838   @param[in]  Q_1d        Number of quadrature points in one dimension
839   @param[in]  dim         Dimension of basis
840   @param[out] mass        Assembled mass matrix in one dimension
841   @param[out] laplace     Assembled perturbed Laplacian in one dimension
842 
843   @return An error code: 0 - success, otherwise - failure
844 
845   @ref Developer
846 **/
847 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d,
848                                                       CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
849   for (CeedInt i = 0; i < P_1d; i++) {
850     for (CeedInt j = 0; j < P_1d; j++) {
851       CeedScalar sum = 0.0;
852       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
853       mass[i + j * P_1d] = sum;
854     }
855   }
856   // -- Laplacian
857   for (CeedInt i = 0; i < P_1d; i++) {
858     for (CeedInt j = 0; j < P_1d; j++) {
859       CeedScalar sum = 0.0;
860       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
861       laplace[i + j * P_1d] = sum;
862     }
863   }
864   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
865   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
866   return CEED_ERROR_SUCCESS;
867 }
868 CeedPragmaOptimizeOn;
869 
870 /// @}
871 
872 /// ----------------------------------------------------------------------------
873 /// CeedOperator Backend API
874 /// ----------------------------------------------------------------------------
875 /// @addtogroup CeedOperatorBackend
876 /// @{
877 
878 /**
879   @brief Create object holding CeedQFunction assembly data for CeedOperator
880 
881   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
882   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
883 
884   @return An error code: 0 - success, otherwise - failure
885 
886   @ref Backend
887 **/
888 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
889   CeedCall(CeedCalloc(1, data));
890   (*data)->ref_count = 1;
891   (*data)->ceed      = ceed;
892   CeedCall(CeedReference(ceed));
893 
894   return CEED_ERROR_SUCCESS;
895 }
896 
897 /**
898   @brief Increment the reference counter for a CeedQFunctionAssemblyData
899 
900   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
901 
902   @return An error code: 0 - success, otherwise - failure
903 
904   @ref Backend
905 **/
906 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
907   data->ref_count++;
908   return CEED_ERROR_SUCCESS;
909 }
910 
911 /**
912   @brief Set re-use of CeedQFunctionAssemblyData
913 
914   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
915   @param[in]     reuse_data Boolean flag indicating data re-use
916 
917   @return An error code: 0 - success, otherwise - failure
918 
919   @ref Backend
920 **/
921 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
922   data->reuse_data        = reuse_data;
923   data->needs_data_update = true;
924   return CEED_ERROR_SUCCESS;
925 }
926 
927 /**
928   @brief Mark QFunctionAssemblyData as stale
929 
930   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
931   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
932 
933   @return An error code: 0 - success, otherwise - failure
934 
935   @ref Backend
936 **/
937 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
938   data->needs_data_update = needs_data_update;
939   return CEED_ERROR_SUCCESS;
940 }
941 
942 /**
943   @brief Determine if QFunctionAssemblyData needs update
944 
945   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
946   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
947 
948   @return An error code: 0 - success, otherwise - failure
949 
950   @ref Backend
951 **/
952 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
953   *is_update_needed = !data->reuse_data || data->needs_data_update;
954   return CEED_ERROR_SUCCESS;
955 }
956 
957 /**
958   @brief Copy the pointer to a CeedQFunctionAssemblyData.
959            Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
960            Note: If `*data_copy` is non-NULL, then it is assumed that `*data_copy` is a pointer to a CeedQFunctionAssemblyData.
961              This CeedQFunctionAssemblyData will be destroyed if `*data_copy` is the only reference to this CeedQFunctionAssemblyData.
962 
963   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
964   @param[in,out] data_copy Variable to store copied reference
965 
966   @return An error code: 0 - success, otherwise - failure
967 
968   @ref Backend
969 **/
970 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
971   CeedCall(CeedQFunctionAssemblyDataReference(data));
972   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
973   *data_copy = data;
974   return CEED_ERROR_SUCCESS;
975 }
976 
977 /**
978   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
979 
980   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
981   @param[out] is_setup Boolean flag for setup status
982 
983   @return An error code: 0 - success, otherwise - failure
984 
985   @ref Backend
986 **/
987 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
988   *is_setup = data->is_setup;
989   return CEED_ERROR_SUCCESS;
990 }
991 
992 /**
993   @brief Set internal objects for CeedQFunctionAssemblyData
994 
995   @param[in,out] data CeedQFunctionAssemblyData to set objects
996   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
997   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
998 
999   @return An error code: 0 - success, otherwise - failure
1000 
1001   @ref Backend
1002 **/
1003 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1004   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1005   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1006 
1007   data->is_setup = true;
1008   return CEED_ERROR_SUCCESS;
1009 }
1010 
1011 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1012   if (!data->is_setup) {
1013     // LCOV_EXCL_START
1014     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1015     // LCOV_EXCL_STOP
1016   }
1017 
1018   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1019   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1020 
1021   return CEED_ERROR_SUCCESS;
1022 }
1023 
1024 /**
1025   @brief Destroy CeedQFunctionAssemblyData
1026 
1027   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1028 
1029   @return An error code: 0 - success, otherwise - failure
1030 
1031   @ref Backend
1032 **/
1033 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1034   if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;
1035 
1036   CeedCall(CeedDestroy(&(*data)->ceed));
1037   CeedCall(CeedVectorDestroy(&(*data)->vec));
1038   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1039 
1040   CeedCall(CeedFree(data));
1041   return CEED_ERROR_SUCCESS;
1042 }
1043 
1044 /**
1045   @brief Get CeedOperatorAssemblyData
1046 
1047   @param[in]  op   CeedOperator to assemble
1048   @param[out] data CeedQFunctionAssemblyData
1049 
1050   @return An error code: 0 - success, otherwise - failure
1051 
1052   @ref Backend
1053 **/
1054 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1055   if (!op->op_assembled) {
1056     CeedOperatorAssemblyData data;
1057 
1058     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1059     op->op_assembled = data;
1060   }
1061   *data = op->op_assembled;
1062 
1063   return CEED_ERROR_SUCCESS;
1064 }
1065 
1066 /**
1067   @brief Create object holding CeedOperator assembly data
1068 
1069   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1070   @param[in]  op   CeedOperator to be assembled
1071   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1072 
1073   @return An error code: 0 - success, otherwise - failure
1074 
1075   @ref Backend
1076 **/
1077 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1078   CeedCall(CeedCalloc(1, data));
1079   (*data)->ceed = ceed;
1080   CeedCall(CeedReference(ceed));
1081 
1082   // Build OperatorAssembly data
1083   CeedQFunction       qf;
1084   CeedQFunctionField *qf_fields;
1085   CeedOperatorField  *op_fields;
1086   CeedInt             num_input_fields;
1087   CeedCall(CeedOperatorGetQFunction(op, &qf));
1088   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1089   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1090 
1091   // Determine active input basis
1092   CeedInt       num_eval_mode_in = 0, dim = 1;
1093   CeedEvalMode *eval_mode_in = NULL;
1094   CeedBasis     basis_in     = NULL;
1095   for (CeedInt i = 0; i < num_input_fields; i++) {
1096     CeedVector vec;
1097     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1098     if (vec == CEED_VECTOR_ACTIVE) {
1099       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1100       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1101       CeedEvalMode eval_mode;
1102       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1103       switch (eval_mode) {
1104         case CEED_EVAL_NONE:
1105         case CEED_EVAL_INTERP:
1106           CeedCall(CeedRealloc(num_eval_mode_in + 1, &eval_mode_in));
1107           eval_mode_in[num_eval_mode_in] = eval_mode;
1108           num_eval_mode_in += 1;
1109           break;
1110         case CEED_EVAL_GRAD:
1111           CeedCall(CeedRealloc(num_eval_mode_in + dim, &eval_mode_in));
1112           for (CeedInt d = 0; d < dim; d++) {
1113             eval_mode_in[num_eval_mode_in + d] = eval_mode;
1114           }
1115           num_eval_mode_in += dim;
1116           break;
1117         case CEED_EVAL_WEIGHT:
1118         case CEED_EVAL_DIV:
1119         case CEED_EVAL_CURL:
1120           break;  // Caught by QF Assembly
1121       }
1122     }
1123   }
1124   (*data)->num_eval_mode_in = num_eval_mode_in;
1125   (*data)->eval_mode_in     = eval_mode_in;
1126   CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->basis_in));
1127 
1128   // Determine active output basis
1129   CeedInt num_output_fields;
1130   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1131   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1132   CeedInt       num_eval_mode_out = 0;
1133   CeedEvalMode *eval_mode_out     = NULL;
1134   CeedBasis     basis_out         = NULL;
1135   for (CeedInt i = 0; i < num_output_fields; i++) {
1136     CeedVector vec;
1137     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1138     if (vec == CEED_VECTOR_ACTIVE) {
1139       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1140       CeedEvalMode eval_mode;
1141       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1142       switch (eval_mode) {
1143         case CEED_EVAL_NONE:
1144         case CEED_EVAL_INTERP:
1145           CeedCall(CeedRealloc(num_eval_mode_out + 1, &eval_mode_out));
1146           eval_mode_out[num_eval_mode_out] = eval_mode;
1147           num_eval_mode_out += 1;
1148           break;
1149         case CEED_EVAL_GRAD:
1150           CeedCall(CeedRealloc(num_eval_mode_out + dim, &eval_mode_out));
1151           for (CeedInt d = 0; d < dim; d++) {
1152             eval_mode_out[num_eval_mode_out + d] = eval_mode;
1153           }
1154           num_eval_mode_out += dim;
1155           break;
1156         case CEED_EVAL_WEIGHT:
1157         case CEED_EVAL_DIV:
1158         case CEED_EVAL_CURL:
1159           break;  // Caught by QF Assembly
1160       }
1161     }
1162   }
1163   (*data)->num_eval_mode_out = num_eval_mode_out;
1164   (*data)->eval_mode_out     = eval_mode_out;
1165   CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->basis_out));
1166 
1167   return CEED_ERROR_SUCCESS;
1168 }
1169 
1170 /**
1171   @brief Get CeedOperator CeedEvalModes for assembly
1172 
1173   @param[in]  data              CeedOperatorAssemblyData
1174   @param[out] num_eval_mode_in  Pointer to hold number of input CeedEvalModes, or NULL
1175   @param[out] eval_mode_in      Pointer to hold input CeedEvalModes, or NULL
1176   @param[out] num_eval_mode_out Pointer to hold number of output CeedEvalModes, or NULL
1177   @param[out] eval_mode_out     Pointer to hold output CeedEvalModes, or NULL
1178 
1179   @return An error code: 0 - success, otherwise - failure
1180 
1181   @ref Backend
1182 **/
1183 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_eval_mode_in, const CeedEvalMode **eval_mode_in,
1184                                          CeedInt *num_eval_mode_out, const CeedEvalMode **eval_mode_out) {
1185   if (num_eval_mode_in) *num_eval_mode_in = data->num_eval_mode_in;
1186   if (eval_mode_in) *eval_mode_in = data->eval_mode_in;
1187   if (num_eval_mode_out) *num_eval_mode_out = data->num_eval_mode_out;
1188   if (eval_mode_out) *eval_mode_out = data->eval_mode_out;
1189 
1190   return CEED_ERROR_SUCCESS;
1191 }
1192 
1193 /**
1194   @brief Get CeedOperator CeedBasis data for assembly
1195 
1196   @param[in]  data      CeedOperatorAssemblyData
1197   @param[out] basis_in  Pointer to hold active input CeedBasis, or NULL
1198   @param[out] B_in      Pointer to hold assembled active input B, or NULL
1199   @param[out] basis_out Pointer to hold active output CeedBasis, or NULL
1200   @param[out] B_out     Pointer to hold assembled active output B, or NULL
1201 
1202   @return An error code: 0 - success, otherwise - failure
1203 
1204   @ref Backend
1205 **/
1206 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedBasis *basis_in, const CeedScalar **B_in, CeedBasis *basis_out,
1207                                      const CeedScalar **B_out) {
1208   // Assemble B_in, B_out if needed
1209   if (B_in && !data->B_in) {
1210     CeedInt           num_qpts, elem_size;
1211     CeedScalar       *B_in, *identity = NULL;
1212     const CeedScalar *interp_in, *grad_in;
1213     bool              has_eval_none = false;
1214 
1215     CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_in, &num_qpts));
1216     CeedCall(CeedBasisGetNumNodes(data->basis_in, &elem_size));
1217     CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_in, &B_in));
1218 
1219     for (CeedInt i = 0; i < data->num_eval_mode_in; i++) {
1220       has_eval_none = has_eval_none || (data->eval_mode_in[i] == CEED_EVAL_NONE);
1221     }
1222     if (has_eval_none) {
1223       CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1224       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1225         identity[i * elem_size + i] = 1.0;
1226       }
1227     }
1228     CeedCall(CeedBasisGetInterp(data->basis_in, &interp_in));
1229     CeedCall(CeedBasisGetGrad(data->basis_in, &grad_in));
1230 
1231     for (CeedInt q = 0; q < num_qpts; q++) {
1232       for (CeedInt n = 0; n < elem_size; n++) {
1233         CeedInt d_in = -1;
1234         for (CeedInt e_in = 0; e_in < data->num_eval_mode_in; e_in++) {
1235           const CeedInt     qq = data->num_eval_mode_in * q;
1236           const CeedScalar *b  = NULL;
1237 
1238           if (data->eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in++;
1239           CeedOperatorGetBasisPointer(data->eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &b);
1240           B_in[(qq + e_in) * elem_size + n] = b[q * elem_size + n];
1241         }
1242       }
1243     }
1244     data->B_in = B_in;
1245   }
1246 
1247   if (B_out && !data->B_out) {
1248     CeedInt           num_qpts, elem_size;
1249     CeedScalar       *B_out, *identity = NULL;
1250     const CeedScalar *interp_out, *grad_out;
1251     bool              has_eval_none = false;
1252 
1253     CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_out, &num_qpts));
1254     CeedCall(CeedBasisGetNumNodes(data->basis_out, &elem_size));
1255     CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_out, &B_out));
1256 
1257     for (CeedInt i = 0; i < data->num_eval_mode_out; i++) {
1258       has_eval_none = has_eval_none || (data->eval_mode_out[i] == CEED_EVAL_NONE);
1259     }
1260     if (has_eval_none) {
1261       CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1262       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1263         identity[i * elem_size + i] = 1.0;
1264       }
1265     }
1266     CeedCall(CeedBasisGetInterp(data->basis_out, &interp_out));
1267     CeedCall(CeedBasisGetGrad(data->basis_out, &grad_out));
1268 
1269     for (CeedInt q = 0; q < num_qpts; q++) {
1270       for (CeedInt n = 0; n < elem_size; n++) {
1271         CeedInt d_out = -1;
1272         for (CeedInt e_out = 0; e_out < data->num_eval_mode_out; e_out++) {
1273           const CeedInt     qq = data->num_eval_mode_out * q;
1274           const CeedScalar *b  = NULL;
1275 
1276           if (data->eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out++;
1277           CeedOperatorGetBasisPointer(data->eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &b);
1278           B_out[(qq + e_out) * elem_size + n] = b[q * elem_size + n];
1279         }
1280       }
1281     }
1282     data->B_out = B_out;
1283   }
1284 
1285   if (basis_in) *basis_in = data->basis_in;
1286   if (B_in) *B_in = data->B_in;
1287   if (basis_out) *basis_out = data->basis_out;
1288   if (B_out) *B_out = data->B_out;
1289 
1290   return CEED_ERROR_SUCCESS;
1291 }
1292 
1293 /**
1294   @brief Destroy CeedOperatorAssemblyData
1295 
1296   @param[in,out] data CeedOperatorAssemblyData to destroy
1297 
1298   @return An error code: 0 - success, otherwise - failure
1299 
1300   @ref Backend
1301 **/
1302 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1303   if (!*data) return CEED_ERROR_SUCCESS;
1304 
1305   CeedCall(CeedDestroy(&(*data)->ceed));
1306   CeedCall(CeedBasisDestroy(&(*data)->basis_in));
1307   CeedCall(CeedBasisDestroy(&(*data)->basis_out));
1308   CeedCall(CeedFree(&(*data)->eval_mode_in));
1309   CeedCall(CeedFree(&(*data)->eval_mode_out));
1310   CeedCall(CeedFree(&(*data)->B_in));
1311   CeedCall(CeedFree(&(*data)->B_out));
1312 
1313   CeedCall(CeedFree(data));
1314   return CEED_ERROR_SUCCESS;
1315 }
1316 
1317 /// @}
1318 
1319 /// ----------------------------------------------------------------------------
1320 /// CeedOperator Public API
1321 /// ----------------------------------------------------------------------------
1322 /// @addtogroup CeedOperatorUser
1323 /// @{
1324 
1325 /**
1326   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1327 
1328   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1329     The vector 'assembled' is of shape [num_elements, num_input_fields, num_output_fields, num_quad_points] and contains column-major matrices
1330 representing the action of the CeedQFunction for a corresponding quadrature point on an element. Inputs and outputs are in the order provided by the
1331 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order,
1332 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1]
1333 and producing the output [dv_0, dv_1, v].
1334 
1335   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1336 
1337   @param[in]  op        CeedOperator to assemble CeedQFunction
1338   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1339   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1340   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1341 
1342   @return An error code: 0 - success, otherwise - failure
1343 
1344   @ref User
1345 **/
1346 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1347   CeedCall(CeedOperatorCheckReady(op));
1348 
1349   if (op->LinearAssembleQFunction) {
1350     // Backend version
1351     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1352   } else {
1353     // Operator fallback
1354     CeedOperator op_fallback;
1355 
1356     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1357     if (op_fallback) {
1358       CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1359     } else {
1360       // LCOV_EXCL_START
1361       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1362       // LCOV_EXCL_STOP
1363     }
1364   }
1365   return CEED_ERROR_SUCCESS;
1366 }
1367 
1368 /**
1369   @brief Assemble CeedQFunction and store result internally.
1370            Return copied references of stored data to the caller.
1371            Caller is responsible for ownership and destruction of the copied references.
1372            See also @ref CeedOperatorLinearAssembleQFunction
1373 
1374   @param[in]  op        CeedOperator to assemble CeedQFunction
1375   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1376   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1377   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1378 
1379   @return An error code: 0 - success, otherwise - failure
1380 
1381   @ref User
1382 **/
1383 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1384   CeedCall(CeedOperatorCheckReady(op));
1385 
1386   if (op->LinearAssembleQFunctionUpdate) {
1387     // Backend version
1388     bool                qf_assembled_is_setup;
1389     CeedVector          assembled_vec  = NULL;
1390     CeedElemRestriction assembled_rstr = NULL;
1391 
1392     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1393     if (qf_assembled_is_setup) {
1394       bool update_needed;
1395 
1396       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1397       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1398       if (update_needed) {
1399         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1400       }
1401     } else {
1402       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1403       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1404     }
1405     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1406 
1407     // Copy reference from internally held copy
1408     *assembled = NULL;
1409     *rstr      = NULL;
1410     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1411     CeedCall(CeedVectorDestroy(&assembled_vec));
1412     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1413     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1414   } else {
1415     // Operator fallback
1416     CeedOperator op_fallback;
1417 
1418     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1419     if (op_fallback) {
1420       CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1421     } else {
1422       // LCOV_EXCL_START
1423       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1424       // LCOV_EXCL_STOP
1425     }
1426   }
1427 
1428   return CEED_ERROR_SUCCESS;
1429 }
1430 
1431 /**
1432   @brief Assemble the diagonal of a square linear CeedOperator
1433 
1434   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1435 
1436   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1437 
1438   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1439 
1440   @param[in]  op        CeedOperator to assemble CeedQFunction
1441   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1442   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1443 
1444   @return An error code: 0 - success, otherwise - failure
1445 
1446   @ref User
1447 **/
1448 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1449   bool is_composite;
1450   CeedCall(CeedOperatorCheckReady(op));
1451   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1452 
1453   CeedSize input_size = 0, output_size = 0;
1454   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1455   if (input_size != output_size) {
1456     // LCOV_EXCL_START
1457     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1458     // LCOV_EXCL_STOP
1459   }
1460 
1461   // Early exit for empty operator
1462   if (!is_composite) {
1463     CeedInt num_elem = 0;
1464 
1465     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1466     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1467   }
1468 
1469   if (op->LinearAssembleDiagonal) {
1470     // Backend version
1471     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1472     return CEED_ERROR_SUCCESS;
1473   } else if (op->LinearAssembleAddDiagonal) {
1474     // Backend version with zeroing first
1475     CeedCall(CeedVectorSetValue(assembled, 0.0));
1476     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1477     return CEED_ERROR_SUCCESS;
1478   } else {
1479     // Operator fallback
1480     CeedOperator op_fallback;
1481 
1482     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1483     if (op_fallback) {
1484       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1485       return CEED_ERROR_SUCCESS;
1486     }
1487   }
1488   // Default interface implementation
1489   CeedCall(CeedVectorSetValue(assembled, 0.0));
1490   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1491 
1492   return CEED_ERROR_SUCCESS;
1493 }
1494 
1495 /**
1496   @brief Assemble the diagonal of a square linear CeedOperator
1497 
1498   This sums into a CeedVector the diagonal of a linear CeedOperator.
1499 
1500   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1501 
1502   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1503 
1504   @param[in]  op        CeedOperator to assemble CeedQFunction
1505   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1506   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1507 
1508   @return An error code: 0 - success, otherwise - failure
1509 
1510   @ref User
1511 **/
1512 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1513   bool is_composite;
1514   CeedCall(CeedOperatorCheckReady(op));
1515   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1516 
1517   CeedSize input_size = 0, output_size = 0;
1518   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1519   if (input_size != output_size) {
1520     // LCOV_EXCL_START
1521     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1522     // LCOV_EXCL_STOP
1523   }
1524 
1525   // Early exit for empty operator
1526   if (!is_composite) {
1527     CeedInt num_elem = 0;
1528 
1529     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1530     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1531   }
1532 
1533   if (op->LinearAssembleAddDiagonal) {
1534     // Backend version
1535     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1536     return CEED_ERROR_SUCCESS;
1537   } else {
1538     // Operator fallback
1539     CeedOperator op_fallback;
1540 
1541     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1542     if (op_fallback) {
1543       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1544       return CEED_ERROR_SUCCESS;
1545     }
1546   }
1547   // Default interface implementation
1548   if (is_composite) {
1549     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1550   } else {
1551     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1552   }
1553 
1554   return CEED_ERROR_SUCCESS;
1555 }
1556 
1557 /**
1558   @brief Assemble the point block diagonal of a square linear CeedOperator
1559 
1560   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1561 
1562   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1563 
1564   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1565 
1566   @param[in]  op        CeedOperator to assemble CeedQFunction
1567   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1568 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1569 component in].
1570   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1571 
1572   @return An error code: 0 - success, otherwise - failure
1573 
1574   @ref User
1575 **/
1576 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1577   bool is_composite;
1578   CeedCall(CeedOperatorCheckReady(op));
1579   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1580 
1581   CeedSize input_size = 0, output_size = 0;
1582   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1583   if (input_size != output_size) {
1584     // LCOV_EXCL_START
1585     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1586     // LCOV_EXCL_STOP
1587   }
1588 
1589   // Early exit for empty operator
1590   if (!is_composite) {
1591     CeedInt num_elem = 0;
1592 
1593     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1594     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1595   }
1596 
1597   if (op->LinearAssemblePointBlockDiagonal) {
1598     // Backend version
1599     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1600     return CEED_ERROR_SUCCESS;
1601   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1602     // Backend version with zeroing first
1603     CeedCall(CeedVectorSetValue(assembled, 0.0));
1604     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1605     return CEED_ERROR_SUCCESS;
1606   } else {
1607     // Operator fallback
1608     CeedOperator op_fallback;
1609 
1610     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1611     if (op_fallback) {
1612       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1613       return CEED_ERROR_SUCCESS;
1614     }
1615   }
1616   // Default interface implementation
1617   CeedCall(CeedVectorSetValue(assembled, 0.0));
1618   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1619 
1620   return CEED_ERROR_SUCCESS;
1621 }
1622 
1623 /**
1624   @brief Assemble the point block diagonal of a square linear CeedOperator
1625 
1626   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1627 
1628   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1629 
1630   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1631 
1632   @param[in]  op        CeedOperator to assemble CeedQFunction
1633   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1634 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1635 component in].
1636   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1637 
1638   @return An error code: 0 - success, otherwise - failure
1639 
1640   @ref User
1641 **/
1642 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1643   bool is_composite;
1644   CeedCall(CeedOperatorCheckReady(op));
1645   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1646 
1647   CeedSize input_size = 0, output_size = 0;
1648   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1649   if (input_size != output_size) {
1650     // LCOV_EXCL_START
1651     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1652     // LCOV_EXCL_STOP
1653   }
1654 
1655   // Early exit for empty operator
1656   if (!is_composite) {
1657     CeedInt num_elem = 0;
1658 
1659     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1660     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1661   }
1662 
1663   if (op->LinearAssembleAddPointBlockDiagonal) {
1664     // Backend version
1665     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1666     return CEED_ERROR_SUCCESS;
1667   } else {
1668     // Operator fallback
1669     CeedOperator op_fallback;
1670 
1671     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1672     if (op_fallback) {
1673       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1674       return CEED_ERROR_SUCCESS;
1675     }
1676   }
1677   // Default interface implementation
1678   if (is_composite) {
1679     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1680   } else {
1681     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1682   }
1683 
1684   return CEED_ERROR_SUCCESS;
1685 }
1686 
1687 /**
1688    @brief Fully assemble the nonzero pattern of a linear operator.
1689 
1690    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1691 
1692    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1693 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j)
1694 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1695 
1696    This will generally be slow unless your operator is low-order.
1697 
1698    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1699 
1700    @param[in]  op          CeedOperator to assemble
1701    @param[out] num_entries Number of entries in coordinate nonzero pattern
1702    @param[out] rows        Row number for each entry
1703    @param[out] cols        Column number for each entry
1704 
1705    @ref User
1706 **/
1707 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1708   CeedInt       num_suboperators, single_entries;
1709   CeedOperator *sub_operators;
1710   bool          is_composite;
1711   CeedCall(CeedOperatorCheckReady(op));
1712   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1713 
1714   if (op->LinearAssembleSymbolic) {
1715     // Backend version
1716     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1717     return CEED_ERROR_SUCCESS;
1718   } else {
1719     // Operator fallback
1720     CeedOperator op_fallback;
1721 
1722     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1723     if (op_fallback) {
1724       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1725       return CEED_ERROR_SUCCESS;
1726     }
1727   }
1728 
1729   // Default interface implementation
1730 
1731   // count entries and allocate rows, cols arrays
1732   *num_entries = 0;
1733   if (is_composite) {
1734     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1735     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1736     for (CeedInt k = 0; k < num_suboperators; ++k) {
1737       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1738       *num_entries += single_entries;
1739     }
1740   } else {
1741     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1742     *num_entries += single_entries;
1743   }
1744   CeedCall(CeedCalloc(*num_entries, rows));
1745   CeedCall(CeedCalloc(*num_entries, cols));
1746 
1747   // assemble nonzero locations
1748   CeedInt offset = 0;
1749   if (is_composite) {
1750     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1751     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1752     for (CeedInt k = 0; k < num_suboperators; ++k) {
1753       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1754       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1755       offset += single_entries;
1756     }
1757   } else {
1758     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1759   }
1760 
1761   return CEED_ERROR_SUCCESS;
1762 }
1763 
1764 /**
1765    @brief Fully assemble the nonzero entries of a linear operator.
1766 
1767    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1768 
1769    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1770 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added,
1771 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1772 
1773    This will generally be slow unless your operator is low-order.
1774 
1775    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1776 
1777    @param[in]  op     CeedOperator to assemble
1778    @param[out] values Values to assemble into matrix
1779 
1780    @ref User
1781 **/
1782 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1783   CeedInt       num_suboperators, single_entries = 0;
1784   CeedOperator *sub_operators;
1785   bool          is_composite;
1786   CeedCall(CeedOperatorCheckReady(op));
1787   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1788 
1789   // Early exit for empty operator
1790   if (!is_composite) {
1791     CeedInt num_elem = 0;
1792 
1793     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1794     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1795   }
1796 
1797   if (op->LinearAssemble) {
1798     // Backend version
1799     CeedCall(op->LinearAssemble(op, values));
1800     return CEED_ERROR_SUCCESS;
1801   } else {
1802     // Operator fallback
1803     CeedOperator op_fallback;
1804 
1805     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1806     if (op_fallback) {
1807       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
1808       return CEED_ERROR_SUCCESS;
1809     }
1810   }
1811 
1812   // Default interface implementation
1813   CeedInt offset = 0;
1814   if (is_composite) {
1815     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1816     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1817     for (CeedInt k = 0; k < num_suboperators; k++) {
1818       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
1819       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1820       offset += single_entries;
1821     }
1822   } else {
1823     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
1824   }
1825 
1826   return CEED_ERROR_SUCCESS;
1827 }
1828 
1829 /**
1830   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
1831 
1832   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1833 
1834   @param[in]  op               Composite CeedOperator
1835   @param[in]  num_skip_indices Number of suboperators to skip
1836   @param[in]  skip_indices     Array of indices of suboperators to skip
1837   @param[out] mult             Vector to store multiplicity (of size l_size)
1838 
1839   @return An error code: 0 - success, otherwise - failure
1840 
1841   @ref User
1842 **/
1843 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
1844   CeedCall(CeedOperatorCheckReady(op));
1845 
1846   Ceed                ceed;
1847   CeedInt             num_suboperators;
1848   CeedSize            l_vec_len;
1849   CeedScalar         *mult_array;
1850   CeedVector          ones_l_vec;
1851   CeedElemRestriction elem_restr;
1852   CeedOperator       *sub_operators;
1853 
1854   CeedCall(CeedOperatorGetCeed(op, &ceed));
1855 
1856   // Zero mult vector
1857   CeedCall(CeedVectorSetValue(mult, 0.0));
1858 
1859   // Get suboperators
1860   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1861   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1862   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
1863 
1864   // Work vector
1865   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
1866   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
1867   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
1868   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
1869 
1870   // Compute multiplicity across suboperators
1871   for (CeedInt i = 0; i < num_suboperators; i++) {
1872     const CeedScalar *sub_mult_array;
1873     CeedVector        sub_mult_l_vec, ones_e_vec;
1874 
1875     // -- Check for suboperator to skip
1876     for (CeedInt j = 0; j < num_skip_indices; j++) {
1877       if (skip_indices[j] == i) continue;
1878     }
1879 
1880     // -- Sub operator multiplicity
1881     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_restr));
1882     CeedCall(CeedElemRestrictionCreateVector(elem_restr, &sub_mult_l_vec, &ones_e_vec));
1883     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
1884     CeedCall(CeedElemRestrictionApply(elem_restr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
1885     CeedCall(CeedElemRestrictionApply(elem_restr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
1886     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
1887     // ---- Flag every node present in the current suboperator
1888     for (CeedInt j = 0; j < l_vec_len; j++) {
1889       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
1890     }
1891     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
1892     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
1893     CeedCall(CeedVectorDestroy(&ones_e_vec));
1894   }
1895   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
1896   CeedCall(CeedVectorDestroy(&ones_l_vec));
1897 
1898   return CEED_ERROR_SUCCESS;
1899 }
1900 
1901 /**
1902   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
1903 grid interpolation
1904 
1905   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1906 
1907   @param[in]  op_fine      Fine grid operator
1908   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter
1909   @param[in]  rstr_coarse  Coarse grid restriction
1910   @param[in]  basis_coarse Coarse grid active vector basis
1911   @param[out] op_coarse    Coarse grid operator
1912   @param[out] op_prolong   Coarse to fine operator
1913   @param[out] op_restrict  Fine to coarse operator
1914 
1915   @return An error code: 0 - success, otherwise - failure
1916 
1917   @ref User
1918 **/
1919 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1920                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
1921   CeedCall(CeedOperatorCheckReady(op_fine));
1922 
1923   // Build prolongation matrix
1924   CeedBasis basis_fine, basis_c_to_f;
1925   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1926   CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
1927 
1928   // Core code
1929   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1930 
1931   return CEED_ERROR_SUCCESS;
1932 }
1933 
1934 /**
1935   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
1936 
1937   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1938 
1939   @param[in]  op_fine       Fine grid operator
1940   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
1941   @param[in]  rstr_coarse   Coarse grid restriction
1942   @param[in]  basis_coarse  Coarse grid active vector basis
1943   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation
1944   @param[out] op_coarse     Coarse grid operator
1945   @param[out] op_prolong    Coarse to fine operator
1946   @param[out] op_restrict   Fine to coarse operator
1947 
1948   @return An error code: 0 - success, otherwise - failure
1949 
1950   @ref User
1951 **/
1952 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1953                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
1954                                              CeedOperator *op_restrict) {
1955   CeedCall(CeedOperatorCheckReady(op_fine));
1956   Ceed ceed;
1957   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
1958 
1959   // Check for compatible quadrature spaces
1960   CeedBasis basis_fine;
1961   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1962   CeedInt Q_f, Q_c;
1963   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
1964   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
1965   if (Q_f != Q_c) {
1966     // LCOV_EXCL_START
1967     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
1968     // LCOV_EXCL_STOP
1969   }
1970 
1971   // Coarse to fine basis
1972   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
1973   CeedCall(CeedBasisGetDimension(basis_fine, &dim));
1974   CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
1975   CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
1976   CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
1977   P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
1978   CeedScalar *q_ref, *q_weight, *grad;
1979   CeedCall(CeedCalloc(P_1d_f, &q_ref));
1980   CeedCall(CeedCalloc(P_1d_f, &q_weight));
1981   CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
1982   CeedBasis basis_c_to_f;
1983   CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
1984   CeedCall(CeedFree(&q_ref));
1985   CeedCall(CeedFree(&q_weight));
1986   CeedCall(CeedFree(&grad));
1987 
1988   // Core code
1989   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1990   return CEED_ERROR_SUCCESS;
1991 }
1992 
1993 /**
1994   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
1995 
1996   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1997 
1998   @param[in]  op_fine       Fine grid operator
1999   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
2000   @param[in]  rstr_coarse   Coarse grid restriction
2001   @param[in]  basis_coarse  Coarse grid active vector basis
2002   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation
2003   @param[out] op_coarse     Coarse grid operator
2004   @param[out] op_prolong    Coarse to fine operator
2005   @param[out] op_restrict   Fine to coarse operator
2006 
2007   @return An error code: 0 - success, otherwise - failure
2008 
2009   @ref User
2010 **/
2011 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2012                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2013                                        CeedOperator *op_restrict) {
2014   CeedCall(CeedOperatorCheckReady(op_fine));
2015   Ceed ceed;
2016   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2017 
2018   // Check for compatible quadrature spaces
2019   CeedBasis basis_fine;
2020   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2021   CeedInt Q_f, Q_c;
2022   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2023   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2024   if (Q_f != Q_c) {
2025     // LCOV_EXCL_START
2026     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2027     // LCOV_EXCL_STOP
2028   }
2029 
2030   // Coarse to fine basis
2031   CeedElemTopology topo;
2032   CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2033   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2034   CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2035   CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2036   CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2037   CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2038   CeedScalar *q_ref, *q_weight, *grad;
2039   CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2040   CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2041   CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2042   CeedBasis basis_c_to_f;
2043   CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2044   CeedCall(CeedFree(&q_ref));
2045   CeedCall(CeedFree(&q_weight));
2046   CeedCall(CeedFree(&grad));
2047 
2048   // Core code
2049   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2050   return CEED_ERROR_SUCCESS;
2051 }
2052 
2053 /**
2054   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2055 
2056   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2057     This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, M = V^T V, K = V^T S V.
2058     The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form V^T
2059 S^hat V. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear.
2060 
2061   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2062 
2063   @param[in]  op      CeedOperator to create element inverses
2064   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2065   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2066 
2067   @return An error code: 0 - success, otherwise - failure
2068 
2069   @ref User
2070 **/
2071 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2072   CeedCall(CeedOperatorCheckReady(op));
2073 
2074   if (op->CreateFDMElementInverse) {
2075     // Backend version
2076     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2077     return CEED_ERROR_SUCCESS;
2078   } else {
2079     // Operator fallback
2080     CeedOperator op_fallback;
2081 
2082     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2083     if (op_fallback) {
2084       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2085       return CEED_ERROR_SUCCESS;
2086     }
2087   }
2088 
2089   // Default interface implementation
2090   Ceed ceed, ceed_parent;
2091   CeedCall(CeedOperatorGetCeed(op, &ceed));
2092   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2093   ceed_parent = ceed_parent ? ceed_parent : ceed;
2094   CeedQFunction qf;
2095   CeedCall(CeedOperatorGetQFunction(op, &qf));
2096 
2097   // Determine active input basis
2098   bool                interp = false, grad = false;
2099   CeedBasis           basis = NULL;
2100   CeedElemRestriction rstr  = NULL;
2101   CeedOperatorField  *op_fields;
2102   CeedQFunctionField *qf_fields;
2103   CeedInt             num_input_fields;
2104   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2105   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2106   for (CeedInt i = 0; i < num_input_fields; i++) {
2107     CeedVector vec;
2108     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2109     if (vec == CEED_VECTOR_ACTIVE) {
2110       CeedEvalMode eval_mode;
2111       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2112       interp = interp || eval_mode == CEED_EVAL_INTERP;
2113       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2114       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2115       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2116     }
2117   }
2118   if (!basis) {
2119     // LCOV_EXCL_START
2120     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2121     // LCOV_EXCL_STOP
2122   }
2123   CeedSize l_size = 1;
2124   CeedInt  P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
2125   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2126   CeedCall(CeedBasisGetNumNodes(basis, &elem_size));
2127   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2128   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2129   CeedCall(CeedBasisGetDimension(basis, &dim));
2130   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2131   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2132   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2133 
2134   // Build and diagonalize 1D Mass and Laplacian
2135   bool tensor_basis;
2136   CeedCall(CeedBasisIsTensor(basis, &tensor_basis));
2137   if (!tensor_basis) {
2138     // LCOV_EXCL_START
2139     return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2140     // LCOV_EXCL_STOP
2141   }
2142   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2143   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2144   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2145   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2146   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2147   CeedCall(CeedCalloc(P_1d, &lambda));
2148   // -- Build matrices
2149   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2150   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2151   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2152   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2153   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2154 
2155   // -- Diagonalize
2156   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2157   CeedCall(CeedFree(&mass));
2158   CeedCall(CeedFree(&laplace));
2159   for (CeedInt i = 0; i < P_1d; i++) {
2160     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2161   }
2162   CeedCall(CeedFree(&x));
2163 
2164   // Assemble QFunction
2165   CeedVector          assembled;
2166   CeedElemRestriction rstr_qf;
2167   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2168   CeedInt layout[3];
2169   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2170   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2171   CeedScalar max_norm = 0;
2172   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2173 
2174   // Calculate element averages
2175   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2176   CeedScalar       *elem_avg;
2177   const CeedScalar *assembled_array, *q_weight_array;
2178   CeedVector        q_weight;
2179   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2180   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2181   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2182   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2183   CeedCall(CeedCalloc(num_elem, &elem_avg));
2184   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2185   for (CeedInt e = 0; e < num_elem; e++) {
2186     CeedInt count = 0;
2187     for (CeedInt q = 0; q < num_qpts; q++) {
2188       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2189         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2190           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2191           count++;
2192         }
2193       }
2194     }
2195     if (count) {
2196       elem_avg[e] /= count;
2197     } else {
2198       elem_avg[e] = 1.0;
2199     }
2200   }
2201   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2202   CeedCall(CeedVectorDestroy(&assembled));
2203   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2204   CeedCall(CeedVectorDestroy(&q_weight));
2205 
2206   // Build FDM diagonal
2207   CeedVector  q_data;
2208   CeedScalar *q_data_array, *fdm_diagonal;
2209   CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal));
2210   const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON;
2211   for (CeedInt c = 0; c < num_comp; c++) {
2212     for (CeedInt n = 0; n < elem_size; n++) {
2213       if (interp) fdm_diagonal[c * elem_size + n] = 1.0;
2214       if (grad) {
2215         for (CeedInt d = 0; d < dim; d++) {
2216           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2217           fdm_diagonal[c * elem_size + n] += lambda[i];
2218         }
2219       }
2220       if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound;
2221     }
2222   }
2223   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data));
2224   CeedCall(CeedVectorSetValue(q_data, 0.0));
2225   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2226   for (CeedInt e = 0; e < num_elem; e++) {
2227     for (CeedInt c = 0; c < num_comp; c++) {
2228       for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]);
2229     }
2230   }
2231   CeedCall(CeedFree(&elem_avg));
2232   CeedCall(CeedFree(&fdm_diagonal));
2233   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2234 
2235   // Setup FDM operator
2236   // -- Basis
2237   CeedBasis   fdm_basis;
2238   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2239   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2240   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2241   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2242   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2243   CeedCall(CeedFree(&fdm_interp));
2244   CeedCall(CeedFree(&grad_dummy));
2245   CeedCall(CeedFree(&q_ref_dummy));
2246   CeedCall(CeedFree(&q_weight_dummy));
2247   CeedCall(CeedFree(&lambda));
2248 
2249   // -- Restriction
2250   CeedElemRestriction rstr_qd_i;
2251   CeedInt             strides[3] = {1, elem_size, elem_size * num_comp};
2252   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i));
2253   // -- QFunction
2254   CeedQFunction qf_fdm;
2255   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2256   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2257   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2258   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2259   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2260   // -- QFunction context
2261   CeedInt *num_comp_data;
2262   CeedCall(CeedCalloc(1, &num_comp_data));
2263   num_comp_data[0] = num_comp;
2264   CeedQFunctionContext ctx_fdm;
2265   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2266   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2267   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2268   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2269   // -- Operator
2270   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2271   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2272   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2273   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2274 
2275   // Cleanup
2276   CeedCall(CeedVectorDestroy(&q_data));
2277   CeedCall(CeedBasisDestroy(&fdm_basis));
2278   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2279   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2280 
2281   return CEED_ERROR_SUCCESS;
2282 }
2283 
2284 /// @}
2285