xref: /libCEED/interface/ceed-preconditioning.c (revision 85bb9dcf10976ed3d4575e9e1435e62dcd7cbb06)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <assert.h>
9 #include <ceed-impl.h>
10 #include <ceed/backend.h>
11 #include <ceed/ceed.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_restr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_restr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) {
128       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     }
130     // Cleanup
131     CeedCall(CeedQFunctionDestroy(&qf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
133     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
134   }
135   CeedCall(CeedOperatorSetName(op_fallback, op->name));
136   CeedCall(CeedOperatorCheckReady(op_fallback));
137   op->op_fallback = op_fallback;
138 
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
144 
145   @param[in]  op          CeedOperator to retrieve fallback for
146   @param[out] op_fallback Fallback CeedOperator
147 
148   @return An error code: 0 - success, otherwise - failure
149 
150   @ref Developer
151 **/
152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
153   // Create if needed
154   if (!op->op_fallback) {
155     CeedCall(CeedOperatorCreateFallback(op));
156   }
157   if (op->op_fallback) {
158     bool is_debug;
159 
160     CeedCall(CeedIsDebug(op->ceed, &is_debug));
161     if (is_debug) {
162       Ceed        ceed, ceed_fallback;
163       const char *resource, *resource_fallback;
164 
165       CeedCall(CeedOperatorGetCeed(op, &ceed));
166       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
167       CeedCall(CeedGetResource(ceed, &resource));
168       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
169 
170       CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n");
171       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
172                 op->op_fallback);
173     }
174   }
175   *op_fallback = op->op_fallback;
176 
177   return CEED_ERROR_SUCCESS;
178 }
179 
180 /**
181   @brief Select correct basis matrix pointer based on CeedEvalMode
182 
183   @param[in]  eval_mode Current basis evaluation mode
184   @param[in]  identity  Pointer to identity matrix
185   @param[in]  interp    Pointer to interpolation matrix
186   @param[in]  grad      Pointer to gradient matrix
187   @param[out] basis_ptr Basis pointer to set
188 
189   @ref Developer
190 **/
191 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad,
192                                                const CeedScalar **basis_ptr) {
193   switch (eval_mode) {
194     case CEED_EVAL_NONE:
195       *basis_ptr = identity;
196       break;
197     case CEED_EVAL_INTERP:
198       *basis_ptr = interp;
199       break;
200     case CEED_EVAL_GRAD:
201       *basis_ptr = grad;
202       break;
203     case CEED_EVAL_WEIGHT:
204     case CEED_EVAL_DIV:
205     case CEED_EVAL_CURL:
206       break;  // Caught by QF Assembly
207   }
208   assert(*basis_ptr != NULL);
209 }
210 
211 /**
212   @brief Create point block restriction for active operator field
213 
214   @param[in]  rstr            Original CeedElemRestriction for active field
215   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
216 
217   @return An error code: 0 - success, otherwise - failure
218 
219   @ref Developer
220 **/
221 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
222   Ceed ceed;
223   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
224   const CeedInt *offsets;
225   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
226 
227   // Expand offsets
228   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
229   CeedSize l_size;
230   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
231   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
232   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
233   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
234   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
235   CeedInt shift = num_comp;
236   if (comp_stride != 1) shift *= num_comp;
237   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
238   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
239     pointblock_offsets[i] = offsets[i] * shift;
240   }
241 
242   // Create new restriction
243   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
244                                      pointblock_offsets, pointblock_rstr));
245 
246   // Cleanup
247   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
248 
249   return CEED_ERROR_SUCCESS;
250 }
251 
252 /**
253   @brief Core logic for assembling operator diagonal or point block diagonal
254 
255   @param[in]  op            CeedOperator to assemble point block diagonal
256   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
257   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
258   @param[out] assembled     CeedVector to store assembled diagonal
259 
260   @return An error code: 0 - success, otherwise - failure
261 
262   @ref Developer
263 **/
264 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
265   Ceed ceed;
266   CeedCall(CeedOperatorGetCeed(op, &ceed));
267 
268   // Assemble QFunction
269   CeedQFunction qf;
270   CeedCall(CeedOperatorGetQFunction(op, &qf));
271   CeedInt num_input_fields, num_output_fields;
272   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
273   CeedVector          assembled_qf;
274   CeedElemRestriction rstr;
275   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr, request));
276   CeedInt layout[3];
277   CeedCall(CeedElemRestrictionGetELayout(rstr, &layout));
278   CeedCall(CeedElemRestrictionDestroy(&rstr));
279 
280   // Get assembly data
281   CeedOperatorAssemblyData data;
282   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
283   const CeedEvalMode *eval_mode_in, *eval_mode_out;
284   CeedInt             num_eval_mode_in, num_eval_mode_out;
285   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out));
286   CeedBasis basis_in, basis_out;
287   CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL));
288   CeedInt num_comp;
289   CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
290 
291   // Assemble point block diagonal restriction, if needed
292   CeedElemRestriction diag_rstr;
293   CeedCall(CeedOperatorGetActiveElemRestriction(op, &diag_rstr));
294   if (is_pointblock) {
295     CeedElemRestriction point_block_rstr;
296     CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_rstr, &point_block_rstr));
297     diag_rstr = point_block_rstr;
298   }
299 
300   // Create diagonal vector
301   CeedVector elem_diag;
302   CeedCall(CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag));
303 
304   // Assemble element operator diagonals
305   CeedScalar       *elem_diag_array;
306   const CeedScalar *assembled_qf_array;
307   CeedCall(CeedVectorSetValue(elem_diag, 0.0));
308   CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
309   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
310   CeedInt num_elem, num_nodes, num_qpts;
311   CeedCall(CeedElemRestrictionGetNumElements(diag_rstr, &num_elem));
312   CeedCall(CeedBasisGetNumNodes(basis_in, &num_nodes));
313   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
314 
315   // Basis matrices
316   const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
317   CeedScalar       *identity      = NULL;
318   bool              has_eval_none = false;
319   for (CeedInt i = 0; i < num_eval_mode_in; i++) {
320     has_eval_none = has_eval_none || (eval_mode_in[i] == CEED_EVAL_NONE);
321   }
322   for (CeedInt i = 0; i < num_eval_mode_out; i++) {
323     has_eval_none = has_eval_none || (eval_mode_out[i] == CEED_EVAL_NONE);
324   }
325   if (has_eval_none) {
326     CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
327     for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
328   }
329   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
330   CeedCall(CeedBasisGetInterp(basis_out, &interp_out));
331   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
332   CeedCall(CeedBasisGetGrad(basis_out, &grad_out));
333   // Compute the diagonal of B^T D B
334   // Each element
335   for (CeedInt e = 0; e < num_elem; e++) {
336     CeedInt d_out = -1;
337     // Each basis eval mode pair
338     for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
339       const CeedScalar *bt = NULL;
340       if (eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out += 1;
341       CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * num_nodes], &bt);
342       CeedInt d_in = -1;
343       for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
344         const CeedScalar *b = NULL;
345         if (eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in += 1;
346         CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * num_nodes], &b);
347         // Each component
348         for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
349           // Each qpoint/node pair
350           for (CeedInt q = 0; q < num_qpts; q++) {
351             if (is_pointblock) {
352               // Point Block Diagonal
353               for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
354                 const CeedScalar qf_value =
355                     assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_in) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] +
356                                        e * layout[2]];
357                 for (CeedInt n = 0; n < num_nodes; n++) {
358                   elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
359                       bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n];
360                 }
361               }
362             } else {
363               // Diagonal Only
364               const CeedScalar qf_value =
365                   assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_out) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] +
366                                      e * layout[2]];
367               for (CeedInt n = 0; n < num_nodes; n++) {
368                 elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n];
369               }
370             }
371           }
372         }
373       }
374     }
375   }
376   CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
377   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
378 
379   // Assemble local operator diagonal
380   CeedCall(CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
381 
382   // Cleanup
383   if (is_pointblock) {
384     CeedCall(CeedElemRestrictionDestroy(&diag_rstr));
385   }
386   CeedCall(CeedVectorDestroy(&assembled_qf));
387   CeedCall(CeedVectorDestroy(&elem_diag));
388   CeedCall(CeedFree(&identity));
389 
390   return CEED_ERROR_SUCCESS;
391 }
392 
393 /**
394   @brief Core logic for assembling composite operator diagonal
395 
396   @param[in]  op            CeedOperator to assemble point block diagonal
397   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
398   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
399   @param[out] assembled     CeedVector to store assembled diagonal
400 
401   @return An error code: 0 - success, otherwise - failure
402 
403   @ref Developer
404 **/
405 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
406                                                                  CeedVector assembled) {
407   CeedInt       num_sub;
408   CeedOperator *suboperators;
409   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
410   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
411   for (CeedInt i = 0; i < num_sub; i++) {
412     if (is_pointblock) {
413       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
414     } else {
415       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
416     }
417   }
418   return CEED_ERROR_SUCCESS;
419 }
420 
421 /**
422   @brief Build nonzero pattern for non-composite operator
423 
424   Users should generally use CeedOperatorLinearAssembleSymbolic()
425 
426   @param[in]  op     CeedOperator to assemble nonzero pattern
427   @param[in]  offset Offset for number of entries
428   @param[out] rows   Row number for each entry
429   @param[out] cols   Column number for each entry
430 
431   @return An error code: 0 - success, otherwise - failure
432 
433   @ref Developer
434 **/
435 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
436   Ceed ceed;
437   bool is_composite;
438   CeedCall(CeedOperatorGetCeed(op, &ceed));
439   CeedCall(CeedOperatorIsComposite(op, &is_composite));
440 
441   if (is_composite) {
442     // LCOV_EXCL_START
443     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
444     // LCOV_EXCL_STOP
445   }
446 
447   CeedSize num_nodes;
448   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
449   CeedElemRestriction rstr_in;
450   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
451   CeedInt num_elem, elem_size, num_comp;
452   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
453   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
454   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
455   CeedInt layout_er[3];
456   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
457 
458   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
459 
460   // Determine elem_dof relation
461   CeedVector index_vec;
462   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
463   CeedScalar *array;
464   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
465   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
466   CeedCall(CeedVectorRestoreArray(index_vec, &array));
467   CeedVector elem_dof;
468   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
469   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
470   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
471   const CeedScalar *elem_dof_a;
472   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
473   CeedCall(CeedVectorDestroy(&index_vec));
474 
475   // Determine i, j locations for element matrices
476   CeedInt count = 0;
477   for (CeedInt e = 0; e < num_elem; e++) {
478     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
479       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
480         for (CeedInt i = 0; i < elem_size; i++) {
481           for (CeedInt j = 0; j < elem_size; j++) {
482             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
483             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
484 
485             const CeedInt row = elem_dof_a[elem_dof_index_row];
486             const CeedInt col = elem_dof_a[elem_dof_index_col];
487 
488             rows[offset + count] = row;
489             cols[offset + count] = col;
490             count++;
491           }
492         }
493       }
494     }
495   }
496   if (count != local_num_entries) {
497     // LCOV_EXCL_START
498     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
499     // LCOV_EXCL_STOP
500   }
501   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
502   CeedCall(CeedVectorDestroy(&elem_dof));
503 
504   return CEED_ERROR_SUCCESS;
505 }
506 
507 /**
508   @brief Assemble nonzero entries for non-composite operator
509 
510   Users should generally use CeedOperatorLinearAssemble()
511 
512   @param[in]  op     CeedOperator to assemble
513   @param[in]  offset Offset for number of entries
514   @param[out] values Values to assemble into matrix
515 
516   @return An error code: 0 - success, otherwise - failure
517 
518   @ref Developer
519 **/
520 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
521   Ceed ceed;
522   bool is_composite;
523   CeedCall(CeedOperatorGetCeed(op, &ceed));
524   CeedCall(CeedOperatorIsComposite(op, &is_composite));
525 
526   if (is_composite) {
527     // LCOV_EXCL_START
528     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
529     // LCOV_EXCL_STOP
530   }
531 
532   // Early exit for empty operator
533   {
534     CeedInt num_elem = 0;
535 
536     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
537     if (num_elem == 0) return CEED_ERROR_SUCCESS;
538   }
539 
540   if (op->LinearAssembleSingle) {
541     // Backend version
542     CeedCall(op->LinearAssembleSingle(op, offset, values));
543     return CEED_ERROR_SUCCESS;
544   } else {
545     // Operator fallback
546     CeedOperator op_fallback;
547 
548     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
549     if (op_fallback) {
550       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
551       return CEED_ERROR_SUCCESS;
552     }
553   }
554 
555   // Assemble QFunction
556   CeedQFunction qf;
557   CeedCall(CeedOperatorGetQFunction(op, &qf));
558   CeedVector          assembled_qf;
559   CeedElemRestriction rstr_q;
560   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
561   CeedSize qf_length;
562   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
563 
564   CeedInt            num_input_fields, num_output_fields;
565   CeedOperatorField *input_fields;
566   CeedOperatorField *output_fields;
567   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
568 
569   // Get assembly data
570   CeedOperatorAssemblyData data;
571   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
572   const CeedEvalMode *eval_mode_in, *eval_mode_out;
573   CeedInt             num_eval_mode_in, num_eval_mode_out;
574   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out));
575   CeedBasis basis_in, basis_out;
576   CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL));
577 
578   if (num_eval_mode_in == 0 || num_eval_mode_out == 0) {
579     // LCOV_EXCL_START
580     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
581     // LCOV_EXCL_STOP
582   }
583 
584   CeedElemRestriction active_rstr;
585   CeedInt             num_elem, elem_size, num_qpts, num_comp;
586   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
587   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
588   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
589   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
590   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
591 
592   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
593 
594   // loop over elements and put in data structure
595   const CeedScalar *interp_in, *grad_in;
596   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
597   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
598 
599   const CeedScalar *assembled_qf_array;
600   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
601 
602   CeedInt layout_qf[3];
603   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
604   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
605 
606   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
607   const CeedScalar *B_mat_in, *B_mat_out;
608   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &B_mat_in, NULL, &B_mat_out));
609   CeedScalar  BTD_mat[elem_size * num_qpts * num_eval_mode_in];
610   CeedScalar  elem_mat[elem_size * elem_size];
611   CeedInt     count = 0;
612   CeedScalar *vals;
613   CeedCall(CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals));
614   for (CeedInt e = 0; e < num_elem; e++) {
615     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
616       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
617         // Compute B^T*D
618         for (CeedInt n = 0; n < elem_size; n++) {
619           for (CeedInt q = 0; q < num_qpts; q++) {
620             for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
621               const CeedInt btd_index = n * (num_qpts * num_eval_mode_in) + (num_eval_mode_in * q + e_in);
622               CeedScalar    sum       = 0.0;
623               for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
624                 const CeedInt b_out_index     = (num_eval_mode_out * q + e_out) * elem_size + n;
625                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_mode_out + e_out) * num_comp + comp_out;
626                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
627                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
628               }
629               BTD_mat[btd_index] = sum;
630             }
631           }
632         }
633         // form element matrix itself (for each block component)
634         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_mode_in));
635 
636         // put element matrix in coordinate data structure
637         for (CeedInt i = 0; i < elem_size; i++) {
638           for (CeedInt j = 0; j < elem_size; j++) {
639             vals[offset + count] = elem_mat[i * elem_size + j];
640             count++;
641           }
642         }
643       }
644     }
645   }
646   if (count != local_num_entries) {
647     // LCOV_EXCL_START
648     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
649     // LCOV_EXCL_STOP
650   }
651   CeedCall(CeedVectorRestoreArray(values, &vals));
652 
653   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
654   CeedCall(CeedVectorDestroy(&assembled_qf));
655 
656   return CEED_ERROR_SUCCESS;
657 }
658 
659 /**
660   @brief Count number of entries for assembled CeedOperator
661 
662   @param[in]  op          CeedOperator to assemble
663   @param[out] num_entries Number of entries in assembled representation
664 
665   @return An error code: 0 - success, otherwise - failure
666 
667   @ref Utility
668 **/
669 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
670   bool                is_composite;
671   CeedElemRestriction rstr;
672   CeedInt             num_elem, elem_size, num_comp;
673 
674   CeedCall(CeedOperatorIsComposite(op, &is_composite));
675   if (is_composite) {
676     // LCOV_EXCL_START
677     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
678     // LCOV_EXCL_STOP
679   }
680   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
681   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
682   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
683   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
684   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
685 
686   return CEED_ERROR_SUCCESS;
687 }
688 
689 /**
690   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
691 
692   @param[in]  op_fine      Fine grid operator
693   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
694   @param[in]  rstr_coarse  Coarse grid restriction
695   @param[in]  basis_coarse Coarse grid active vector basis
696   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
697   @param[out] op_coarse    Coarse grid operator
698   @param[out] op_prolong   Coarse to fine operator, or NULL
699   @param[out] op_restrict  Fine to coarse operator, or NULL
700 
701   @return An error code: 0 - success, otherwise - failure
702 
703   @ref Developer
704 **/
705 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
706                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
707   Ceed       ceed;
708   CeedVector mult_vec = NULL;
709   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
710 
711   // Check for composite operator
712   bool is_composite;
713   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
714   if (is_composite) {
715     // LCOV_EXCL_START
716     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
717     // LCOV_EXCL_STOP
718   }
719 
720   // Coarse Grid
721   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
722   CeedElemRestriction rstr_fine = NULL;
723   // -- Clone input fields
724   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
725     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
726       rstr_fine = op_fine->input_fields[i]->elem_restr;
727       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
728     } else {
729       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_restr,
730                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
731     }
732   }
733   // -- Clone output fields
734   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
735     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
736       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
737     } else {
738       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_restr,
739                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
740     }
741   }
742   // -- Clone QFunctionAssemblyData
743   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
744 
745   // Multiplicity vector
746   if (op_restrict || op_prolong) {
747     CeedVector mult_e_vec;
748 
749     if (!p_mult_fine) {
750       // LCOV_EXCL_START
751       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
752       // LCOV_EXCL_STOP
753     }
754     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
755     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
756     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
757     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
758     CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
759     CeedCall(CeedVectorDestroy(&mult_e_vec));
760     CeedCall(CeedVectorReciprocal(mult_vec));
761   }
762 
763   // Clone name
764   bool   has_name = op_fine->name;
765   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
766   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
767 
768   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
769   if ((op_restrict || op_prolong) && !basis_c_to_f) {
770     // LCOV_EXCL_START
771     return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine basis");
772     // LCOV_EXCL_STOP
773   }
774 
775   // Restriction/Prolongation Operators
776   CeedInt num_comp;
777   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
778 
779   // Restriction
780   if (op_restrict) {
781     CeedInt             *num_comp_r_data;
782     CeedQFunction        qf_restrict;
783     CeedQFunctionContext ctx_r;
784 
785     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
786     CeedCall(CeedCalloc(1, &num_comp_r_data));
787     num_comp_r_data[0] = num_comp;
788     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
789     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
790     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
791     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
792     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
793     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
794     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
795     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
796 
797     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
798     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
799     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
800     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
801 
802     // Set name
803     char *restriction_name;
804     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
805     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
806     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
807     CeedCall(CeedFree(&restriction_name));
808 
809     // Check
810     CeedCall(CeedOperatorCheckReady(*op_restrict));
811 
812     // Cleanup
813     CeedCall(CeedQFunctionDestroy(&qf_restrict));
814   }
815 
816   // Prolongation
817   if (op_prolong) {
818     CeedInt             *num_comp_p_data;
819     CeedQFunction        qf_prolong;
820     CeedQFunctionContext ctx_p;
821 
822     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
823     CeedCall(CeedCalloc(1, &num_comp_p_data));
824     num_comp_p_data[0] = num_comp;
825     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
826     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
827     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
828     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
829     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
830     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
831     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
832     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
833 
834     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
835     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
836     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
837     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
838 
839     // Set name
840     char *prolongation_name;
841     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
842     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
843     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
844     CeedCall(CeedFree(&prolongation_name));
845 
846     // Check
847     CeedCall(CeedOperatorCheckReady(*op_prolong));
848 
849     // Cleanup
850     CeedCall(CeedQFunctionDestroy(&qf_prolong));
851   }
852 
853   // Check
854   CeedCall(CeedOperatorCheckReady(*op_coarse));
855 
856   // Cleanup
857   CeedCall(CeedVectorDestroy(&mult_vec));
858   CeedCall(CeedBasisDestroy(&basis_c_to_f));
859 
860   return CEED_ERROR_SUCCESS;
861 }
862 
863 /**
864   @brief Build 1D mass matrix and Laplacian with perturbation
865 
866   @param[in]  interp_1d   Interpolation matrix in one dimension
867   @param[in]  grad_1d     Gradient matrix in one dimension
868   @param[in]  q_weight_1d Quadrature weights in one dimension
869   @param[in]  P_1d        Number of basis nodes in one dimension
870   @param[in]  Q_1d        Number of quadrature points in one dimension
871   @param[in]  dim         Dimension of basis
872   @param[out] mass        Assembled mass matrix in one dimension
873   @param[out] laplace     Assembled perturbed Laplacian in one dimension
874 
875   @return An error code: 0 - success, otherwise - failure
876 
877   @ref Developer
878 **/
879 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d,
880                                                       CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
881   for (CeedInt i = 0; i < P_1d; i++) {
882     for (CeedInt j = 0; j < P_1d; j++) {
883       CeedScalar sum = 0.0;
884       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
885       mass[i + j * P_1d] = sum;
886     }
887   }
888   // -- Laplacian
889   for (CeedInt i = 0; i < P_1d; i++) {
890     for (CeedInt j = 0; j < P_1d; j++) {
891       CeedScalar sum = 0.0;
892       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
893       laplace[i + j * P_1d] = sum;
894     }
895   }
896   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
897   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
898   return CEED_ERROR_SUCCESS;
899 }
900 CeedPragmaOptimizeOn;
901 
902 /// @}
903 
904 /// ----------------------------------------------------------------------------
905 /// CeedOperator Backend API
906 /// ----------------------------------------------------------------------------
907 /// @addtogroup CeedOperatorBackend
908 /// @{
909 
910 /**
911   @brief Create object holding CeedQFunction assembly data for CeedOperator
912 
913   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
914   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
915 
916   @return An error code: 0 - success, otherwise - failure
917 
918   @ref Backend
919 **/
920 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
921   CeedCall(CeedCalloc(1, data));
922   (*data)->ref_count = 1;
923   (*data)->ceed      = ceed;
924   CeedCall(CeedReference(ceed));
925 
926   return CEED_ERROR_SUCCESS;
927 }
928 
929 /**
930   @brief Increment the reference counter for a CeedQFunctionAssemblyData
931 
932   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
933 
934   @return An error code: 0 - success, otherwise - failure
935 
936   @ref Backend
937 **/
938 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
939   data->ref_count++;
940   return CEED_ERROR_SUCCESS;
941 }
942 
943 /**
944   @brief Set re-use of CeedQFunctionAssemblyData
945 
946   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
947   @param[in]     reuse_data Boolean flag indicating data re-use
948 
949   @return An error code: 0 - success, otherwise - failure
950 
951   @ref Backend
952 **/
953 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
954   data->reuse_data        = reuse_data;
955   data->needs_data_update = true;
956   return CEED_ERROR_SUCCESS;
957 }
958 
959 /**
960   @brief Mark QFunctionAssemblyData as stale
961 
962   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
963   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
964 
965   @return An error code: 0 - success, otherwise - failure
966 
967   @ref Backend
968 **/
969 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
970   data->needs_data_update = needs_data_update;
971   return CEED_ERROR_SUCCESS;
972 }
973 
974 /**
975   @brief Determine if QFunctionAssemblyData needs update
976 
977   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
978   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
979 
980   @return An error code: 0 - success, otherwise - failure
981 
982   @ref Backend
983 **/
984 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
985   *is_update_needed = !data->reuse_data || data->needs_data_update;
986   return CEED_ERROR_SUCCESS;
987 }
988 
989 /**
990   @brief Copy the pointer to a CeedQFunctionAssemblyData.
991            Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
992            Note: If `*data_copy` is non-NULL, then it is assumed that `*data_copy` is a pointer to a CeedQFunctionAssemblyData.
993              This CeedQFunctionAssemblyData will be destroyed if `*data_copy` is the only reference to this CeedQFunctionAssemblyData.
994 
995   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
996   @param[in,out] data_copy Variable to store copied reference
997 
998   @return An error code: 0 - success, otherwise - failure
999 
1000   @ref Backend
1001 **/
1002 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1003   CeedCall(CeedQFunctionAssemblyDataReference(data));
1004   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1005   *data_copy = data;
1006   return CEED_ERROR_SUCCESS;
1007 }
1008 
1009 /**
1010   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1011 
1012   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1013   @param[out] is_setup Boolean flag for setup status
1014 
1015   @return An error code: 0 - success, otherwise - failure
1016 
1017   @ref Backend
1018 **/
1019 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1020   *is_setup = data->is_setup;
1021   return CEED_ERROR_SUCCESS;
1022 }
1023 
1024 /**
1025   @brief Set internal objects for CeedQFunctionAssemblyData
1026 
1027   @param[in,out] data CeedQFunctionAssemblyData to set objects
1028   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1029   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1030 
1031   @return An error code: 0 - success, otherwise - failure
1032 
1033   @ref Backend
1034 **/
1035 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1036   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1037   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1038 
1039   data->is_setup = true;
1040   return CEED_ERROR_SUCCESS;
1041 }
1042 
1043 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1044   if (!data->is_setup) {
1045     // LCOV_EXCL_START
1046     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1047     // LCOV_EXCL_STOP
1048   }
1049 
1050   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1051   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1052 
1053   return CEED_ERROR_SUCCESS;
1054 }
1055 
1056 /**
1057   @brief Destroy CeedQFunctionAssemblyData
1058 
1059   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1060 
1061   @return An error code: 0 - success, otherwise - failure
1062 
1063   @ref Backend
1064 **/
1065 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1066   if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;
1067 
1068   CeedCall(CeedDestroy(&(*data)->ceed));
1069   CeedCall(CeedVectorDestroy(&(*data)->vec));
1070   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1071 
1072   CeedCall(CeedFree(data));
1073   return CEED_ERROR_SUCCESS;
1074 }
1075 
1076 /**
1077   @brief Get CeedOperatorAssemblyData
1078 
1079   @param[in]  op   CeedOperator to assemble
1080   @param[out] data CeedQFunctionAssemblyData
1081 
1082   @return An error code: 0 - success, otherwise - failure
1083 
1084   @ref Backend
1085 **/
1086 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1087   if (!op->op_assembled) {
1088     CeedOperatorAssemblyData data;
1089 
1090     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1091     op->op_assembled = data;
1092   }
1093   *data = op->op_assembled;
1094 
1095   return CEED_ERROR_SUCCESS;
1096 }
1097 
1098 /**
1099   @brief Create object holding CeedOperator assembly data
1100 
1101   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1102   @param[in]  op   CeedOperator to be assembled
1103   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1104 
1105   @return An error code: 0 - success, otherwise - failure
1106 
1107   @ref Backend
1108 **/
1109 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1110   CeedCall(CeedCalloc(1, data));
1111   (*data)->ceed = ceed;
1112   CeedCall(CeedReference(ceed));
1113 
1114   // Build OperatorAssembly data
1115   CeedQFunction       qf;
1116   CeedQFunctionField *qf_fields;
1117   CeedOperatorField  *op_fields;
1118   CeedInt             num_input_fields;
1119   CeedCall(CeedOperatorGetQFunction(op, &qf));
1120   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1121   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1122 
1123   // Determine active input basis
1124   CeedInt       num_eval_mode_in = 0, dim = 1;
1125   CeedEvalMode *eval_mode_in = NULL;
1126   CeedBasis     basis_in     = NULL;
1127   for (CeedInt i = 0; i < num_input_fields; i++) {
1128     CeedVector vec;
1129     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1130     if (vec == CEED_VECTOR_ACTIVE) {
1131       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1132       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1133       CeedEvalMode eval_mode;
1134       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1135       switch (eval_mode) {
1136         case CEED_EVAL_NONE:
1137         case CEED_EVAL_INTERP:
1138           CeedCall(CeedRealloc(num_eval_mode_in + 1, &eval_mode_in));
1139           eval_mode_in[num_eval_mode_in] = eval_mode;
1140           num_eval_mode_in += 1;
1141           break;
1142         case CEED_EVAL_GRAD:
1143           CeedCall(CeedRealloc(num_eval_mode_in + dim, &eval_mode_in));
1144           for (CeedInt d = 0; d < dim; d++) {
1145             eval_mode_in[num_eval_mode_in + d] = eval_mode;
1146           }
1147           num_eval_mode_in += dim;
1148           break;
1149         case CEED_EVAL_WEIGHT:
1150         case CEED_EVAL_DIV:
1151         case CEED_EVAL_CURL:
1152           break;  // Caught by QF Assembly
1153       }
1154     }
1155   }
1156   (*data)->num_eval_mode_in = num_eval_mode_in;
1157   (*data)->eval_mode_in     = eval_mode_in;
1158   CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->basis_in));
1159 
1160   // Determine active output basis
1161   CeedInt num_output_fields;
1162   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1163   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1164   CeedInt       num_eval_mode_out = 0;
1165   CeedEvalMode *eval_mode_out     = NULL;
1166   CeedBasis     basis_out         = NULL;
1167   for (CeedInt i = 0; i < num_output_fields; i++) {
1168     CeedVector vec;
1169     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1170     if (vec == CEED_VECTOR_ACTIVE) {
1171       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1172       CeedEvalMode eval_mode;
1173       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1174       switch (eval_mode) {
1175         case CEED_EVAL_NONE:
1176         case CEED_EVAL_INTERP:
1177           CeedCall(CeedRealloc(num_eval_mode_out + 1, &eval_mode_out));
1178           eval_mode_out[num_eval_mode_out] = eval_mode;
1179           num_eval_mode_out += 1;
1180           break;
1181         case CEED_EVAL_GRAD:
1182           CeedCall(CeedRealloc(num_eval_mode_out + dim, &eval_mode_out));
1183           for (CeedInt d = 0; d < dim; d++) {
1184             eval_mode_out[num_eval_mode_out + d] = eval_mode;
1185           }
1186           num_eval_mode_out += dim;
1187           break;
1188         case CEED_EVAL_WEIGHT:
1189         case CEED_EVAL_DIV:
1190         case CEED_EVAL_CURL:
1191           break;  // Caught by QF Assembly
1192       }
1193     }
1194   }
1195   (*data)->num_eval_mode_out = num_eval_mode_out;
1196   (*data)->eval_mode_out     = eval_mode_out;
1197   CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->basis_out));
1198 
1199   return CEED_ERROR_SUCCESS;
1200 }
1201 
1202 /**
1203   @brief Get CeedOperator CeedEvalModes for assembly
1204 
1205   @param[in]  data              CeedOperatorAssemblyData
1206   @param[out] num_eval_mode_in  Pointer to hold number of input CeedEvalModes, or NULL
1207   @param[out] eval_mode_in      Pointer to hold input CeedEvalModes, or NULL
1208   @param[out] num_eval_mode_out Pointer to hold number of output CeedEvalModes, or NULL
1209   @param[out] eval_mode_out     Pointer to hold output CeedEvalModes, or NULL
1210 
1211   @return An error code: 0 - success, otherwise - failure
1212 
1213   @ref Backend
1214 **/
1215 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_eval_mode_in, const CeedEvalMode **eval_mode_in,
1216                                          CeedInt *num_eval_mode_out, const CeedEvalMode **eval_mode_out) {
1217   if (num_eval_mode_in) *num_eval_mode_in = data->num_eval_mode_in;
1218   if (eval_mode_in) *eval_mode_in = data->eval_mode_in;
1219   if (num_eval_mode_out) *num_eval_mode_out = data->num_eval_mode_out;
1220   if (eval_mode_out) *eval_mode_out = data->eval_mode_out;
1221 
1222   return CEED_ERROR_SUCCESS;
1223 }
1224 
1225 /**
1226   @brief Get CeedOperator CeedBasis data for assembly
1227 
1228   @param[in]  data      CeedOperatorAssemblyData
1229   @param[out] basis_in  Pointer to hold active input CeedBasis, or NULL
1230   @param[out] B_in      Pointer to hold assembled active input B, or NULL
1231   @param[out] basis_out Pointer to hold active output CeedBasis, or NULL
1232   @param[out] B_out     Pointer to hold assembled active output B, or NULL
1233 
1234   @return An error code: 0 - success, otherwise - failure
1235 
1236   @ref Backend
1237 **/
1238 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedBasis *basis_in, const CeedScalar **B_in, CeedBasis *basis_out,
1239                                      const CeedScalar **B_out) {
1240   // Assemble B_in, B_out if needed
1241   if (B_in && !data->B_in) {
1242     CeedInt           num_qpts, elem_size;
1243     CeedScalar       *B_in, *identity = NULL;
1244     const CeedScalar *interp_in, *grad_in;
1245     bool              has_eval_none = false;
1246 
1247     CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_in, &num_qpts));
1248     CeedCall(CeedBasisGetNumNodes(data->basis_in, &elem_size));
1249     CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_in, &B_in));
1250 
1251     for (CeedInt i = 0; i < data->num_eval_mode_in; i++) {
1252       has_eval_none = has_eval_none || (data->eval_mode_in[i] == CEED_EVAL_NONE);
1253     }
1254     if (has_eval_none) {
1255       CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1256       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1257         identity[i * elem_size + i] = 1.0;
1258       }
1259     }
1260     CeedCall(CeedBasisGetInterp(data->basis_in, &interp_in));
1261     CeedCall(CeedBasisGetGrad(data->basis_in, &grad_in));
1262 
1263     for (CeedInt q = 0; q < num_qpts; q++) {
1264       for (CeedInt n = 0; n < elem_size; n++) {
1265         CeedInt d_in = -1;
1266         for (CeedInt e_in = 0; e_in < data->num_eval_mode_in; e_in++) {
1267           const CeedInt     qq = data->num_eval_mode_in * q;
1268           const CeedScalar *b  = NULL;
1269 
1270           if (data->eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in++;
1271           CeedOperatorGetBasisPointer(data->eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &b);
1272           B_in[(qq + e_in) * elem_size + n] = b[q * elem_size + n];
1273         }
1274       }
1275     }
1276     data->B_in = B_in;
1277   }
1278 
1279   if (B_out && !data->B_out) {
1280     CeedInt           num_qpts, elem_size;
1281     CeedScalar       *B_out, *identity = NULL;
1282     const CeedScalar *interp_out, *grad_out;
1283     bool              has_eval_none = false;
1284 
1285     CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_out, &num_qpts));
1286     CeedCall(CeedBasisGetNumNodes(data->basis_out, &elem_size));
1287     CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_out, &B_out));
1288 
1289     for (CeedInt i = 0; i < data->num_eval_mode_out; i++) {
1290       has_eval_none = has_eval_none || (data->eval_mode_out[i] == CEED_EVAL_NONE);
1291     }
1292     if (has_eval_none) {
1293       CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1294       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1295         identity[i * elem_size + i] = 1.0;
1296       }
1297     }
1298     CeedCall(CeedBasisGetInterp(data->basis_out, &interp_out));
1299     CeedCall(CeedBasisGetGrad(data->basis_out, &grad_out));
1300 
1301     for (CeedInt q = 0; q < num_qpts; q++) {
1302       for (CeedInt n = 0; n < elem_size; n++) {
1303         CeedInt d_out = -1;
1304         for (CeedInt e_out = 0; e_out < data->num_eval_mode_out; e_out++) {
1305           const CeedInt     qq = data->num_eval_mode_out * q;
1306           const CeedScalar *b  = NULL;
1307 
1308           if (data->eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out++;
1309           CeedOperatorGetBasisPointer(data->eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &b);
1310           B_out[(qq + e_out) * elem_size + n] = b[q * elem_size + n];
1311         }
1312       }
1313     }
1314     data->B_out = B_out;
1315   }
1316 
1317   if (basis_in) *basis_in = data->basis_in;
1318   if (B_in) *B_in = data->B_in;
1319   if (basis_out) *basis_out = data->basis_out;
1320   if (B_out) *B_out = data->B_out;
1321 
1322   return CEED_ERROR_SUCCESS;
1323 }
1324 
1325 /**
1326   @brief Destroy CeedOperatorAssemblyData
1327 
1328   @param[in,out] data CeedOperatorAssemblyData to destroy
1329 
1330   @return An error code: 0 - success, otherwise - failure
1331 
1332   @ref Backend
1333 **/
1334 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1335   if (!*data) return CEED_ERROR_SUCCESS;
1336 
1337   CeedCall(CeedDestroy(&(*data)->ceed));
1338   CeedCall(CeedBasisDestroy(&(*data)->basis_in));
1339   CeedCall(CeedBasisDestroy(&(*data)->basis_out));
1340   CeedCall(CeedFree(&(*data)->eval_mode_in));
1341   CeedCall(CeedFree(&(*data)->eval_mode_out));
1342   CeedCall(CeedFree(&(*data)->B_in));
1343   CeedCall(CeedFree(&(*data)->B_out));
1344 
1345   CeedCall(CeedFree(data));
1346   return CEED_ERROR_SUCCESS;
1347 }
1348 
1349 /// @}
1350 
1351 /// ----------------------------------------------------------------------------
1352 /// CeedOperator Public API
1353 /// ----------------------------------------------------------------------------
1354 /// @addtogroup CeedOperatorUser
1355 /// @{
1356 
1357 /**
1358   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1359 
1360   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1361     The vector 'assembled' is of shape [num_elements, num_input_fields, num_output_fields, num_quad_points] and contains column-major matrices
1362 representing the action of the CeedQFunction for a corresponding quadrature point on an element. Inputs and outputs are in the order provided by the
1363 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order,
1364 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1]
1365 and producing the output [dv_0, dv_1, v].
1366 
1367   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1368 
1369   @param[in]  op        CeedOperator to assemble CeedQFunction
1370   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1371   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1372   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1373 
1374   @return An error code: 0 - success, otherwise - failure
1375 
1376   @ref User
1377 **/
1378 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1379   CeedCall(CeedOperatorCheckReady(op));
1380 
1381   if (op->LinearAssembleQFunction) {
1382     // Backend version
1383     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1384   } else {
1385     // Operator fallback
1386     CeedOperator op_fallback;
1387 
1388     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1389     if (op_fallback) {
1390       CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1391     } else {
1392       // LCOV_EXCL_START
1393       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1394       // LCOV_EXCL_STOP
1395     }
1396   }
1397   return CEED_ERROR_SUCCESS;
1398 }
1399 
1400 /**
1401   @brief Assemble CeedQFunction and store result internally.
1402            Return copied references of stored data to the caller.
1403            Caller is responsible for ownership and destruction of the copied references.
1404            See also @ref CeedOperatorLinearAssembleQFunction
1405 
1406   @param[in]  op        CeedOperator to assemble CeedQFunction
1407   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1408   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1409   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1410 
1411   @return An error code: 0 - success, otherwise - failure
1412 
1413   @ref User
1414 **/
1415 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1416   CeedCall(CeedOperatorCheckReady(op));
1417 
1418   if (op->LinearAssembleQFunctionUpdate) {
1419     // Backend version
1420     bool                qf_assembled_is_setup;
1421     CeedVector          assembled_vec  = NULL;
1422     CeedElemRestriction assembled_rstr = NULL;
1423 
1424     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1425     if (qf_assembled_is_setup) {
1426       bool update_needed;
1427 
1428       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1429       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1430       if (update_needed) {
1431         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1432       }
1433     } else {
1434       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1435       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1436     }
1437     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1438 
1439     // Copy reference from internally held copy
1440     *assembled = NULL;
1441     *rstr      = NULL;
1442     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1443     CeedCall(CeedVectorDestroy(&assembled_vec));
1444     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1445     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1446   } else {
1447     // Operator fallback
1448     CeedOperator op_fallback;
1449 
1450     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1451     if (op_fallback) {
1452       CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1453     } else {
1454       // LCOV_EXCL_START
1455       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1456       // LCOV_EXCL_STOP
1457     }
1458   }
1459 
1460   return CEED_ERROR_SUCCESS;
1461 }
1462 
1463 /**
1464   @brief Assemble the diagonal of a square linear CeedOperator
1465 
1466   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1467 
1468   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1469 
1470   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1471 
1472   @param[in]  op        CeedOperator to assemble CeedQFunction
1473   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1474   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1475 
1476   @return An error code: 0 - success, otherwise - failure
1477 
1478   @ref User
1479 **/
1480 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1481   bool is_composite;
1482   CeedCall(CeedOperatorCheckReady(op));
1483   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1484 
1485   CeedSize input_size = 0, output_size = 0;
1486   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1487   if (input_size != output_size) {
1488     // LCOV_EXCL_START
1489     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1490     // LCOV_EXCL_STOP
1491   }
1492 
1493   // Early exit for empty operator
1494   if (!is_composite) {
1495     CeedInt num_elem = 0;
1496 
1497     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1498     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1499   }
1500 
1501   if (op->LinearAssembleDiagonal) {
1502     // Backend version
1503     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1504     return CEED_ERROR_SUCCESS;
1505   } else if (op->LinearAssembleAddDiagonal) {
1506     // Backend version with zeroing first
1507     CeedCall(CeedVectorSetValue(assembled, 0.0));
1508     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1509     return CEED_ERROR_SUCCESS;
1510   } else {
1511     // Operator fallback
1512     CeedOperator op_fallback;
1513 
1514     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1515     if (op_fallback) {
1516       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1517       return CEED_ERROR_SUCCESS;
1518     }
1519   }
1520   // Default interface implementation
1521   CeedCall(CeedVectorSetValue(assembled, 0.0));
1522   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1523 
1524   return CEED_ERROR_SUCCESS;
1525 }
1526 
1527 /**
1528   @brief Assemble the diagonal of a square linear CeedOperator
1529 
1530   This sums into a CeedVector the diagonal of a linear CeedOperator.
1531 
1532   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1533 
1534   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1535 
1536   @param[in]  op        CeedOperator to assemble CeedQFunction
1537   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1538   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1539 
1540   @return An error code: 0 - success, otherwise - failure
1541 
1542   @ref User
1543 **/
1544 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1545   bool is_composite;
1546   CeedCall(CeedOperatorCheckReady(op));
1547   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1548 
1549   CeedSize input_size = 0, output_size = 0;
1550   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1551   if (input_size != output_size) {
1552     // LCOV_EXCL_START
1553     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1554     // LCOV_EXCL_STOP
1555   }
1556 
1557   // Early exit for empty operator
1558   if (!is_composite) {
1559     CeedInt num_elem = 0;
1560 
1561     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1562     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1563   }
1564 
1565   if (op->LinearAssembleAddDiagonal) {
1566     // Backend version
1567     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1568     return CEED_ERROR_SUCCESS;
1569   } else {
1570     // Operator fallback
1571     CeedOperator op_fallback;
1572 
1573     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1574     if (op_fallback) {
1575       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1576       return CEED_ERROR_SUCCESS;
1577     }
1578   }
1579   // Default interface implementation
1580   if (is_composite) {
1581     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1582   } else {
1583     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1584   }
1585 
1586   return CEED_ERROR_SUCCESS;
1587 }
1588 
1589 /**
1590   @brief Assemble the point block diagonal of a square linear CeedOperator
1591 
1592   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1593 
1594   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1595 
1596   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1597 
1598   @param[in]  op        CeedOperator to assemble CeedQFunction
1599   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1600 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1601 component in].
1602   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1603 
1604   @return An error code: 0 - success, otherwise - failure
1605 
1606   @ref User
1607 **/
1608 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1609   bool is_composite;
1610   CeedCall(CeedOperatorCheckReady(op));
1611   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1612 
1613   CeedSize input_size = 0, output_size = 0;
1614   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1615   if (input_size != output_size) {
1616     // LCOV_EXCL_START
1617     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1618     // LCOV_EXCL_STOP
1619   }
1620 
1621   // Early exit for empty operator
1622   if (!is_composite) {
1623     CeedInt num_elem = 0;
1624 
1625     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1626     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1627   }
1628 
1629   if (op->LinearAssemblePointBlockDiagonal) {
1630     // Backend version
1631     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1632     return CEED_ERROR_SUCCESS;
1633   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1634     // Backend version with zeroing first
1635     CeedCall(CeedVectorSetValue(assembled, 0.0));
1636     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1637     return CEED_ERROR_SUCCESS;
1638   } else {
1639     // Operator fallback
1640     CeedOperator op_fallback;
1641 
1642     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1643     if (op_fallback) {
1644       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1645       return CEED_ERROR_SUCCESS;
1646     }
1647   }
1648   // Default interface implementation
1649   CeedCall(CeedVectorSetValue(assembled, 0.0));
1650   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1651 
1652   return CEED_ERROR_SUCCESS;
1653 }
1654 
1655 /**
1656   @brief Assemble the point block diagonal of a square linear CeedOperator
1657 
1658   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1659 
1660   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1661 
1662   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1663 
1664   @param[in]  op        CeedOperator to assemble CeedQFunction
1665   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1666 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1667 component in].
1668   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1669 
1670   @return An error code: 0 - success, otherwise - failure
1671 
1672   @ref User
1673 **/
1674 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1675   bool is_composite;
1676   CeedCall(CeedOperatorCheckReady(op));
1677   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1678 
1679   CeedSize input_size = 0, output_size = 0;
1680   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1681   if (input_size != output_size) {
1682     // LCOV_EXCL_START
1683     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1684     // LCOV_EXCL_STOP
1685   }
1686 
1687   // Early exit for empty operator
1688   if (!is_composite) {
1689     CeedInt num_elem = 0;
1690 
1691     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1692     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1693   }
1694 
1695   if (op->LinearAssembleAddPointBlockDiagonal) {
1696     // Backend version
1697     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1698     return CEED_ERROR_SUCCESS;
1699   } else {
1700     // Operator fallback
1701     CeedOperator op_fallback;
1702 
1703     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1704     if (op_fallback) {
1705       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1706       return CEED_ERROR_SUCCESS;
1707     }
1708   }
1709   // Default interface implementation
1710   if (is_composite) {
1711     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1712   } else {
1713     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1714   }
1715 
1716   return CEED_ERROR_SUCCESS;
1717 }
1718 
1719 /**
1720    @brief Fully assemble the nonzero pattern of a linear operator.
1721 
1722    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1723 
1724    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1725 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j)
1726 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1727 
1728    This will generally be slow unless your operator is low-order.
1729 
1730    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1731 
1732    @param[in]  op          CeedOperator to assemble
1733    @param[out] num_entries Number of entries in coordinate nonzero pattern
1734    @param[out] rows        Row number for each entry
1735    @param[out] cols        Column number for each entry
1736 
1737    @ref User
1738 **/
1739 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1740   CeedInt       num_suboperators, single_entries;
1741   CeedOperator *sub_operators;
1742   bool          is_composite;
1743   CeedCall(CeedOperatorCheckReady(op));
1744   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1745 
1746   if (op->LinearAssembleSymbolic) {
1747     // Backend version
1748     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1749     return CEED_ERROR_SUCCESS;
1750   } else {
1751     // Operator fallback
1752     CeedOperator op_fallback;
1753 
1754     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1755     if (op_fallback) {
1756       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1757       return CEED_ERROR_SUCCESS;
1758     }
1759   }
1760 
1761   // Default interface implementation
1762 
1763   // count entries and allocate rows, cols arrays
1764   *num_entries = 0;
1765   if (is_composite) {
1766     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1767     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1768     for (CeedInt k = 0; k < num_suboperators; ++k) {
1769       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1770       *num_entries += single_entries;
1771     }
1772   } else {
1773     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1774     *num_entries += single_entries;
1775   }
1776   CeedCall(CeedCalloc(*num_entries, rows));
1777   CeedCall(CeedCalloc(*num_entries, cols));
1778 
1779   // assemble nonzero locations
1780   CeedInt offset = 0;
1781   if (is_composite) {
1782     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1783     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1784     for (CeedInt k = 0; k < num_suboperators; ++k) {
1785       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1786       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1787       offset += single_entries;
1788     }
1789   } else {
1790     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1791   }
1792 
1793   return CEED_ERROR_SUCCESS;
1794 }
1795 
1796 /**
1797    @brief Fully assemble the nonzero entries of a linear operator.
1798 
1799    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1800 
1801    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1802 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added,
1803 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1804 
1805    This will generally be slow unless your operator is low-order.
1806 
1807    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1808 
1809    @param[in]  op     CeedOperator to assemble
1810    @param[out] values Values to assemble into matrix
1811 
1812    @ref User
1813 **/
1814 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1815   CeedInt       num_suboperators, single_entries = 0;
1816   CeedOperator *sub_operators;
1817   bool          is_composite;
1818   CeedCall(CeedOperatorCheckReady(op));
1819   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1820 
1821   // Early exit for empty operator
1822   if (!is_composite) {
1823     CeedInt num_elem = 0;
1824 
1825     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1826     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1827   }
1828 
1829   if (op->LinearAssemble) {
1830     // Backend version
1831     CeedCall(op->LinearAssemble(op, values));
1832     return CEED_ERROR_SUCCESS;
1833   } else {
1834     // Operator fallback
1835     CeedOperator op_fallback;
1836 
1837     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1838     if (op_fallback) {
1839       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
1840       return CEED_ERROR_SUCCESS;
1841     }
1842   }
1843 
1844   // Default interface implementation
1845   CeedInt offset = 0;
1846   if (is_composite) {
1847     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1848     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1849     for (CeedInt k = 0; k < num_suboperators; k++) {
1850       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
1851       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1852       offset += single_entries;
1853     }
1854   } else {
1855     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
1856   }
1857 
1858   return CEED_ERROR_SUCCESS;
1859 }
1860 
1861 /**
1862   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
1863 
1864   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1865 
1866   @param[in]  op               Composite CeedOperator
1867   @param[in]  num_skip_indices Number of suboperators to skip
1868   @param[in]  skip_indices     Array of indices of suboperators to skip
1869   @param[out] mult             Vector to store multiplicity (of size l_size)
1870 
1871   @return An error code: 0 - success, otherwise - failure
1872 
1873   @ref User
1874 **/
1875 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
1876   CeedCall(CeedOperatorCheckReady(op));
1877 
1878   Ceed                ceed;
1879   CeedInt             num_suboperators;
1880   CeedSize            l_vec_len;
1881   CeedScalar         *mult_array;
1882   CeedVector          ones_l_vec;
1883   CeedElemRestriction elem_restr;
1884   CeedOperator       *sub_operators;
1885 
1886   CeedCall(CeedOperatorGetCeed(op, &ceed));
1887 
1888   // Zero mult vector
1889   CeedCall(CeedVectorSetValue(mult, 0.0));
1890 
1891   // Get suboperators
1892   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1893   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1894   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
1895 
1896   // Work vector
1897   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
1898   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
1899   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
1900   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
1901 
1902   // Compute multiplicity across suboperators
1903   for (CeedInt i = 0; i < num_suboperators; i++) {
1904     const CeedScalar *sub_mult_array;
1905     CeedVector        sub_mult_l_vec, ones_e_vec;
1906 
1907     // -- Check for suboperator to skip
1908     for (CeedInt j = 0; j < num_skip_indices; j++) {
1909       if (skip_indices[j] == i) continue;
1910     }
1911 
1912     // -- Sub operator multiplicity
1913     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_restr));
1914     CeedCall(CeedElemRestrictionCreateVector(elem_restr, &sub_mult_l_vec, &ones_e_vec));
1915     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
1916     CeedCall(CeedElemRestrictionApply(elem_restr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
1917     CeedCall(CeedElemRestrictionApply(elem_restr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
1918     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
1919     // ---- Flag every node present in the current suboperator
1920     for (CeedInt j = 0; j < l_vec_len; j++) {
1921       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
1922     }
1923     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
1924     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
1925     CeedCall(CeedVectorDestroy(&ones_e_vec));
1926   }
1927   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
1928   CeedCall(CeedVectorDestroy(&ones_l_vec));
1929 
1930   return CEED_ERROR_SUCCESS;
1931 }
1932 
1933 /**
1934   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
1935 grid interpolation
1936 
1937   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1938 
1939   @param[in]  op_fine      Fine grid operator
1940   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
1941   @param[in]  rstr_coarse  Coarse grid restriction
1942   @param[in]  basis_coarse Coarse grid active vector basis
1943   @param[out] op_coarse    Coarse grid operator
1944   @param[out] op_prolong   Coarse to fine operator, or NULL
1945   @param[out] op_restrict  Fine to coarse operator, or NULL
1946 
1947   @return An error code: 0 - success, otherwise - failure
1948 
1949   @ref User
1950 **/
1951 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1952                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
1953   CeedCall(CeedOperatorCheckReady(op_fine));
1954 
1955   // Build prolongation matrix, if required
1956   CeedBasis basis_c_to_f = NULL;
1957   if (op_prolong || op_restrict) {
1958     CeedBasis basis_fine;
1959     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1960     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
1961   }
1962 
1963   // Core code
1964   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1965 
1966   return CEED_ERROR_SUCCESS;
1967 }
1968 
1969 /**
1970   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
1971 
1972   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1973 
1974   @param[in]  op_fine       Fine grid operator
1975   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
1976   @param[in]  rstr_coarse   Coarse grid restriction
1977   @param[in]  basis_coarse  Coarse grid active vector basis
1978   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
1979   @param[out] op_coarse     Coarse grid operator
1980   @param[out] op_prolong    Coarse to fine operator, or NULL
1981   @param[out] op_restrict   Fine to coarse operator, or NULL
1982 
1983   @return An error code: 0 - success, otherwise - failure
1984 
1985   @ref User
1986 **/
1987 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1988                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
1989                                              CeedOperator *op_restrict) {
1990   CeedCall(CeedOperatorCheckReady(op_fine));
1991   Ceed ceed;
1992   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
1993 
1994   // Check for compatible quadrature spaces
1995   CeedBasis basis_fine;
1996   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1997   CeedInt Q_f, Q_c;
1998   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
1999   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2000   if (Q_f != Q_c) {
2001     // LCOV_EXCL_START
2002     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2003     // LCOV_EXCL_STOP
2004   }
2005 
2006   // Create coarse to fine basis, if required
2007   CeedBasis basis_c_to_f = NULL;
2008   if (op_prolong || op_restrict) {
2009     // Check if interpolation matrix is provided
2010     if (!interp_c_to_f) {
2011       // LCOV_EXCL_START
2012       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2013       // LCOV_EXCL_STOP
2014     }
2015     CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2016     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2017     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2018     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2019     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2020     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2021     CeedScalar *q_ref, *q_weight, *grad;
2022     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2023     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2024     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2025     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2026     CeedCall(CeedFree(&q_ref));
2027     CeedCall(CeedFree(&q_weight));
2028     CeedCall(CeedFree(&grad));
2029   }
2030 
2031   // Core code
2032   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2033   return CEED_ERROR_SUCCESS;
2034 }
2035 
2036 /**
2037   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2038 
2039   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2040 
2041   @param[in]  op_fine       Fine grid operator
2042   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2043   @param[in]  rstr_coarse   Coarse grid restriction
2044   @param[in]  basis_coarse  Coarse grid active vector basis
2045   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2046   @param[out] op_coarse     Coarse grid operator
2047   @param[out] op_prolong    Coarse to fine operator, or NULL
2048   @param[out] op_restrict   Fine to coarse operator, or NULL
2049 
2050   @return An error code: 0 - success, otherwise - failure
2051 
2052   @ref User
2053 **/
2054 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2055                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2056                                        CeedOperator *op_restrict) {
2057   CeedCall(CeedOperatorCheckReady(op_fine));
2058   Ceed ceed;
2059   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2060 
2061   // Check for compatible quadrature spaces
2062   CeedBasis basis_fine;
2063   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2064   CeedInt Q_f, Q_c;
2065   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2066   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2067   if (Q_f != Q_c) {
2068     // LCOV_EXCL_START
2069     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2070     // LCOV_EXCL_STOP
2071   }
2072 
2073   // Coarse to fine basis
2074   CeedBasis basis_c_to_f = NULL;
2075   if (op_prolong || op_restrict) {
2076     // Check if interpolation matrix is provided
2077     if (!interp_c_to_f) {
2078       // LCOV_EXCL_START
2079       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2080       // LCOV_EXCL_STOP
2081     }
2082     CeedElemTopology topo;
2083     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2084     CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2085     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2086     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2087     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2088     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2089     CeedScalar *q_ref, *q_weight, *grad;
2090     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2091     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2092     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2093     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2094     CeedCall(CeedFree(&q_ref));
2095     CeedCall(CeedFree(&q_weight));
2096     CeedCall(CeedFree(&grad));
2097   }
2098 
2099   // Core code
2100   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2101   return CEED_ERROR_SUCCESS;
2102 }
2103 
2104 /**
2105   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2106 
2107   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2108     This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, M = V^T V, K = V^T S V.
2109     The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form V^T
2110 S^hat V. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear.
2111 
2112   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2113 
2114   @param[in]  op      CeedOperator to create element inverses
2115   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2116   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2117 
2118   @return An error code: 0 - success, otherwise - failure
2119 
2120   @ref User
2121 **/
2122 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2123   CeedCall(CeedOperatorCheckReady(op));
2124 
2125   if (op->CreateFDMElementInverse) {
2126     // Backend version
2127     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2128     return CEED_ERROR_SUCCESS;
2129   } else {
2130     // Operator fallback
2131     CeedOperator op_fallback;
2132 
2133     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2134     if (op_fallback) {
2135       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2136       return CEED_ERROR_SUCCESS;
2137     }
2138   }
2139 
2140   // Default interface implementation
2141   Ceed ceed, ceed_parent;
2142   CeedCall(CeedOperatorGetCeed(op, &ceed));
2143   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2144   ceed_parent = ceed_parent ? ceed_parent : ceed;
2145   CeedQFunction qf;
2146   CeedCall(CeedOperatorGetQFunction(op, &qf));
2147 
2148   // Determine active input basis
2149   bool                interp = false, grad = false;
2150   CeedBasis           basis = NULL;
2151   CeedElemRestriction rstr  = NULL;
2152   CeedOperatorField  *op_fields;
2153   CeedQFunctionField *qf_fields;
2154   CeedInt             num_input_fields;
2155   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2156   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2157   for (CeedInt i = 0; i < num_input_fields; i++) {
2158     CeedVector vec;
2159     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2160     if (vec == CEED_VECTOR_ACTIVE) {
2161       CeedEvalMode eval_mode;
2162       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2163       interp = interp || eval_mode == CEED_EVAL_INTERP;
2164       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2165       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2166       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2167     }
2168   }
2169   if (!basis) {
2170     // LCOV_EXCL_START
2171     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2172     // LCOV_EXCL_STOP
2173   }
2174   CeedSize l_size = 1;
2175   CeedInt  P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
2176   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2177   CeedCall(CeedBasisGetNumNodes(basis, &elem_size));
2178   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2179   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2180   CeedCall(CeedBasisGetDimension(basis, &dim));
2181   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2182   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2183   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2184 
2185   // Build and diagonalize 1D Mass and Laplacian
2186   bool tensor_basis;
2187   CeedCall(CeedBasisIsTensor(basis, &tensor_basis));
2188   if (!tensor_basis) {
2189     // LCOV_EXCL_START
2190     return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2191     // LCOV_EXCL_STOP
2192   }
2193   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2194   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2195   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2196   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2197   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2198   CeedCall(CeedCalloc(P_1d, &lambda));
2199   // -- Build matrices
2200   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2201   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2202   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2203   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2204   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2205 
2206   // -- Diagonalize
2207   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2208   CeedCall(CeedFree(&mass));
2209   CeedCall(CeedFree(&laplace));
2210   for (CeedInt i = 0; i < P_1d; i++) {
2211     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2212   }
2213   CeedCall(CeedFree(&x));
2214 
2215   // Assemble QFunction
2216   CeedVector          assembled;
2217   CeedElemRestriction rstr_qf;
2218   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2219   CeedInt layout[3];
2220   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2221   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2222   CeedScalar max_norm = 0;
2223   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2224 
2225   // Calculate element averages
2226   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2227   CeedScalar       *elem_avg;
2228   const CeedScalar *assembled_array, *q_weight_array;
2229   CeedVector        q_weight;
2230   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2231   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2232   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2233   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2234   CeedCall(CeedCalloc(num_elem, &elem_avg));
2235   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2236   for (CeedInt e = 0; e < num_elem; e++) {
2237     CeedInt count = 0;
2238     for (CeedInt q = 0; q < num_qpts; q++) {
2239       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2240         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2241           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2242           count++;
2243         }
2244       }
2245     }
2246     if (count) {
2247       elem_avg[e] /= count;
2248     } else {
2249       elem_avg[e] = 1.0;
2250     }
2251   }
2252   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2253   CeedCall(CeedVectorDestroy(&assembled));
2254   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2255   CeedCall(CeedVectorDestroy(&q_weight));
2256 
2257   // Build FDM diagonal
2258   CeedVector  q_data;
2259   CeedScalar *q_data_array, *fdm_diagonal;
2260   CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal));
2261   const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON;
2262   for (CeedInt c = 0; c < num_comp; c++) {
2263     for (CeedInt n = 0; n < elem_size; n++) {
2264       if (interp) fdm_diagonal[c * elem_size + n] = 1.0;
2265       if (grad) {
2266         for (CeedInt d = 0; d < dim; d++) {
2267           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2268           fdm_diagonal[c * elem_size + n] += lambda[i];
2269         }
2270       }
2271       if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound;
2272     }
2273   }
2274   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data));
2275   CeedCall(CeedVectorSetValue(q_data, 0.0));
2276   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2277   for (CeedInt e = 0; e < num_elem; e++) {
2278     for (CeedInt c = 0; c < num_comp; c++) {
2279       for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]);
2280     }
2281   }
2282   CeedCall(CeedFree(&elem_avg));
2283   CeedCall(CeedFree(&fdm_diagonal));
2284   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2285 
2286   // Setup FDM operator
2287   // -- Basis
2288   CeedBasis   fdm_basis;
2289   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2290   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2291   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2292   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2293   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2294   CeedCall(CeedFree(&fdm_interp));
2295   CeedCall(CeedFree(&grad_dummy));
2296   CeedCall(CeedFree(&q_ref_dummy));
2297   CeedCall(CeedFree(&q_weight_dummy));
2298   CeedCall(CeedFree(&lambda));
2299 
2300   // -- Restriction
2301   CeedElemRestriction rstr_qd_i;
2302   CeedInt             strides[3] = {1, elem_size, elem_size * num_comp};
2303   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i));
2304   // -- QFunction
2305   CeedQFunction qf_fdm;
2306   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2307   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2308   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2309   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2310   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2311   // -- QFunction context
2312   CeedInt *num_comp_data;
2313   CeedCall(CeedCalloc(1, &num_comp_data));
2314   num_comp_data[0] = num_comp;
2315   CeedQFunctionContext ctx_fdm;
2316   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2317   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2318   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2319   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2320   // -- Operator
2321   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2322   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2323   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2324   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2325 
2326   // Cleanup
2327   CeedCall(CeedVectorDestroy(&q_data));
2328   CeedCall(CeedBasisDestroy(&fdm_basis));
2329   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2330   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2331 
2332   return CEED_ERROR_SUCCESS;
2333 }
2334 
2335 /// @}
2336