xref: /libCEED/interface/ceed-preconditioning.c (revision 83d6adf320d467417864810b884f58e4c3cda983)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <assert.h>
9 #include <ceed-impl.h>
10 #include <ceed/backend.h>
11 #include <ceed/ceed.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_restr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_restr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) {
128       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     }
130     // Cleanup
131     CeedCall(CeedQFunctionDestroy(&qf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
133     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
134   }
135   CeedCall(CeedOperatorSetName(op_fallback, op->name));
136   CeedCall(CeedOperatorCheckReady(op_fallback));
137   op->op_fallback = op_fallback;
138 
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
144 
145   @param[in]  op          CeedOperator to retrieve fallback for
146   @param[out] op_fallback Fallback CeedOperator
147 
148   @return An error code: 0 - success, otherwise - failure
149 
150   @ref Developer
151 **/
152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
153   // Create if needed
154   if (!op->op_fallback) {
155     CeedCall(CeedOperatorCreateFallback(op));
156   }
157   if (op->op_fallback) {
158     bool is_debug;
159 
160     CeedCall(CeedIsDebug(op->ceed, &is_debug));
161     if (is_debug) {
162       Ceed        ceed, ceed_fallback;
163       const char *resource, *resource_fallback;
164 
165       CeedCall(CeedOperatorGetCeed(op, &ceed));
166       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
167       CeedCall(CeedGetResource(ceed, &resource));
168       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
169 
170       CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n");
171       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
172                 op->op_fallback);
173     }
174   }
175   *op_fallback = op->op_fallback;
176 
177   return CEED_ERROR_SUCCESS;
178 }
179 
180 /**
181   @brief Select correct basis matrix pointer based on CeedEvalMode
182 
183   @param[in]  eval_mode Current basis evaluation mode
184   @param[in]  identity  Pointer to identity matrix
185   @param[in]  interp    Pointer to interpolation matrix
186   @param[in]  grad      Pointer to gradient matrix
187   @param[out] basis_ptr Basis pointer to set
188 
189   @ref Developer
190 **/
191 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad,
192                                                const CeedScalar **basis_ptr) {
193   switch (eval_mode) {
194     case CEED_EVAL_NONE:
195       *basis_ptr = identity;
196       break;
197     case CEED_EVAL_INTERP:
198       *basis_ptr = interp;
199       break;
200     case CEED_EVAL_GRAD:
201       *basis_ptr = grad;
202       break;
203     case CEED_EVAL_WEIGHT:
204     case CEED_EVAL_DIV:
205     case CEED_EVAL_CURL:
206       break;  // Caught by QF Assembly
207   }
208   assert(*basis_ptr != NULL);
209 }
210 
211 /**
212   @brief Create point block restriction for active operator field
213 
214   @param[in]  rstr            Original CeedElemRestriction for active field
215   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
216 
217   @return An error code: 0 - success, otherwise - failure
218 
219   @ref Developer
220 **/
221 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
222   Ceed ceed;
223   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
224   const CeedInt *offsets;
225   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
226 
227   // Expand offsets
228   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
229   CeedSize l_size;
230   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
231   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
232   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
233   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
234   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
235   CeedInt shift = num_comp;
236   if (comp_stride != 1) shift *= num_comp;
237   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
238   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
239     pointblock_offsets[i] = offsets[i] * shift;
240   }
241 
242   // Create new restriction
243   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
244                                      pointblock_offsets, pointblock_rstr));
245 
246   // Cleanup
247   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
248 
249   return CEED_ERROR_SUCCESS;
250 }
251 
252 /**
253   @brief Core logic for assembling operator diagonal or point block diagonal
254 
255   @param[in]  op            CeedOperator to assemble point block diagonal
256   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
257   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
258   @param[out] assembled     CeedVector to store assembled diagonal
259 
260   @return An error code: 0 - success, otherwise - failure
261 
262   @ref Developer
263 **/
264 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
265   Ceed ceed;
266   CeedCall(CeedOperatorGetCeed(op, &ceed));
267 
268   // Assemble QFunction
269   CeedQFunction qf;
270   CeedCall(CeedOperatorGetQFunction(op, &qf));
271   CeedInt num_input_fields, num_output_fields;
272   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
273   CeedVector          assembled_qf;
274   CeedElemRestriction rstr;
275   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr, request));
276   CeedInt layout[3];
277   CeedCall(CeedElemRestrictionGetELayout(rstr, &layout));
278   CeedCall(CeedElemRestrictionDestroy(&rstr));
279 
280   // Get assembly data
281   CeedOperatorAssemblyData data;
282   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
283   const CeedEvalMode *eval_mode_in, *eval_mode_out;
284   CeedInt             num_eval_mode_in, num_eval_mode_out;
285   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out));
286   CeedBasis basis_in, basis_out;
287   CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL));
288   CeedInt num_comp;
289   CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
290 
291   // Assemble point block diagonal restriction, if needed
292   CeedElemRestriction diag_rstr;
293   CeedCall(CeedOperatorGetActiveElemRestriction(op, &diag_rstr));
294   if (is_pointblock) {
295     CeedElemRestriction point_block_rstr;
296     CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_rstr, &point_block_rstr));
297     diag_rstr = point_block_rstr;
298   }
299 
300   // Create diagonal vector
301   CeedVector elem_diag;
302   CeedCall(CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag));
303 
304   // Assemble element operator diagonals
305   CeedScalar       *elem_diag_array;
306   const CeedScalar *assembled_qf_array;
307   CeedCall(CeedVectorSetValue(elem_diag, 0.0));
308   CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
309   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
310   CeedInt num_elem, num_nodes, num_qpts;
311   CeedCall(CeedElemRestrictionGetNumElements(diag_rstr, &num_elem));
312   CeedCall(CeedBasisGetNumNodes(basis_in, &num_nodes));
313   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
314 
315   // Basis matrices
316   const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
317   CeedScalar       *identity      = NULL;
318   bool              has_eval_none = false;
319   for (CeedInt i = 0; i < num_eval_mode_in; i++) {
320     has_eval_none = has_eval_none || (eval_mode_in[i] == CEED_EVAL_NONE);
321   }
322   for (CeedInt i = 0; i < num_eval_mode_out; i++) {
323     has_eval_none = has_eval_none || (eval_mode_out[i] == CEED_EVAL_NONE);
324   }
325   if (has_eval_none) {
326     CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
327     for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
328   }
329   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
330   CeedCall(CeedBasisGetInterp(basis_out, &interp_out));
331   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
332   CeedCall(CeedBasisGetGrad(basis_out, &grad_out));
333   // Compute the diagonal of B^T D B
334   // Each element
335   for (CeedInt e = 0; e < num_elem; e++) {
336     CeedInt d_out = -1;
337     // Each basis eval mode pair
338     for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
339       const CeedScalar *bt = NULL;
340       if (eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out += 1;
341       CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * num_nodes], &bt);
342       CeedInt d_in = -1;
343       for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
344         const CeedScalar *b = NULL;
345         if (eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in += 1;
346         CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * num_nodes], &b);
347         // Each component
348         for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
349           // Each qpoint/node pair
350           for (CeedInt q = 0; q < num_qpts; q++) {
351             if (is_pointblock) {
352               // Point Block Diagonal
353               for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
354                 const CeedScalar qf_value =
355                     assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_in) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] +
356                                        e * layout[2]];
357                 for (CeedInt n = 0; n < num_nodes; n++) {
358                   elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
359                       bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n];
360                 }
361               }
362             } else {
363               // Diagonal Only
364               const CeedScalar qf_value =
365                   assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_out) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] +
366                                      e * layout[2]];
367               for (CeedInt n = 0; n < num_nodes; n++) {
368                 elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n];
369               }
370             }
371           }
372         }
373       }
374     }
375   }
376   CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
377   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
378 
379   // Assemble local operator diagonal
380   CeedCall(CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
381 
382   // Cleanup
383   if (is_pointblock) {
384     CeedCall(CeedElemRestrictionDestroy(&diag_rstr));
385   }
386   CeedCall(CeedVectorDestroy(&assembled_qf));
387   CeedCall(CeedVectorDestroy(&elem_diag));
388   CeedCall(CeedFree(&identity));
389 
390   return CEED_ERROR_SUCCESS;
391 }
392 
393 /**
394   @brief Core logic for assembling composite operator diagonal
395 
396   @param[in]  op            CeedOperator to assemble point block diagonal
397   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
398   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
399   @param[out] assembled     CeedVector to store assembled diagonal
400 
401   @return An error code: 0 - success, otherwise - failure
402 
403   @ref Developer
404 **/
405 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
406                                                                  CeedVector assembled) {
407   CeedInt       num_sub;
408   CeedOperator *suboperators;
409   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
410   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
411   for (CeedInt i = 0; i < num_sub; i++) {
412     if (is_pointblock) {
413       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
414     } else {
415       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
416     }
417   }
418   return CEED_ERROR_SUCCESS;
419 }
420 
421 /**
422   @brief Build nonzero pattern for non-composite operator
423 
424   Users should generally use CeedOperatorLinearAssembleSymbolic()
425 
426   @param[in]  op     CeedOperator to assemble nonzero pattern
427   @param[in]  offset Offset for number of entries
428   @param[out] rows   Row number for each entry
429   @param[out] cols   Column number for each entry
430 
431   @return An error code: 0 - success, otherwise - failure
432 
433   @ref Developer
434 **/
435 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
436   Ceed ceed;
437   bool is_composite;
438   CeedCall(CeedOperatorGetCeed(op, &ceed));
439   CeedCall(CeedOperatorIsComposite(op, &is_composite));
440 
441   if (is_composite) {
442     // LCOV_EXCL_START
443     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
444     // LCOV_EXCL_STOP
445   }
446 
447   CeedSize num_nodes;
448   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
449   CeedElemRestriction rstr_in;
450   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
451   CeedInt num_elem, elem_size, num_comp;
452   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
453   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
454   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
455   CeedInt layout_er[3];
456   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
457 
458   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
459 
460   // Determine elem_dof relation
461   CeedVector index_vec;
462   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
463   CeedScalar *array;
464   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
465   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
466   CeedCall(CeedVectorRestoreArray(index_vec, &array));
467   CeedVector elem_dof;
468   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
469   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
470   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
471   const CeedScalar *elem_dof_a;
472   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
473   CeedCall(CeedVectorDestroy(&index_vec));
474 
475   // Determine i, j locations for element matrices
476   CeedInt count = 0;
477   for (CeedInt e = 0; e < num_elem; e++) {
478     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
479       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
480         for (CeedInt i = 0; i < elem_size; i++) {
481           for (CeedInt j = 0; j < elem_size; j++) {
482             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
483             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
484 
485             const CeedInt row = elem_dof_a[elem_dof_index_row];
486             const CeedInt col = elem_dof_a[elem_dof_index_col];
487 
488             rows[offset + count] = row;
489             cols[offset + count] = col;
490             count++;
491           }
492         }
493       }
494     }
495   }
496   if (count != local_num_entries) {
497     // LCOV_EXCL_START
498     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
499     // LCOV_EXCL_STOP
500   }
501   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
502   CeedCall(CeedVectorDestroy(&elem_dof));
503 
504   return CEED_ERROR_SUCCESS;
505 }
506 
507 /**
508   @brief Assemble nonzero entries for non-composite operator
509 
510   Users should generally use CeedOperatorLinearAssemble()
511 
512   @param[in]  op     CeedOperator to assemble
513   @param[in]  offset Offset for number of entries
514   @param[out] values Values to assemble into matrix
515 
516   @return An error code: 0 - success, otherwise - failure
517 
518   @ref Developer
519 **/
520 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
521   Ceed ceed;
522   bool is_composite;
523   CeedCall(CeedOperatorGetCeed(op, &ceed));
524   CeedCall(CeedOperatorIsComposite(op, &is_composite));
525 
526   if (is_composite) {
527     // LCOV_EXCL_START
528     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
529     // LCOV_EXCL_STOP
530   }
531 
532   // Early exit for empty operator
533   {
534     CeedInt num_elem = 0;
535 
536     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
537     if (num_elem == 0) return CEED_ERROR_SUCCESS;
538   }
539 
540   if (op->LinearAssembleSingle) {
541     // Backend version
542     CeedCall(op->LinearAssembleSingle(op, offset, values));
543     return CEED_ERROR_SUCCESS;
544   } else {
545     // Operator fallback
546     CeedOperator op_fallback;
547 
548     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
549     if (op_fallback) {
550       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
551       return CEED_ERROR_SUCCESS;
552     }
553   }
554 
555   // Assemble QFunction
556   CeedQFunction qf;
557   CeedCall(CeedOperatorGetQFunction(op, &qf));
558   CeedVector          assembled_qf;
559   CeedElemRestriction rstr_q;
560   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
561   CeedSize qf_length;
562   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
563 
564   CeedInt            num_input_fields, num_output_fields;
565   CeedOperatorField *input_fields;
566   CeedOperatorField *output_fields;
567   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
568 
569   // Get assembly data
570   CeedOperatorAssemblyData data;
571   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
572   const CeedEvalMode *eval_mode_in, *eval_mode_out;
573   CeedInt             num_eval_mode_in, num_eval_mode_out;
574   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out));
575   CeedBasis basis_in, basis_out;
576   CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL));
577 
578   if (num_eval_mode_in == 0 || num_eval_mode_out == 0) {
579     // LCOV_EXCL_START
580     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
581     // LCOV_EXCL_STOP
582   }
583 
584   CeedElemRestriction active_rstr;
585   CeedInt             num_elem, elem_size, num_qpts, num_comp;
586   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
587   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
588   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
589   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
590   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
591 
592   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
593 
594   // loop over elements and put in data structure
595   const CeedScalar *interp_in, *grad_in;
596   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
597   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
598 
599   const CeedScalar *assembled_qf_array;
600   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
601 
602   CeedInt layout_qf[3];
603   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
604   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
605 
606   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
607   const CeedScalar *B_mat_in, *B_mat_out;
608   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &B_mat_in, NULL, &B_mat_out));
609   CeedScalar  BTD_mat[elem_size * num_qpts * num_eval_mode_in];
610   CeedScalar  elem_mat[elem_size * elem_size];
611   CeedInt     count = 0;
612   CeedScalar *vals;
613   CeedCall(CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals));
614   for (CeedInt e = 0; e < num_elem; e++) {
615     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
616       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
617         // Compute B^T*D
618         for (CeedInt n = 0; n < elem_size; n++) {
619           for (CeedInt q = 0; q < num_qpts; q++) {
620             for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
621               const CeedInt btd_index = n * (num_qpts * num_eval_mode_in) + (num_eval_mode_in * q + e_in);
622               CeedScalar    sum       = 0.0;
623               for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
624                 const CeedInt b_out_index     = (num_eval_mode_out * q + e_out) * elem_size + n;
625                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_mode_out + e_out) * num_comp + comp_out;
626                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
627                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
628               }
629               BTD_mat[btd_index] = sum;
630             }
631           }
632         }
633         // form element matrix itself (for each block component)
634         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_mode_in));
635 
636         // put element matrix in coordinate data structure
637         for (CeedInt i = 0; i < elem_size; i++) {
638           for (CeedInt j = 0; j < elem_size; j++) {
639             vals[offset + count] = elem_mat[i * elem_size + j];
640             count++;
641           }
642         }
643       }
644     }
645   }
646   if (count != local_num_entries) {
647     // LCOV_EXCL_START
648     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
649     // LCOV_EXCL_STOP
650   }
651   CeedCall(CeedVectorRestoreArray(values, &vals));
652 
653   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
654   CeedCall(CeedVectorDestroy(&assembled_qf));
655 
656   return CEED_ERROR_SUCCESS;
657 }
658 
659 /**
660   @brief Count number of entries for assembled CeedOperator
661 
662   @param[in]  op          CeedOperator to assemble
663   @param[out] num_entries Number of entries in assembled representation
664 
665   @return An error code: 0 - success, otherwise - failure
666 
667   @ref Utility
668 **/
669 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
670   bool                is_composite;
671   CeedElemRestriction rstr;
672   CeedInt             num_elem, elem_size, num_comp;
673 
674   CeedCall(CeedOperatorIsComposite(op, &is_composite));
675   if (is_composite) {
676     // LCOV_EXCL_START
677     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
678     // LCOV_EXCL_STOP
679   }
680   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
681   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
682   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
683   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
684   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
685 
686   return CEED_ERROR_SUCCESS;
687 }
688 
689 /**
690   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
691 
692   @param[in]  op_fine      Fine grid operator
693   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter
694   @param[in]  rstr_coarse  Coarse grid restriction
695   @param[in]  basis_coarse Coarse grid active vector basis
696   @param[in]  basis_c_to_f Basis for coarse to fine interpolation
697   @param[out] op_coarse    Coarse grid operator
698   @param[out] op_prolong   Coarse to fine operator
699   @param[out] op_restrict  Fine to coarse operator
700 
701   @return An error code: 0 - success, otherwise - failure
702 
703   @ref Developer
704 **/
705 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
706                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
707   Ceed ceed;
708   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
709 
710   // Check for composite operator
711   bool is_composite;
712   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
713   if (is_composite) {
714     // LCOV_EXCL_START
715     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
716     // LCOV_EXCL_STOP
717   }
718 
719   // Coarse Grid
720   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
721   CeedElemRestriction rstr_fine = NULL;
722   // -- Clone input fields
723   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
724     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
725       rstr_fine = op_fine->input_fields[i]->elem_restr;
726       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
727     } else {
728       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_restr,
729                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
730     }
731   }
732   // -- Clone output fields
733   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
734     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
735       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
736     } else {
737       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_restr,
738                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
739     }
740   }
741   // -- Clone QFunctionAssemblyData
742   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
743 
744   // Multiplicity vector
745   CeedVector mult_vec, mult_e_vec;
746   CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
747   CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
748   CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
749   CeedCall(CeedVectorSetValue(mult_vec, 0.0));
750   CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
751   CeedCall(CeedVectorDestroy(&mult_e_vec));
752   CeedCall(CeedVectorReciprocal(mult_vec));
753 
754   // Clone name
755   bool   has_name = op_fine->name;
756   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
757   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
758 
759   // Restriction/Prolongation Operators
760 
761   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
762   if ((op_restrict || op_prolong) && !basis_c_to_f) {
763     // LCOV_EXCL_START
764     return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine basis");
765     // LCOV_EXCL_STOP
766   }
767 
768   CeedInt num_comp;
769   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
770 
771   // Restriction
772   if (op_restrict) {
773     CeedQFunction qf_restrict;
774     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
775     CeedInt *num_comp_r_data;
776     CeedCall(CeedCalloc(1, &num_comp_r_data));
777     num_comp_r_data[0] = num_comp;
778     CeedQFunctionContext ctx_r;
779     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
780     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
781     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
782     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
783     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
784     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
785     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
786     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
787 
788     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
789     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
790     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
791     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
792 
793     // Set name
794     char *restriction_name;
795     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
796     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
797     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
798     CeedCall(CeedFree(&restriction_name));
799 
800     // Check
801     CeedCall(CeedOperatorCheckReady(*op_restrict));
802 
803     // Cleanup
804     CeedCall(CeedQFunctionDestroy(&qf_restrict));
805   }
806 
807   // Prolongation
808   if (op_prolong) {
809     CeedQFunction qf_prolong;
810     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
811     CeedInt *num_comp_p_data;
812     CeedCall(CeedCalloc(1, &num_comp_p_data));
813     num_comp_p_data[0] = num_comp;
814     CeedQFunctionContext ctx_p;
815     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
816     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
817     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
818     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
819     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
820     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
821     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
822     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
823 
824     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
825     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
826     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
827     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
828 
829     // Set name
830     char *prolongation_name;
831     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
832     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
833     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
834     CeedCall(CeedFree(&prolongation_name));
835 
836     // Check
837     CeedCall(CeedOperatorCheckReady(*op_prolong));
838 
839     // Cleanup
840     CeedCall(CeedQFunctionDestroy(&qf_prolong));
841   }
842 
843   // Check
844   CeedCall(CeedOperatorCheckReady(*op_coarse));
845 
846   // Cleanup
847   CeedCall(CeedVectorDestroy(&mult_vec));
848   CeedCall(CeedBasisDestroy(&basis_c_to_f));
849 
850   return CEED_ERROR_SUCCESS;
851 }
852 
853 /**
854   @brief Build 1D mass matrix and Laplacian with perturbation
855 
856   @param[in]  interp_1d   Interpolation matrix in one dimension
857   @param[in]  grad_1d     Gradient matrix in one dimension
858   @param[in]  q_weight_1d Quadrature weights in one dimension
859   @param[in]  P_1d        Number of basis nodes in one dimension
860   @param[in]  Q_1d        Number of quadrature points in one dimension
861   @param[in]  dim         Dimension of basis
862   @param[out] mass        Assembled mass matrix in one dimension
863   @param[out] laplace     Assembled perturbed Laplacian in one dimension
864 
865   @return An error code: 0 - success, otherwise - failure
866 
867   @ref Developer
868 **/
869 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d,
870                                                       CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
871   for (CeedInt i = 0; i < P_1d; i++) {
872     for (CeedInt j = 0; j < P_1d; j++) {
873       CeedScalar sum = 0.0;
874       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
875       mass[i + j * P_1d] = sum;
876     }
877   }
878   // -- Laplacian
879   for (CeedInt i = 0; i < P_1d; i++) {
880     for (CeedInt j = 0; j < P_1d; j++) {
881       CeedScalar sum = 0.0;
882       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
883       laplace[i + j * P_1d] = sum;
884     }
885   }
886   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
887   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
888   return CEED_ERROR_SUCCESS;
889 }
890 CeedPragmaOptimizeOn;
891 
892 /// @}
893 
894 /// ----------------------------------------------------------------------------
895 /// CeedOperator Backend API
896 /// ----------------------------------------------------------------------------
897 /// @addtogroup CeedOperatorBackend
898 /// @{
899 
900 /**
901   @brief Create object holding CeedQFunction assembly data for CeedOperator
902 
903   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
904   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
905 
906   @return An error code: 0 - success, otherwise - failure
907 
908   @ref Backend
909 **/
910 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
911   CeedCall(CeedCalloc(1, data));
912   (*data)->ref_count = 1;
913   (*data)->ceed      = ceed;
914   CeedCall(CeedReference(ceed));
915 
916   return CEED_ERROR_SUCCESS;
917 }
918 
919 /**
920   @brief Increment the reference counter for a CeedQFunctionAssemblyData
921 
922   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
923 
924   @return An error code: 0 - success, otherwise - failure
925 
926   @ref Backend
927 **/
928 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
929   data->ref_count++;
930   return CEED_ERROR_SUCCESS;
931 }
932 
933 /**
934   @brief Set re-use of CeedQFunctionAssemblyData
935 
936   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
937   @param[in]     reuse_data Boolean flag indicating data re-use
938 
939   @return An error code: 0 - success, otherwise - failure
940 
941   @ref Backend
942 **/
943 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
944   data->reuse_data        = reuse_data;
945   data->needs_data_update = true;
946   return CEED_ERROR_SUCCESS;
947 }
948 
949 /**
950   @brief Mark QFunctionAssemblyData as stale
951 
952   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
953   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
954 
955   @return An error code: 0 - success, otherwise - failure
956 
957   @ref Backend
958 **/
959 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
960   data->needs_data_update = needs_data_update;
961   return CEED_ERROR_SUCCESS;
962 }
963 
964 /**
965   @brief Determine if QFunctionAssemblyData needs update
966 
967   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
968   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
969 
970   @return An error code: 0 - success, otherwise - failure
971 
972   @ref Backend
973 **/
974 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
975   *is_update_needed = !data->reuse_data || data->needs_data_update;
976   return CEED_ERROR_SUCCESS;
977 }
978 
979 /**
980   @brief Copy the pointer to a CeedQFunctionAssemblyData.
981            Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
982            Note: If `*data_copy` is non-NULL, then it is assumed that `*data_copy` is a pointer to a CeedQFunctionAssemblyData.
983              This CeedQFunctionAssemblyData will be destroyed if `*data_copy` is the only reference to this CeedQFunctionAssemblyData.
984 
985   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
986   @param[in,out] data_copy Variable to store copied reference
987 
988   @return An error code: 0 - success, otherwise - failure
989 
990   @ref Backend
991 **/
992 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
993   CeedCall(CeedQFunctionAssemblyDataReference(data));
994   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
995   *data_copy = data;
996   return CEED_ERROR_SUCCESS;
997 }
998 
999 /**
1000   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1001 
1002   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1003   @param[out] is_setup Boolean flag for setup status
1004 
1005   @return An error code: 0 - success, otherwise - failure
1006 
1007   @ref Backend
1008 **/
1009 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1010   *is_setup = data->is_setup;
1011   return CEED_ERROR_SUCCESS;
1012 }
1013 
1014 /**
1015   @brief Set internal objects for CeedQFunctionAssemblyData
1016 
1017   @param[in,out] data CeedQFunctionAssemblyData to set objects
1018   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1019   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1020 
1021   @return An error code: 0 - success, otherwise - failure
1022 
1023   @ref Backend
1024 **/
1025 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1026   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1027   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1028 
1029   data->is_setup = true;
1030   return CEED_ERROR_SUCCESS;
1031 }
1032 
1033 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1034   if (!data->is_setup) {
1035     // LCOV_EXCL_START
1036     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1037     // LCOV_EXCL_STOP
1038   }
1039 
1040   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1041   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1042 
1043   return CEED_ERROR_SUCCESS;
1044 }
1045 
1046 /**
1047   @brief Destroy CeedQFunctionAssemblyData
1048 
1049   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1050 
1051   @return An error code: 0 - success, otherwise - failure
1052 
1053   @ref Backend
1054 **/
1055 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1056   if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;
1057 
1058   CeedCall(CeedDestroy(&(*data)->ceed));
1059   CeedCall(CeedVectorDestroy(&(*data)->vec));
1060   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1061 
1062   CeedCall(CeedFree(data));
1063   return CEED_ERROR_SUCCESS;
1064 }
1065 
1066 /**
1067   @brief Get CeedOperatorAssemblyData
1068 
1069   @param[in]  op   CeedOperator to assemble
1070   @param[out] data CeedQFunctionAssemblyData
1071 
1072   @return An error code: 0 - success, otherwise - failure
1073 
1074   @ref Backend
1075 **/
1076 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1077   if (!op->op_assembled) {
1078     CeedOperatorAssemblyData data;
1079 
1080     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1081     op->op_assembled = data;
1082   }
1083   *data = op->op_assembled;
1084 
1085   return CEED_ERROR_SUCCESS;
1086 }
1087 
1088 /**
1089   @brief Create object holding CeedOperator assembly data
1090 
1091   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1092   @param[in]  op   CeedOperator to be assembled
1093   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1094 
1095   @return An error code: 0 - success, otherwise - failure
1096 
1097   @ref Backend
1098 **/
1099 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1100   CeedCall(CeedCalloc(1, data));
1101   (*data)->ceed = ceed;
1102   CeedCall(CeedReference(ceed));
1103 
1104   // Build OperatorAssembly data
1105   CeedQFunction       qf;
1106   CeedQFunctionField *qf_fields;
1107   CeedOperatorField  *op_fields;
1108   CeedInt             num_input_fields;
1109   CeedCall(CeedOperatorGetQFunction(op, &qf));
1110   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1111   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1112 
1113   // Determine active input basis
1114   CeedInt       num_eval_mode_in = 0, dim = 1;
1115   CeedEvalMode *eval_mode_in = NULL;
1116   CeedBasis     basis_in     = NULL;
1117   for (CeedInt i = 0; i < num_input_fields; i++) {
1118     CeedVector vec;
1119     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1120     if (vec == CEED_VECTOR_ACTIVE) {
1121       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1122       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1123       CeedEvalMode eval_mode;
1124       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1125       switch (eval_mode) {
1126         case CEED_EVAL_NONE:
1127         case CEED_EVAL_INTERP:
1128           CeedCall(CeedRealloc(num_eval_mode_in + 1, &eval_mode_in));
1129           eval_mode_in[num_eval_mode_in] = eval_mode;
1130           num_eval_mode_in += 1;
1131           break;
1132         case CEED_EVAL_GRAD:
1133           CeedCall(CeedRealloc(num_eval_mode_in + dim, &eval_mode_in));
1134           for (CeedInt d = 0; d < dim; d++) {
1135             eval_mode_in[num_eval_mode_in + d] = eval_mode;
1136           }
1137           num_eval_mode_in += dim;
1138           break;
1139         case CEED_EVAL_WEIGHT:
1140         case CEED_EVAL_DIV:
1141         case CEED_EVAL_CURL:
1142           break;  // Caught by QF Assembly
1143       }
1144     }
1145   }
1146   (*data)->num_eval_mode_in = num_eval_mode_in;
1147   (*data)->eval_mode_in     = eval_mode_in;
1148   CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->basis_in));
1149 
1150   // Determine active output basis
1151   CeedInt num_output_fields;
1152   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1153   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1154   CeedInt       num_eval_mode_out = 0;
1155   CeedEvalMode *eval_mode_out     = NULL;
1156   CeedBasis     basis_out         = NULL;
1157   for (CeedInt i = 0; i < num_output_fields; i++) {
1158     CeedVector vec;
1159     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1160     if (vec == CEED_VECTOR_ACTIVE) {
1161       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1162       CeedEvalMode eval_mode;
1163       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1164       switch (eval_mode) {
1165         case CEED_EVAL_NONE:
1166         case CEED_EVAL_INTERP:
1167           CeedCall(CeedRealloc(num_eval_mode_out + 1, &eval_mode_out));
1168           eval_mode_out[num_eval_mode_out] = eval_mode;
1169           num_eval_mode_out += 1;
1170           break;
1171         case CEED_EVAL_GRAD:
1172           CeedCall(CeedRealloc(num_eval_mode_out + dim, &eval_mode_out));
1173           for (CeedInt d = 0; d < dim; d++) {
1174             eval_mode_out[num_eval_mode_out + d] = eval_mode;
1175           }
1176           num_eval_mode_out += dim;
1177           break;
1178         case CEED_EVAL_WEIGHT:
1179         case CEED_EVAL_DIV:
1180         case CEED_EVAL_CURL:
1181           break;  // Caught by QF Assembly
1182       }
1183     }
1184   }
1185   (*data)->num_eval_mode_out = num_eval_mode_out;
1186   (*data)->eval_mode_out     = eval_mode_out;
1187   CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->basis_out));
1188 
1189   return CEED_ERROR_SUCCESS;
1190 }
1191 
1192 /**
1193   @brief Get CeedOperator CeedEvalModes for assembly
1194 
1195   @param[in]  data              CeedOperatorAssemblyData
1196   @param[out] num_eval_mode_in  Pointer to hold number of input CeedEvalModes, or NULL
1197   @param[out] eval_mode_in      Pointer to hold input CeedEvalModes, or NULL
1198   @param[out] num_eval_mode_out Pointer to hold number of output CeedEvalModes, or NULL
1199   @param[out] eval_mode_out     Pointer to hold output CeedEvalModes, or NULL
1200 
1201   @return An error code: 0 - success, otherwise - failure
1202 
1203   @ref Backend
1204 **/
1205 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_eval_mode_in, const CeedEvalMode **eval_mode_in,
1206                                          CeedInt *num_eval_mode_out, const CeedEvalMode **eval_mode_out) {
1207   if (num_eval_mode_in) *num_eval_mode_in = data->num_eval_mode_in;
1208   if (eval_mode_in) *eval_mode_in = data->eval_mode_in;
1209   if (num_eval_mode_out) *num_eval_mode_out = data->num_eval_mode_out;
1210   if (eval_mode_out) *eval_mode_out = data->eval_mode_out;
1211 
1212   return CEED_ERROR_SUCCESS;
1213 }
1214 
1215 /**
1216   @brief Get CeedOperator CeedBasis data for assembly
1217 
1218   @param[in]  data      CeedOperatorAssemblyData
1219   @param[out] basis_in  Pointer to hold active input CeedBasis, or NULL
1220   @param[out] B_in      Pointer to hold assembled active input B, or NULL
1221   @param[out] basis_out Pointer to hold active output CeedBasis, or NULL
1222   @param[out] B_out     Pointer to hold assembled active output B, or NULL
1223 
1224   @return An error code: 0 - success, otherwise - failure
1225 
1226   @ref Backend
1227 **/
1228 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedBasis *basis_in, const CeedScalar **B_in, CeedBasis *basis_out,
1229                                      const CeedScalar **B_out) {
1230   // Assemble B_in, B_out if needed
1231   if (B_in && !data->B_in) {
1232     CeedInt           num_qpts, elem_size;
1233     CeedScalar       *B_in, *identity = NULL;
1234     const CeedScalar *interp_in, *grad_in;
1235     bool              has_eval_none = false;
1236 
1237     CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_in, &num_qpts));
1238     CeedCall(CeedBasisGetNumNodes(data->basis_in, &elem_size));
1239     CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_in, &B_in));
1240 
1241     for (CeedInt i = 0; i < data->num_eval_mode_in; i++) {
1242       has_eval_none = has_eval_none || (data->eval_mode_in[i] == CEED_EVAL_NONE);
1243     }
1244     if (has_eval_none) {
1245       CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1246       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1247         identity[i * elem_size + i] = 1.0;
1248       }
1249     }
1250     CeedCall(CeedBasisGetInterp(data->basis_in, &interp_in));
1251     CeedCall(CeedBasisGetGrad(data->basis_in, &grad_in));
1252 
1253     for (CeedInt q = 0; q < num_qpts; q++) {
1254       for (CeedInt n = 0; n < elem_size; n++) {
1255         CeedInt d_in = -1;
1256         for (CeedInt e_in = 0; e_in < data->num_eval_mode_in; e_in++) {
1257           const CeedInt     qq = data->num_eval_mode_in * q;
1258           const CeedScalar *b  = NULL;
1259 
1260           if (data->eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in++;
1261           CeedOperatorGetBasisPointer(data->eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &b);
1262           B_in[(qq + e_in) * elem_size + n] = b[q * elem_size + n];
1263         }
1264       }
1265     }
1266     data->B_in = B_in;
1267   }
1268 
1269   if (B_out && !data->B_out) {
1270     CeedInt           num_qpts, elem_size;
1271     CeedScalar       *B_out, *identity = NULL;
1272     const CeedScalar *interp_out, *grad_out;
1273     bool              has_eval_none = false;
1274 
1275     CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_out, &num_qpts));
1276     CeedCall(CeedBasisGetNumNodes(data->basis_out, &elem_size));
1277     CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_out, &B_out));
1278 
1279     for (CeedInt i = 0; i < data->num_eval_mode_out; i++) {
1280       has_eval_none = has_eval_none || (data->eval_mode_out[i] == CEED_EVAL_NONE);
1281     }
1282     if (has_eval_none) {
1283       CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1284       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1285         identity[i * elem_size + i] = 1.0;
1286       }
1287     }
1288     CeedCall(CeedBasisGetInterp(data->basis_out, &interp_out));
1289     CeedCall(CeedBasisGetGrad(data->basis_out, &grad_out));
1290 
1291     for (CeedInt q = 0; q < num_qpts; q++) {
1292       for (CeedInt n = 0; n < elem_size; n++) {
1293         CeedInt d_out = -1;
1294         for (CeedInt e_out = 0; e_out < data->num_eval_mode_out; e_out++) {
1295           const CeedInt     qq = data->num_eval_mode_out * q;
1296           const CeedScalar *b  = NULL;
1297 
1298           if (data->eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out++;
1299           CeedOperatorGetBasisPointer(data->eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &b);
1300           B_out[(qq + e_out) * elem_size + n] = b[q * elem_size + n];
1301         }
1302       }
1303     }
1304     data->B_out = B_out;
1305   }
1306 
1307   if (basis_in) *basis_in = data->basis_in;
1308   if (B_in) *B_in = data->B_in;
1309   if (basis_out) *basis_out = data->basis_out;
1310   if (B_out) *B_out = data->B_out;
1311 
1312   return CEED_ERROR_SUCCESS;
1313 }
1314 
1315 /**
1316   @brief Destroy CeedOperatorAssemblyData
1317 
1318   @param[in,out] data CeedOperatorAssemblyData to destroy
1319 
1320   @return An error code: 0 - success, otherwise - failure
1321 
1322   @ref Backend
1323 **/
1324 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1325   if (!*data) return CEED_ERROR_SUCCESS;
1326 
1327   CeedCall(CeedDestroy(&(*data)->ceed));
1328   CeedCall(CeedBasisDestroy(&(*data)->basis_in));
1329   CeedCall(CeedBasisDestroy(&(*data)->basis_out));
1330   CeedCall(CeedFree(&(*data)->eval_mode_in));
1331   CeedCall(CeedFree(&(*data)->eval_mode_out));
1332   CeedCall(CeedFree(&(*data)->B_in));
1333   CeedCall(CeedFree(&(*data)->B_out));
1334 
1335   CeedCall(CeedFree(data));
1336   return CEED_ERROR_SUCCESS;
1337 }
1338 
1339 /// @}
1340 
1341 /// ----------------------------------------------------------------------------
1342 /// CeedOperator Public API
1343 /// ----------------------------------------------------------------------------
1344 /// @addtogroup CeedOperatorUser
1345 /// @{
1346 
1347 /**
1348   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1349 
1350   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1351     The vector 'assembled' is of shape [num_elements, num_input_fields, num_output_fields, num_quad_points] and contains column-major matrices
1352 representing the action of the CeedQFunction for a corresponding quadrature point on an element. Inputs and outputs are in the order provided by the
1353 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order,
1354 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1]
1355 and producing the output [dv_0, dv_1, v].
1356 
1357   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1358 
1359   @param[in]  op        CeedOperator to assemble CeedQFunction
1360   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1361   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1362   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1363 
1364   @return An error code: 0 - success, otherwise - failure
1365 
1366   @ref User
1367 **/
1368 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1369   CeedCall(CeedOperatorCheckReady(op));
1370 
1371   if (op->LinearAssembleQFunction) {
1372     // Backend version
1373     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1374   } else {
1375     // Operator fallback
1376     CeedOperator op_fallback;
1377 
1378     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1379     if (op_fallback) {
1380       CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1381     } else {
1382       // LCOV_EXCL_START
1383       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1384       // LCOV_EXCL_STOP
1385     }
1386   }
1387   return CEED_ERROR_SUCCESS;
1388 }
1389 
1390 /**
1391   @brief Assemble CeedQFunction and store result internally.
1392            Return copied references of stored data to the caller.
1393            Caller is responsible for ownership and destruction of the copied references.
1394            See also @ref CeedOperatorLinearAssembleQFunction
1395 
1396   @param[in]  op        CeedOperator to assemble CeedQFunction
1397   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1398   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1399   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1400 
1401   @return An error code: 0 - success, otherwise - failure
1402 
1403   @ref User
1404 **/
1405 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1406   CeedCall(CeedOperatorCheckReady(op));
1407 
1408   if (op->LinearAssembleQFunctionUpdate) {
1409     // Backend version
1410     bool                qf_assembled_is_setup;
1411     CeedVector          assembled_vec  = NULL;
1412     CeedElemRestriction assembled_rstr = NULL;
1413 
1414     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1415     if (qf_assembled_is_setup) {
1416       bool update_needed;
1417 
1418       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1419       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1420       if (update_needed) {
1421         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1422       }
1423     } else {
1424       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1425       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1426     }
1427     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1428 
1429     // Copy reference from internally held copy
1430     *assembled = NULL;
1431     *rstr      = NULL;
1432     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1433     CeedCall(CeedVectorDestroy(&assembled_vec));
1434     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1435     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1436   } else {
1437     // Operator fallback
1438     CeedOperator op_fallback;
1439 
1440     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1441     if (op_fallback) {
1442       CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1443     } else {
1444       // LCOV_EXCL_START
1445       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1446       // LCOV_EXCL_STOP
1447     }
1448   }
1449 
1450   return CEED_ERROR_SUCCESS;
1451 }
1452 
1453 /**
1454   @brief Assemble the diagonal of a square linear CeedOperator
1455 
1456   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1457 
1458   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1459 
1460   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1461 
1462   @param[in]  op        CeedOperator to assemble CeedQFunction
1463   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1464   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1465 
1466   @return An error code: 0 - success, otherwise - failure
1467 
1468   @ref User
1469 **/
1470 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1471   bool is_composite;
1472   CeedCall(CeedOperatorCheckReady(op));
1473   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1474 
1475   CeedSize input_size = 0, output_size = 0;
1476   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1477   if (input_size != output_size) {
1478     // LCOV_EXCL_START
1479     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1480     // LCOV_EXCL_STOP
1481   }
1482 
1483   // Early exit for empty operator
1484   if (!is_composite) {
1485     CeedInt num_elem = 0;
1486 
1487     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1488     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1489   }
1490 
1491   if (op->LinearAssembleDiagonal) {
1492     // Backend version
1493     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1494     return CEED_ERROR_SUCCESS;
1495   } else if (op->LinearAssembleAddDiagonal) {
1496     // Backend version with zeroing first
1497     CeedCall(CeedVectorSetValue(assembled, 0.0));
1498     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1499     return CEED_ERROR_SUCCESS;
1500   } else {
1501     // Operator fallback
1502     CeedOperator op_fallback;
1503 
1504     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1505     if (op_fallback) {
1506       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1507       return CEED_ERROR_SUCCESS;
1508     }
1509   }
1510   // Default interface implementation
1511   CeedCall(CeedVectorSetValue(assembled, 0.0));
1512   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1513 
1514   return CEED_ERROR_SUCCESS;
1515 }
1516 
1517 /**
1518   @brief Assemble the diagonal of a square linear CeedOperator
1519 
1520   This sums into a CeedVector the diagonal of a linear CeedOperator.
1521 
1522   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1523 
1524   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1525 
1526   @param[in]  op        CeedOperator to assemble CeedQFunction
1527   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1528   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1529 
1530   @return An error code: 0 - success, otherwise - failure
1531 
1532   @ref User
1533 **/
1534 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1535   bool is_composite;
1536   CeedCall(CeedOperatorCheckReady(op));
1537   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1538 
1539   CeedSize input_size = 0, output_size = 0;
1540   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1541   if (input_size != output_size) {
1542     // LCOV_EXCL_START
1543     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1544     // LCOV_EXCL_STOP
1545   }
1546 
1547   // Early exit for empty operator
1548   if (!is_composite) {
1549     CeedInt num_elem = 0;
1550 
1551     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1552     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1553   }
1554 
1555   if (op->LinearAssembleAddDiagonal) {
1556     // Backend version
1557     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1558     return CEED_ERROR_SUCCESS;
1559   } else {
1560     // Operator fallback
1561     CeedOperator op_fallback;
1562 
1563     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1564     if (op_fallback) {
1565       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1566       return CEED_ERROR_SUCCESS;
1567     }
1568   }
1569   // Default interface implementation
1570   if (is_composite) {
1571     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1572   } else {
1573     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1574   }
1575 
1576   return CEED_ERROR_SUCCESS;
1577 }
1578 
1579 /**
1580   @brief Assemble the point block diagonal of a square linear CeedOperator
1581 
1582   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1583 
1584   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1585 
1586   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1587 
1588   @param[in]  op        CeedOperator to assemble CeedQFunction
1589   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1590 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1591 component in].
1592   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1593 
1594   @return An error code: 0 - success, otherwise - failure
1595 
1596   @ref User
1597 **/
1598 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1599   bool is_composite;
1600   CeedCall(CeedOperatorCheckReady(op));
1601   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1602 
1603   CeedSize input_size = 0, output_size = 0;
1604   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1605   if (input_size != output_size) {
1606     // LCOV_EXCL_START
1607     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1608     // LCOV_EXCL_STOP
1609   }
1610 
1611   // Early exit for empty operator
1612   if (!is_composite) {
1613     CeedInt num_elem = 0;
1614 
1615     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1616     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1617   }
1618 
1619   if (op->LinearAssemblePointBlockDiagonal) {
1620     // Backend version
1621     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1622     return CEED_ERROR_SUCCESS;
1623   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1624     // Backend version with zeroing first
1625     CeedCall(CeedVectorSetValue(assembled, 0.0));
1626     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1627     return CEED_ERROR_SUCCESS;
1628   } else {
1629     // Operator fallback
1630     CeedOperator op_fallback;
1631 
1632     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1633     if (op_fallback) {
1634       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1635       return CEED_ERROR_SUCCESS;
1636     }
1637   }
1638   // Default interface implementation
1639   CeedCall(CeedVectorSetValue(assembled, 0.0));
1640   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1641 
1642   return CEED_ERROR_SUCCESS;
1643 }
1644 
1645 /**
1646   @brief Assemble the point block diagonal of a square linear CeedOperator
1647 
1648   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1649 
1650   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1651 
1652   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1653 
1654   @param[in]  op        CeedOperator to assemble CeedQFunction
1655   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1656 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1657 component in].
1658   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1659 
1660   @return An error code: 0 - success, otherwise - failure
1661 
1662   @ref User
1663 **/
1664 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1665   bool is_composite;
1666   CeedCall(CeedOperatorCheckReady(op));
1667   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1668 
1669   CeedSize input_size = 0, output_size = 0;
1670   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1671   if (input_size != output_size) {
1672     // LCOV_EXCL_START
1673     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1674     // LCOV_EXCL_STOP
1675   }
1676 
1677   // Early exit for empty operator
1678   if (!is_composite) {
1679     CeedInt num_elem = 0;
1680 
1681     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1682     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1683   }
1684 
1685   if (op->LinearAssembleAddPointBlockDiagonal) {
1686     // Backend version
1687     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1688     return CEED_ERROR_SUCCESS;
1689   } else {
1690     // Operator fallback
1691     CeedOperator op_fallback;
1692 
1693     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1694     if (op_fallback) {
1695       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1696       return CEED_ERROR_SUCCESS;
1697     }
1698   }
1699   // Default interface implementation
1700   if (is_composite) {
1701     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1702   } else {
1703     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1704   }
1705 
1706   return CEED_ERROR_SUCCESS;
1707 }
1708 
1709 /**
1710    @brief Fully assemble the nonzero pattern of a linear operator.
1711 
1712    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1713 
1714    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1715 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j)
1716 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1717 
1718    This will generally be slow unless your operator is low-order.
1719 
1720    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1721 
1722    @param[in]  op          CeedOperator to assemble
1723    @param[out] num_entries Number of entries in coordinate nonzero pattern
1724    @param[out] rows        Row number for each entry
1725    @param[out] cols        Column number for each entry
1726 
1727    @ref User
1728 **/
1729 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1730   CeedInt       num_suboperators, single_entries;
1731   CeedOperator *sub_operators;
1732   bool          is_composite;
1733   CeedCall(CeedOperatorCheckReady(op));
1734   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1735 
1736   if (op->LinearAssembleSymbolic) {
1737     // Backend version
1738     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1739     return CEED_ERROR_SUCCESS;
1740   } else {
1741     // Operator fallback
1742     CeedOperator op_fallback;
1743 
1744     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1745     if (op_fallback) {
1746       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1747       return CEED_ERROR_SUCCESS;
1748     }
1749   }
1750 
1751   // Default interface implementation
1752 
1753   // count entries and allocate rows, cols arrays
1754   *num_entries = 0;
1755   if (is_composite) {
1756     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1757     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1758     for (CeedInt k = 0; k < num_suboperators; ++k) {
1759       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1760       *num_entries += single_entries;
1761     }
1762   } else {
1763     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1764     *num_entries += single_entries;
1765   }
1766   CeedCall(CeedCalloc(*num_entries, rows));
1767   CeedCall(CeedCalloc(*num_entries, cols));
1768 
1769   // assemble nonzero locations
1770   CeedInt offset = 0;
1771   if (is_composite) {
1772     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1773     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1774     for (CeedInt k = 0; k < num_suboperators; ++k) {
1775       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1776       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1777       offset += single_entries;
1778     }
1779   } else {
1780     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1781   }
1782 
1783   return CEED_ERROR_SUCCESS;
1784 }
1785 
1786 /**
1787    @brief Fully assemble the nonzero entries of a linear operator.
1788 
1789    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1790 
1791    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1792 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added,
1793 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1794 
1795    This will generally be slow unless your operator is low-order.
1796 
1797    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1798 
1799    @param[in]  op     CeedOperator to assemble
1800    @param[out] values Values to assemble into matrix
1801 
1802    @ref User
1803 **/
1804 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1805   CeedInt       num_suboperators, single_entries = 0;
1806   CeedOperator *sub_operators;
1807   bool          is_composite;
1808   CeedCall(CeedOperatorCheckReady(op));
1809   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1810 
1811   // Early exit for empty operator
1812   if (!is_composite) {
1813     CeedInt num_elem = 0;
1814 
1815     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1816     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1817   }
1818 
1819   if (op->LinearAssemble) {
1820     // Backend version
1821     CeedCall(op->LinearAssemble(op, values));
1822     return CEED_ERROR_SUCCESS;
1823   } else {
1824     // Operator fallback
1825     CeedOperator op_fallback;
1826 
1827     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1828     if (op_fallback) {
1829       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
1830       return CEED_ERROR_SUCCESS;
1831     }
1832   }
1833 
1834   // Default interface implementation
1835   CeedInt offset = 0;
1836   if (is_composite) {
1837     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1838     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1839     for (CeedInt k = 0; k < num_suboperators; k++) {
1840       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
1841       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1842       offset += single_entries;
1843     }
1844   } else {
1845     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
1846   }
1847 
1848   return CEED_ERROR_SUCCESS;
1849 }
1850 
1851 /**
1852   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
1853 
1854   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1855 
1856   @param[in]  op               Composite CeedOperator
1857   @param[in]  num_skip_indices Number of suboperators to skip
1858   @param[in]  skip_indices     Array of indices of suboperators to skip
1859   @param[out] mult             Vector to store multiplicity (of size l_size)
1860 
1861   @return An error code: 0 - success, otherwise - failure
1862 
1863   @ref User
1864 **/
1865 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
1866   CeedCall(CeedOperatorCheckReady(op));
1867 
1868   Ceed                ceed;
1869   CeedInt             num_suboperators;
1870   CeedSize            l_vec_len;
1871   CeedScalar         *mult_array;
1872   CeedVector          ones_l_vec;
1873   CeedElemRestriction elem_restr;
1874   CeedOperator       *sub_operators;
1875 
1876   CeedCall(CeedOperatorGetCeed(op, &ceed));
1877 
1878   // Zero mult vector
1879   CeedCall(CeedVectorSetValue(mult, 0.0));
1880 
1881   // Get suboperators
1882   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1883   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1884   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
1885 
1886   // Work vector
1887   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
1888   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
1889   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
1890   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
1891 
1892   // Compute multiplicity across suboperators
1893   for (CeedInt i = 0; i < num_suboperators; i++) {
1894     const CeedScalar *sub_mult_array;
1895     CeedVector        sub_mult_l_vec, ones_e_vec;
1896 
1897     // -- Check for suboperator to skip
1898     for (CeedInt j = 0; j < num_skip_indices; j++) {
1899       if (skip_indices[j] == i) continue;
1900     }
1901 
1902     // -- Sub operator multiplicity
1903     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_restr));
1904     CeedCall(CeedElemRestrictionCreateVector(elem_restr, &sub_mult_l_vec, &ones_e_vec));
1905     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
1906     CeedCall(CeedElemRestrictionApply(elem_restr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
1907     CeedCall(CeedElemRestrictionApply(elem_restr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
1908     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
1909     // ---- Flag every node present in the current suboperator
1910     for (CeedInt j = 0; j < l_vec_len; j++) {
1911       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
1912     }
1913     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
1914     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
1915     CeedCall(CeedVectorDestroy(&ones_e_vec));
1916   }
1917   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
1918   CeedCall(CeedVectorDestroy(&ones_l_vec));
1919 
1920   return CEED_ERROR_SUCCESS;
1921 }
1922 
1923 /**
1924   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
1925 grid interpolation
1926 
1927   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1928 
1929   @param[in]  op_fine      Fine grid operator
1930   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter
1931   @param[in]  rstr_coarse  Coarse grid restriction
1932   @param[in]  basis_coarse Coarse grid active vector basis
1933   @param[out] op_coarse    Coarse grid operator
1934   @param[out] op_prolong   Coarse to fine operator
1935   @param[out] op_restrict  Fine to coarse operator
1936 
1937   @return An error code: 0 - success, otherwise - failure
1938 
1939   @ref User
1940 **/
1941 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1942                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
1943   CeedCall(CeedOperatorCheckReady(op_fine));
1944 
1945   // Build prolongation matrix, if required
1946   CeedBasis basis_c_to_f = NULL;
1947   if (op_prolong || op_restrict) {
1948     CeedBasis basis_fine;
1949     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1950     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
1951   }
1952 
1953   // Core code
1954   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1955 
1956   return CEED_ERROR_SUCCESS;
1957 }
1958 
1959 /**
1960   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
1961 
1962   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1963 
1964   @param[in]  op_fine       Fine grid operator
1965   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
1966   @param[in]  rstr_coarse   Coarse grid restriction
1967   @param[in]  basis_coarse  Coarse grid active vector basis
1968   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation
1969   @param[out] op_coarse     Coarse grid operator
1970   @param[out] op_prolong    Coarse to fine operator
1971   @param[out] op_restrict   Fine to coarse operator
1972 
1973   @return An error code: 0 - success, otherwise - failure
1974 
1975   @ref User
1976 **/
1977 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1978                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
1979                                              CeedOperator *op_restrict) {
1980   CeedCall(CeedOperatorCheckReady(op_fine));
1981   Ceed ceed;
1982   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
1983 
1984   // Check for compatible quadrature spaces
1985   CeedBasis basis_fine;
1986   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1987   CeedInt Q_f, Q_c;
1988   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
1989   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
1990   if (Q_f != Q_c) {
1991     // LCOV_EXCL_START
1992     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
1993     // LCOV_EXCL_STOP
1994   }
1995 
1996   // Create coarse to fine basis, if required
1997   CeedBasis basis_c_to_f = NULL;
1998   if (op_prolong || op_restrict) {
1999     // Check if interpolation matrix is provided
2000     if (!interp_c_to_f) {
2001       // LCOV_EXCL_START
2002       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2003       // LCOV_EXCL_STOP
2004     }
2005     CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2006     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2007     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2008     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2009     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2010     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2011     CeedScalar *q_ref, *q_weight, *grad;
2012     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2013     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2014     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2015     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2016     CeedCall(CeedFree(&q_ref));
2017     CeedCall(CeedFree(&q_weight));
2018     CeedCall(CeedFree(&grad));
2019   }
2020 
2021   // Core code
2022   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2023   return CEED_ERROR_SUCCESS;
2024 }
2025 
2026 /**
2027   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2028 
2029   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2030 
2031   @param[in]  op_fine       Fine grid operator
2032   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
2033   @param[in]  rstr_coarse   Coarse grid restriction
2034   @param[in]  basis_coarse  Coarse grid active vector basis
2035   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation
2036   @param[out] op_coarse     Coarse grid operator
2037   @param[out] op_prolong    Coarse to fine operator
2038   @param[out] op_restrict   Fine to coarse operator
2039 
2040   @return An error code: 0 - success, otherwise - failure
2041 
2042   @ref User
2043 **/
2044 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2045                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2046                                        CeedOperator *op_restrict) {
2047   CeedCall(CeedOperatorCheckReady(op_fine));
2048   Ceed ceed;
2049   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2050 
2051   // Check for compatible quadrature spaces
2052   CeedBasis basis_fine;
2053   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2054   CeedInt Q_f, Q_c;
2055   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2056   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2057   if (Q_f != Q_c) {
2058     // LCOV_EXCL_START
2059     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2060     // LCOV_EXCL_STOP
2061   }
2062 
2063   // Coarse to fine basis
2064   CeedBasis basis_c_to_f = NULL;
2065   if (op_prolong || op_restrict) {
2066     // Check if interpolation matrix is provided
2067     if (!interp_c_to_f) {
2068       // LCOV_EXCL_START
2069       return CeedError(ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2070       // LCOV_EXCL_STOP
2071     }
2072     CeedElemTopology topo;
2073     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2074     CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2075     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2076     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2077     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2078     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2079     CeedScalar *q_ref, *q_weight, *grad;
2080     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2081     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2082     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2083     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2084     CeedCall(CeedFree(&q_ref));
2085     CeedCall(CeedFree(&q_weight));
2086     CeedCall(CeedFree(&grad));
2087   }
2088 
2089   // Core code
2090   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2091   return CEED_ERROR_SUCCESS;
2092 }
2093 
2094 /**
2095   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2096 
2097   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2098     This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, M = V^T V, K = V^T S V.
2099     The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form V^T
2100 S^hat V. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear.
2101 
2102   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2103 
2104   @param[in]  op      CeedOperator to create element inverses
2105   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2106   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2107 
2108   @return An error code: 0 - success, otherwise - failure
2109 
2110   @ref User
2111 **/
2112 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2113   CeedCall(CeedOperatorCheckReady(op));
2114 
2115   if (op->CreateFDMElementInverse) {
2116     // Backend version
2117     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2118     return CEED_ERROR_SUCCESS;
2119   } else {
2120     // Operator fallback
2121     CeedOperator op_fallback;
2122 
2123     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2124     if (op_fallback) {
2125       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2126       return CEED_ERROR_SUCCESS;
2127     }
2128   }
2129 
2130   // Default interface implementation
2131   Ceed ceed, ceed_parent;
2132   CeedCall(CeedOperatorGetCeed(op, &ceed));
2133   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2134   ceed_parent = ceed_parent ? ceed_parent : ceed;
2135   CeedQFunction qf;
2136   CeedCall(CeedOperatorGetQFunction(op, &qf));
2137 
2138   // Determine active input basis
2139   bool                interp = false, grad = false;
2140   CeedBasis           basis = NULL;
2141   CeedElemRestriction rstr  = NULL;
2142   CeedOperatorField  *op_fields;
2143   CeedQFunctionField *qf_fields;
2144   CeedInt             num_input_fields;
2145   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2146   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2147   for (CeedInt i = 0; i < num_input_fields; i++) {
2148     CeedVector vec;
2149     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2150     if (vec == CEED_VECTOR_ACTIVE) {
2151       CeedEvalMode eval_mode;
2152       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2153       interp = interp || eval_mode == CEED_EVAL_INTERP;
2154       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2155       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2156       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2157     }
2158   }
2159   if (!basis) {
2160     // LCOV_EXCL_START
2161     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2162     // LCOV_EXCL_STOP
2163   }
2164   CeedSize l_size = 1;
2165   CeedInt  P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
2166   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2167   CeedCall(CeedBasisGetNumNodes(basis, &elem_size));
2168   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2169   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2170   CeedCall(CeedBasisGetDimension(basis, &dim));
2171   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2172   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2173   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2174 
2175   // Build and diagonalize 1D Mass and Laplacian
2176   bool tensor_basis;
2177   CeedCall(CeedBasisIsTensor(basis, &tensor_basis));
2178   if (!tensor_basis) {
2179     // LCOV_EXCL_START
2180     return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2181     // LCOV_EXCL_STOP
2182   }
2183   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2184   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2185   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2186   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2187   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2188   CeedCall(CeedCalloc(P_1d, &lambda));
2189   // -- Build matrices
2190   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2191   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2192   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2193   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2194   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2195 
2196   // -- Diagonalize
2197   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2198   CeedCall(CeedFree(&mass));
2199   CeedCall(CeedFree(&laplace));
2200   for (CeedInt i = 0; i < P_1d; i++) {
2201     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2202   }
2203   CeedCall(CeedFree(&x));
2204 
2205   // Assemble QFunction
2206   CeedVector          assembled;
2207   CeedElemRestriction rstr_qf;
2208   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2209   CeedInt layout[3];
2210   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2211   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2212   CeedScalar max_norm = 0;
2213   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2214 
2215   // Calculate element averages
2216   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2217   CeedScalar       *elem_avg;
2218   const CeedScalar *assembled_array, *q_weight_array;
2219   CeedVector        q_weight;
2220   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2221   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2222   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2223   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2224   CeedCall(CeedCalloc(num_elem, &elem_avg));
2225   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2226   for (CeedInt e = 0; e < num_elem; e++) {
2227     CeedInt count = 0;
2228     for (CeedInt q = 0; q < num_qpts; q++) {
2229       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2230         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2231           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2232           count++;
2233         }
2234       }
2235     }
2236     if (count) {
2237       elem_avg[e] /= count;
2238     } else {
2239       elem_avg[e] = 1.0;
2240     }
2241   }
2242   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2243   CeedCall(CeedVectorDestroy(&assembled));
2244   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2245   CeedCall(CeedVectorDestroy(&q_weight));
2246 
2247   // Build FDM diagonal
2248   CeedVector  q_data;
2249   CeedScalar *q_data_array, *fdm_diagonal;
2250   CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal));
2251   const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON;
2252   for (CeedInt c = 0; c < num_comp; c++) {
2253     for (CeedInt n = 0; n < elem_size; n++) {
2254       if (interp) fdm_diagonal[c * elem_size + n] = 1.0;
2255       if (grad) {
2256         for (CeedInt d = 0; d < dim; d++) {
2257           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2258           fdm_diagonal[c * elem_size + n] += lambda[i];
2259         }
2260       }
2261       if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound;
2262     }
2263   }
2264   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data));
2265   CeedCall(CeedVectorSetValue(q_data, 0.0));
2266   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2267   for (CeedInt e = 0; e < num_elem; e++) {
2268     for (CeedInt c = 0; c < num_comp; c++) {
2269       for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]);
2270     }
2271   }
2272   CeedCall(CeedFree(&elem_avg));
2273   CeedCall(CeedFree(&fdm_diagonal));
2274   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2275 
2276   // Setup FDM operator
2277   // -- Basis
2278   CeedBasis   fdm_basis;
2279   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2280   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2281   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2282   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2283   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2284   CeedCall(CeedFree(&fdm_interp));
2285   CeedCall(CeedFree(&grad_dummy));
2286   CeedCall(CeedFree(&q_ref_dummy));
2287   CeedCall(CeedFree(&q_weight_dummy));
2288   CeedCall(CeedFree(&lambda));
2289 
2290   // -- Restriction
2291   CeedElemRestriction rstr_qd_i;
2292   CeedInt             strides[3] = {1, elem_size, elem_size * num_comp};
2293   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i));
2294   // -- QFunction
2295   CeedQFunction qf_fdm;
2296   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2297   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2298   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2299   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2300   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2301   // -- QFunction context
2302   CeedInt *num_comp_data;
2303   CeedCall(CeedCalloc(1, &num_comp_data));
2304   num_comp_data[0] = num_comp;
2305   CeedQFunctionContext ctx_fdm;
2306   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2307   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2308   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2309   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2310   // -- Operator
2311   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2312   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2313   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2314   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2315 
2316   // Cleanup
2317   CeedCall(CeedVectorDestroy(&q_data));
2318   CeedCall(CeedBasisDestroy(&fdm_basis));
2319   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2320   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2321 
2322   return CEED_ERROR_SUCCESS;
2323 }
2324 
2325 /// @}
2326