xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision 3245a0eff8db3869c2aa15a690d2f19e36a414e4)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <assert.h>
9 #include <ceed-impl.h>
10 #include <ceed/backend.h>
11 #include <ceed/ceed.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   Ceed ceed_fallback;
84 
85   // Check not already created
86   if (op->op_fallback) return CEED_ERROR_SUCCESS;
87 
88   // Fallback Ceed
89   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
90   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
91 
92   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
93   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
94 
95   // Clone Op
96   CeedOperator op_fallback;
97   if (op->is_composite) {
98     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
99     for (CeedInt i = 0; i < op->num_suboperators; i++) {
100       CeedOperator op_sub_fallback;
101 
102       CeedCall(CeedOperatorGetFallback(op->sub_operators[i], &op_sub_fallback));
103       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
104     }
105   } else {
106     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
107     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
108     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
109     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
110     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
111     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
112       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_restr, op->input_fields[i]->basis,
113                                     op->input_fields[i]->vec));
114     }
115     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
116       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_restr, op->output_fields[i]->basis,
117                                     op->output_fields[i]->vec));
118     }
119     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
120     if (op_fallback->num_qpts == 0) {
121       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
122     }
123     // Cleanup
124     CeedCall(CeedQFunctionDestroy(&qf_fallback));
125     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
126     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
127   }
128   CeedCall(CeedOperatorSetName(op_fallback, op->name));
129   CeedCall(CeedOperatorCheckReady(op_fallback));
130   op->op_fallback = op_fallback;
131 
132   return CEED_ERROR_SUCCESS;
133 }
134 
135 /**
136   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
137 
138   @param[in]  op          CeedOperator to retrieve fallback for
139   @param[out] op_fallback Fallback CeedOperator
140 
141   @return An error code: 0 - success, otherwise - failure
142 
143   @ref Developer
144 **/
145 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
146   // Create if needed
147   if (!op->op_fallback) {
148     CeedCall(CeedOperatorCreateFallback(op));
149   }
150   if (op->op_fallback) {
151     bool is_debug;
152 
153     CeedCall(CeedIsDebug(op->ceed, &is_debug));
154     if (is_debug) {
155       Ceed        ceed_fallback;
156       const char *resource, *resource_fallback;
157 
158       CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
159       CeedCall(CeedGetResource(op->ceed, &resource));
160       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
161 
162       CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
163       CeedDebug(op->ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
164                 op->op_fallback);
165     }
166   }
167   *op_fallback = op->op_fallback;
168 
169   return CEED_ERROR_SUCCESS;
170 }
171 
172 /**
173   @brief Select correct basis matrix pointer based on CeedEvalMode
174 
175   @param[in]  eval_mode Current basis evaluation mode
176   @param[in]  identity  Pointer to identity matrix
177   @param[in]  interp    Pointer to interpolation matrix
178   @param[in]  grad      Pointer to gradient matrix
179   @param[out] basis_ptr Basis pointer to set
180 
181   @ref Developer
182 **/
183 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad,
184                                                const CeedScalar **basis_ptr) {
185   switch (eval_mode) {
186     case CEED_EVAL_NONE:
187       *basis_ptr = identity;
188       break;
189     case CEED_EVAL_INTERP:
190       *basis_ptr = interp;
191       break;
192     case CEED_EVAL_GRAD:
193       *basis_ptr = grad;
194       break;
195     case CEED_EVAL_WEIGHT:
196     case CEED_EVAL_DIV:
197     case CEED_EVAL_CURL:
198       break;  // Caught by QF Assembly
199   }
200   assert(*basis_ptr != NULL);
201 }
202 
203 /**
204   @brief Create point block restriction for active operator field
205 
206   @param[in]  rstr            Original CeedElemRestriction for active field
207   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
208 
209   @return An error code: 0 - success, otherwise - failure
210 
211   @ref Developer
212 **/
213 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
214   Ceed ceed;
215   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
216   const CeedInt *offsets;
217   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
218 
219   // Expand offsets
220   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
221   CeedSize l_size;
222   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
223   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
224   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
225   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
226   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
227   CeedInt shift = num_comp;
228   if (comp_stride != 1) shift *= num_comp;
229   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
230   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
231     pointblock_offsets[i] = offsets[i] * shift;
232   }
233 
234   // Create new restriction
235   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
236                                      pointblock_offsets, pointblock_rstr));
237 
238   // Cleanup
239   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
240 
241   return CEED_ERROR_SUCCESS;
242 }
243 
244 /**
245   @brief Core logic for assembling operator diagonal or point block diagonal
246 
247   @param[in]  op            CeedOperator to assemble point block diagonal
248   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
249   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
250   @param[out] assembled     CeedVector to store assembled diagonal
251 
252   @return An error code: 0 - success, otherwise - failure
253 
254   @ref Developer
255 **/
256 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
257   Ceed ceed;
258   CeedCall(CeedOperatorGetCeed(op, &ceed));
259 
260   // Assemble QFunction
261   CeedQFunction qf;
262   CeedCall(CeedOperatorGetQFunction(op, &qf));
263   CeedInt num_input_fields, num_output_fields;
264   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
265   CeedVector          assembled_qf;
266   CeedElemRestriction rstr;
267   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr, request));
268   CeedInt layout[3];
269   CeedCall(CeedElemRestrictionGetELayout(rstr, &layout));
270   CeedCall(CeedElemRestrictionDestroy(&rstr));
271 
272   // Get assembly data
273   CeedOperatorAssemblyData data;
274   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
275   const CeedEvalMode *eval_mode_in, *eval_mode_out;
276   CeedInt             num_eval_mode_in, num_eval_mode_out;
277   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out));
278   CeedBasis basis_in, basis_out;
279   CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL));
280   CeedInt num_comp;
281   CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
282 
283   // Assemble point block diagonal restriction, if needed
284   CeedElemRestriction diag_rstr;
285   CeedCall(CeedOperatorGetActiveElemRestriction(op, &diag_rstr));
286   if (is_pointblock) {
287     CeedElemRestriction point_block_rstr;
288     CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_rstr, &point_block_rstr));
289     diag_rstr = point_block_rstr;
290   }
291 
292   // Create diagonal vector
293   CeedVector elem_diag;
294   CeedCall(CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag));
295 
296   // Assemble element operator diagonals
297   CeedScalar       *elem_diag_array;
298   const CeedScalar *assembled_qf_array;
299   CeedCall(CeedVectorSetValue(elem_diag, 0.0));
300   CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
301   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
302   CeedInt num_elem, num_nodes, num_qpts;
303   CeedCall(CeedElemRestrictionGetNumElements(diag_rstr, &num_elem));
304   CeedCall(CeedBasisGetNumNodes(basis_in, &num_nodes));
305   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
306 
307   // Basis matrices
308   const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
309   CeedScalar       *identity      = NULL;
310   bool              has_eval_none = false;
311   for (CeedInt i = 0; i < num_eval_mode_in; i++) {
312     has_eval_none = has_eval_none || (eval_mode_in[i] == CEED_EVAL_NONE);
313   }
314   for (CeedInt i = 0; i < num_eval_mode_out; i++) {
315     has_eval_none = has_eval_none || (eval_mode_out[i] == CEED_EVAL_NONE);
316   }
317   if (has_eval_none) {
318     CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
319     for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
320   }
321   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
322   CeedCall(CeedBasisGetInterp(basis_out, &interp_out));
323   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
324   CeedCall(CeedBasisGetGrad(basis_out, &grad_out));
325   // Compute the diagonal of B^T D B
326   // Each element
327   for (CeedInt e = 0; e < num_elem; e++) {
328     CeedInt d_out = -1;
329     // Each basis eval mode pair
330     for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
331       const CeedScalar *bt = NULL;
332       if (eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out += 1;
333       CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * num_nodes], &bt);
334       CeedInt d_in = -1;
335       for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
336         const CeedScalar *b = NULL;
337         if (eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in += 1;
338         CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * num_nodes], &b);
339         // Each component
340         for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
341           // Each qpoint/node pair
342           for (CeedInt q = 0; q < num_qpts; q++) {
343             if (is_pointblock) {
344               // Point Block Diagonal
345               for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
346                 const CeedScalar qf_value =
347                     assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_in) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] +
348                                        e * layout[2]];
349                 for (CeedInt n = 0; n < num_nodes; n++) {
350                   elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
351                       bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n];
352                 }
353               }
354             } else {
355               // Diagonal Only
356               const CeedScalar qf_value =
357                   assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_out) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] +
358                                      e * layout[2]];
359               for (CeedInt n = 0; n < num_nodes; n++) {
360                 elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n];
361               }
362             }
363           }
364         }
365       }
366     }
367   }
368   CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
369   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
370 
371   // Assemble local operator diagonal
372   CeedCall(CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
373 
374   // Cleanup
375   if (is_pointblock) {
376     CeedCall(CeedElemRestrictionDestroy(&diag_rstr));
377   }
378   CeedCall(CeedVectorDestroy(&assembled_qf));
379   CeedCall(CeedVectorDestroy(&elem_diag));
380   CeedCall(CeedFree(&identity));
381 
382   return CEED_ERROR_SUCCESS;
383 }
384 
385 /**
386   @brief Core logic for assembling composite operator diagonal
387 
388   @param[in]  op            CeedOperator to assemble point block diagonal
389   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
390   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
391   @param[out] assembled     CeedVector to store assembled diagonal
392 
393   @return An error code: 0 - success, otherwise - failure
394 
395   @ref Developer
396 **/
397 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
398                                                                  CeedVector assembled) {
399   CeedInt       num_sub;
400   CeedOperator *suboperators;
401   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
402   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
403   for (CeedInt i = 0; i < num_sub; i++) {
404     if (is_pointblock) {
405       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
406     } else {
407       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
408     }
409   }
410   return CEED_ERROR_SUCCESS;
411 }
412 
413 /**
414   @brief Build nonzero pattern for non-composite operator
415 
416   Users should generally use CeedOperatorLinearAssembleSymbolic()
417 
418   @param[in]  op     CeedOperator to assemble nonzero pattern
419   @param[in]  offset Offset for number of entries
420   @param[out] rows   Row number for each entry
421   @param[out] cols   Column number for each entry
422 
423   @return An error code: 0 - success, otherwise - failure
424 
425   @ref Developer
426 **/
427 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
428   Ceed ceed = op->ceed;
429   if (op->is_composite) {
430     // LCOV_EXCL_START
431     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
432     // LCOV_EXCL_STOP
433   }
434 
435   CeedSize num_nodes;
436   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
437   CeedElemRestriction rstr_in;
438   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
439   CeedInt num_elem, elem_size, num_comp;
440   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
441   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
442   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
443   CeedInt layout_er[3];
444   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
445 
446   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
447 
448   // Determine elem_dof relation
449   CeedVector index_vec;
450   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
451   CeedScalar *array;
452   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
453   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
454   CeedCall(CeedVectorRestoreArray(index_vec, &array));
455   CeedVector elem_dof;
456   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
457   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
458   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
459   const CeedScalar *elem_dof_a;
460   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
461   CeedCall(CeedVectorDestroy(&index_vec));
462 
463   // Determine i, j locations for element matrices
464   CeedInt count = 0;
465   for (CeedInt e = 0; e < num_elem; e++) {
466     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
467       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
468         for (CeedInt i = 0; i < elem_size; i++) {
469           for (CeedInt j = 0; j < elem_size; j++) {
470             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
471             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
472 
473             const CeedInt row = elem_dof_a[elem_dof_index_row];
474             const CeedInt col = elem_dof_a[elem_dof_index_col];
475 
476             rows[offset + count] = row;
477             cols[offset + count] = col;
478             count++;
479           }
480         }
481       }
482     }
483   }
484   if (count != local_num_entries) {
485     // LCOV_EXCL_START
486     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
487     // LCOV_EXCL_STOP
488   }
489   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
490   CeedCall(CeedVectorDestroy(&elem_dof));
491 
492   return CEED_ERROR_SUCCESS;
493 }
494 
495 /**
496   @brief Assemble nonzero entries for non-composite operator
497 
498   Users should generally use CeedOperatorLinearAssemble()
499 
500   @param[in]  op     CeedOperator to assemble
501   @param[in]  offset Offset for number of entries
502   @param[out] values Values to assemble into matrix
503 
504   @return An error code: 0 - success, otherwise - failure
505 
506   @ref Developer
507 **/
508 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
509   Ceed ceed = op->ceed;
510   if (op->is_composite) {
511     // LCOV_EXCL_START
512     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
513     // LCOV_EXCL_STOP
514   }
515   if (op->num_elem == 0) return CEED_ERROR_SUCCESS;
516 
517   if (op->LinearAssembleSingle) {
518     // Backend version
519     CeedCall(op->LinearAssembleSingle(op, offset, values));
520     return CEED_ERROR_SUCCESS;
521   } else {
522     // Operator fallback
523     CeedOperator op_fallback;
524 
525     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
526     if (op_fallback) {
527       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
528       return CEED_ERROR_SUCCESS;
529     }
530   }
531 
532   // Assemble QFunction
533   CeedQFunction qf;
534   CeedCall(CeedOperatorGetQFunction(op, &qf));
535   CeedVector          assembled_qf;
536   CeedElemRestriction rstr_q;
537   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
538   CeedSize qf_length;
539   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
540 
541   CeedInt            num_input_fields, num_output_fields;
542   CeedOperatorField *input_fields;
543   CeedOperatorField *output_fields;
544   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
545 
546   // Get assembly data
547   CeedOperatorAssemblyData data;
548   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
549   const CeedEvalMode *eval_mode_in, *eval_mode_out;
550   CeedInt             num_eval_mode_in, num_eval_mode_out;
551   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out));
552   CeedBasis basis_in, basis_out;
553   CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL));
554 
555   if (num_eval_mode_in == 0 || num_eval_mode_out == 0) {
556     // LCOV_EXCL_START
557     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
558     // LCOV_EXCL_STOP
559   }
560 
561   CeedElemRestriction active_rstr;
562   CeedInt             num_elem, elem_size, num_qpts, num_comp;
563   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
564   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
565   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
566   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
567   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
568 
569   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
570 
571   // loop over elements and put in data structure
572   const CeedScalar *interp_in, *grad_in;
573   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
574   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
575 
576   const CeedScalar *assembled_qf_array;
577   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
578 
579   CeedInt layout_qf[3];
580   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
581   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
582 
583   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
584   const CeedScalar *B_mat_in, *B_mat_out;
585   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &B_mat_in, NULL, &B_mat_out));
586   CeedScalar  BTD_mat[elem_size * num_qpts * num_eval_mode_in];
587   CeedScalar  elem_mat[elem_size * elem_size];
588   CeedInt     count = 0;
589   CeedScalar *vals;
590   CeedCall(CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals));
591   for (CeedInt e = 0; e < num_elem; e++) {
592     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
593       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
594         // Compute B^T*D
595         for (CeedInt n = 0; n < elem_size; n++) {
596           for (CeedInt q = 0; q < num_qpts; q++) {
597             for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
598               const CeedInt btd_index = n * (num_qpts * num_eval_mode_in) + (num_eval_mode_in * q + e_in);
599               CeedScalar    sum       = 0.0;
600               for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
601                 const CeedInt b_out_index     = (num_eval_mode_out * q + e_out) * elem_size + n;
602                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_mode_out + e_out) * num_comp + comp_out;
603                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
604                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
605               }
606               BTD_mat[btd_index] = sum;
607             }
608           }
609         }
610         // form element matrix itself (for each block component)
611         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_mode_in));
612 
613         // put element matrix in coordinate data structure
614         for (CeedInt i = 0; i < elem_size; i++) {
615           for (CeedInt j = 0; j < elem_size; j++) {
616             vals[offset + count] = elem_mat[i * elem_size + j];
617             count++;
618           }
619         }
620       }
621     }
622   }
623   if (count != local_num_entries) {
624     // LCOV_EXCL_START
625     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
626     // LCOV_EXCL_STOP
627   }
628   CeedCall(CeedVectorRestoreArray(values, &vals));
629 
630   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
631   CeedCall(CeedVectorDestroy(&assembled_qf));
632 
633   return CEED_ERROR_SUCCESS;
634 }
635 
636 /**
637   @brief Count number of entries for assembled CeedOperator
638 
639   @param[in]  op          CeedOperator to assemble
640   @param[out] num_entries Number of entries in assembled representation
641 
642   @return An error code: 0 - success, otherwise - failure
643 
644   @ref Utility
645 **/
646 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
647   CeedElemRestriction rstr;
648   CeedInt             num_elem, elem_size, num_comp;
649 
650   if (op->is_composite) {
651     // LCOV_EXCL_START
652     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
653     // LCOV_EXCL_STOP
654   }
655   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
656   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
657   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
658   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
659   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
660 
661   return CEED_ERROR_SUCCESS;
662 }
663 
664 /**
665   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
666 
667   @param[in]  op_fine      Fine grid operator
668   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter
669   @param[in]  rstr_coarse  Coarse grid restriction
670   @param[in]  basis_coarse Coarse grid active vector basis
671   @param[in]  basis_c_to_f Basis for coarse to fine interpolation
672   @param[out] op_coarse    Coarse grid operator
673   @param[out] op_prolong   Coarse to fine operator
674   @param[out] op_restrict  Fine to coarse operator
675 
676   @return An error code: 0 - success, otherwise - failure
677 
678   @ref Developer
679 **/
680 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
681                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
682   Ceed ceed;
683   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
684 
685   // Check for composite operator
686   bool is_composite;
687   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
688   if (is_composite) {
689     // LCOV_EXCL_START
690     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
691     // LCOV_EXCL_STOP
692   }
693 
694   // Coarse Grid
695   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
696   CeedElemRestriction rstr_fine = NULL;
697   // -- Clone input fields
698   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
699     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
700       rstr_fine = op_fine->input_fields[i]->elem_restr;
701       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
702     } else {
703       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_restr,
704                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
705     }
706   }
707   // -- Clone output fields
708   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
709     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
710       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
711     } else {
712       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_restr,
713                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
714     }
715   }
716   // -- Clone QFunctionAssemblyData
717   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
718 
719   // Multiplicity vector
720   CeedVector mult_vec, mult_e_vec;
721   CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
722   CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
723   CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
724   CeedCall(CeedVectorSetValue(mult_vec, 0.0));
725   CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
726   CeedCall(CeedVectorDestroy(&mult_e_vec));
727   CeedCall(CeedVectorReciprocal(mult_vec));
728 
729   // Restriction
730   CeedInt num_comp;
731   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
732   CeedQFunction qf_restrict;
733   CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
734   CeedInt *num_comp_r_data;
735   CeedCall(CeedCalloc(1, &num_comp_r_data));
736   num_comp_r_data[0] = num_comp;
737   CeedQFunctionContext ctx_r;
738   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
739   CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
740   CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
741   CeedCall(CeedQFunctionContextDestroy(&ctx_r));
742   CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
743   CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
744   CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
745   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
746 
747   CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
748   CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
749   CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
750   CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
751 
752   // Prolongation
753   CeedQFunction qf_prolong;
754   CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
755   CeedInt *num_comp_p_data;
756   CeedCall(CeedCalloc(1, &num_comp_p_data));
757   num_comp_p_data[0] = num_comp;
758   CeedQFunctionContext ctx_p;
759   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
760   CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
761   CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
762   CeedCall(CeedQFunctionContextDestroy(&ctx_p));
763   CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
764   CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
765   CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
766   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
767 
768   CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
769   CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
770   CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
771   CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
772 
773   // Clone name
774   bool   has_name = op_fine->name;
775   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
776   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
777   {
778     char *prolongation_name;
779     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
780     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
781     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
782     CeedCall(CeedFree(&prolongation_name));
783   }
784   {
785     char *restriction_name;
786     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
787     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
788     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
789     CeedCall(CeedFree(&restriction_name));
790   }
791 
792   // Check
793   CeedCall(CeedOperatorCheckReady(*op_coarse));
794   CeedCall(CeedOperatorCheckReady(*op_prolong));
795   CeedCall(CeedOperatorCheckReady(*op_restrict));
796 
797   // Cleanup
798   CeedCall(CeedVectorDestroy(&mult_vec));
799   CeedCall(CeedBasisDestroy(&basis_c_to_f));
800   CeedCall(CeedQFunctionDestroy(&qf_restrict));
801   CeedCall(CeedQFunctionDestroy(&qf_prolong));
802 
803   return CEED_ERROR_SUCCESS;
804 }
805 
806 /**
807   @brief Build 1D mass matrix and Laplacian with perturbation
808 
809   @param[in]  interp_1d   Interpolation matrix in one dimension
810   @param[in]  grad_1d     Gradient matrix in one dimension
811   @param[in]  q_weight_1d Quadrature weights in one dimension
812   @param[in]  P_1d        Number of basis nodes in one dimension
813   @param[in]  Q_1d        Number of quadrature points in one dimension
814   @param[in]  dim         Dimension of basis
815   @param[out] mass        Assembled mass matrix in one dimension
816   @param[out] laplace     Assembled perturbed Laplacian in one dimension
817 
818   @return An error code: 0 - success, otherwise - failure
819 
820   @ref Developer
821 **/
822 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d,
823                                                       CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
824   for (CeedInt i = 0; i < P_1d; i++) {
825     for (CeedInt j = 0; j < P_1d; j++) {
826       CeedScalar sum = 0.0;
827       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
828       mass[i + j * P_1d] = sum;
829     }
830   }
831   // -- Laplacian
832   for (CeedInt i = 0; i < P_1d; i++) {
833     for (CeedInt j = 0; j < P_1d; j++) {
834       CeedScalar sum = 0.0;
835       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
836       laplace[i + j * P_1d] = sum;
837     }
838   }
839   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
840   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
841   return CEED_ERROR_SUCCESS;
842 }
843 CeedPragmaOptimizeOn;
844 
845 /// @}
846 
847 /// ----------------------------------------------------------------------------
848 /// CeedOperator Backend API
849 /// ----------------------------------------------------------------------------
850 /// @addtogroup CeedOperatorBackend
851 /// @{
852 
853 /**
854   @brief Create object holding CeedQFunction assembly data for CeedOperator
855 
856   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
857   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
858 
859   @return An error code: 0 - success, otherwise - failure
860 
861   @ref Backend
862 **/
863 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
864   CeedCall(CeedCalloc(1, data));
865   (*data)->ref_count = 1;
866   (*data)->ceed      = ceed;
867   CeedCall(CeedReference(ceed));
868 
869   return CEED_ERROR_SUCCESS;
870 }
871 
872 /**
873   @brief Increment the reference counter for a CeedQFunctionAssemblyData
874 
875   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
876 
877   @return An error code: 0 - success, otherwise - failure
878 
879   @ref Backend
880 **/
881 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
882   data->ref_count++;
883   return CEED_ERROR_SUCCESS;
884 }
885 
886 /**
887   @brief Set re-use of CeedQFunctionAssemblyData
888 
889   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
890   @param[in]     reuse_data Boolean flag indicating data re-use
891 
892   @return An error code: 0 - success, otherwise - failure
893 
894   @ref Backend
895 **/
896 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
897   data->reuse_data        = reuse_data;
898   data->needs_data_update = true;
899   return CEED_ERROR_SUCCESS;
900 }
901 
902 /**
903   @brief Mark QFunctionAssemblyData as stale
904 
905   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
906   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
907 
908   @return An error code: 0 - success, otherwise - failure
909 
910   @ref Backend
911 **/
912 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
913   data->needs_data_update = needs_data_update;
914   return CEED_ERROR_SUCCESS;
915 }
916 
917 /**
918   @brief Determine if QFunctionAssemblyData needs update
919 
920   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
921   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
922 
923   @return An error code: 0 - success, otherwise - failure
924 
925   @ref Backend
926 **/
927 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
928   *is_update_needed = !data->reuse_data || data->needs_data_update;
929   return CEED_ERROR_SUCCESS;
930 }
931 
932 /**
933   @brief Copy the pointer to a CeedQFunctionAssemblyData.
934            Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
935            Note: If `*data_copy` is non-NULL, then it is assumed that `*data_copy` is a pointer to a CeedQFunctionAssemblyData.
936              This CeedQFunctionAssemblyData will be destroyed if `*data_copy` is the only reference to this CeedQFunctionAssemblyData.
937 
938   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
939   @param[in,out] data_copy Variable to store copied reference
940 
941   @return An error code: 0 - success, otherwise - failure
942 
943   @ref Backend
944 **/
945 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
946   CeedCall(CeedQFunctionAssemblyDataReference(data));
947   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
948   *data_copy = data;
949   return CEED_ERROR_SUCCESS;
950 }
951 
952 /**
953   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
954 
955   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
956   @param[out] is_setup Boolean flag for setup status
957 
958   @return An error code: 0 - success, otherwise - failure
959 
960   @ref Backend
961 **/
962 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
963   *is_setup = data->is_setup;
964   return CEED_ERROR_SUCCESS;
965 }
966 
967 /**
968   @brief Set internal objects for CeedQFunctionAssemblyData
969 
970   @param[in,out] data CeedQFunctionAssemblyData to set objects
971   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
972   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
973 
974   @return An error code: 0 - success, otherwise - failure
975 
976   @ref Backend
977 **/
978 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
979   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
980   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
981 
982   data->is_setup = true;
983   return CEED_ERROR_SUCCESS;
984 }
985 
986 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
987   if (!data->is_setup) {
988     // LCOV_EXCL_START
989     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
990     // LCOV_EXCL_STOP
991   }
992 
993   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
994   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
995 
996   return CEED_ERROR_SUCCESS;
997 }
998 
999 /**
1000   @brief Destroy CeedQFunctionAssemblyData
1001 
1002   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1003 
1004   @return An error code: 0 - success, otherwise - failure
1005 
1006   @ref Backend
1007 **/
1008 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1009   if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;
1010 
1011   CeedCall(CeedDestroy(&(*data)->ceed));
1012   CeedCall(CeedVectorDestroy(&(*data)->vec));
1013   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1014 
1015   CeedCall(CeedFree(data));
1016   return CEED_ERROR_SUCCESS;
1017 }
1018 
1019 /**
1020   @brief Get CeedOperatorAssemblyData
1021 
1022   @param[in]  op   CeedOperator to assemble
1023   @param[out] data CeedQFunctionAssemblyData
1024 
1025   @return An error code: 0 - success, otherwise - failure
1026 
1027   @ref Backend
1028 **/
1029 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1030   if (!op->op_assembled) {
1031     CeedOperatorAssemblyData data;
1032 
1033     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1034     op->op_assembled = data;
1035   }
1036   *data = op->op_assembled;
1037 
1038   return CEED_ERROR_SUCCESS;
1039 }
1040 
1041 /**
1042   @brief Create object holding CeedOperator assembly data
1043 
1044   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1045   @param[in]  op   CeedOperator to be assembled
1046   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1047 
1048   @return An error code: 0 - success, otherwise - failure
1049 
1050   @ref Backend
1051 **/
1052 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1053   CeedCall(CeedCalloc(1, data));
1054   (*data)->ceed = ceed;
1055   CeedCall(CeedReference(ceed));
1056 
1057   // Build OperatorAssembly data
1058   CeedQFunction       qf;
1059   CeedQFunctionField *qf_fields;
1060   CeedOperatorField  *op_fields;
1061   CeedInt             num_input_fields;
1062   CeedCall(CeedOperatorGetQFunction(op, &qf));
1063   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1064   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1065 
1066   // Determine active input basis
1067   CeedInt       num_eval_mode_in = 0, dim = 1;
1068   CeedEvalMode *eval_mode_in = NULL;
1069   CeedBasis     basis_in     = NULL;
1070   for (CeedInt i = 0; i < num_input_fields; i++) {
1071     CeedVector vec;
1072     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1073     if (vec == CEED_VECTOR_ACTIVE) {
1074       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1075       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1076       CeedEvalMode eval_mode;
1077       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1078       switch (eval_mode) {
1079         case CEED_EVAL_NONE:
1080         case CEED_EVAL_INTERP:
1081           CeedCall(CeedRealloc(num_eval_mode_in + 1, &eval_mode_in));
1082           eval_mode_in[num_eval_mode_in] = eval_mode;
1083           num_eval_mode_in += 1;
1084           break;
1085         case CEED_EVAL_GRAD:
1086           CeedCall(CeedRealloc(num_eval_mode_in + dim, &eval_mode_in));
1087           for (CeedInt d = 0; d < dim; d++) {
1088             eval_mode_in[num_eval_mode_in + d] = eval_mode;
1089           }
1090           num_eval_mode_in += dim;
1091           break;
1092         case CEED_EVAL_WEIGHT:
1093         case CEED_EVAL_DIV:
1094         case CEED_EVAL_CURL:
1095           break;  // Caught by QF Assembly
1096       }
1097     }
1098   }
1099   (*data)->num_eval_mode_in = num_eval_mode_in;
1100   (*data)->eval_mode_in     = eval_mode_in;
1101   CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->basis_in));
1102 
1103   // Determine active output basis
1104   CeedInt num_output_fields;
1105   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1106   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1107   CeedInt       num_eval_mode_out = 0;
1108   CeedEvalMode *eval_mode_out     = NULL;
1109   CeedBasis     basis_out         = NULL;
1110   for (CeedInt i = 0; i < num_output_fields; i++) {
1111     CeedVector vec;
1112     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1113     if (vec == CEED_VECTOR_ACTIVE) {
1114       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1115       CeedEvalMode eval_mode;
1116       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1117       switch (eval_mode) {
1118         case CEED_EVAL_NONE:
1119         case CEED_EVAL_INTERP:
1120           CeedCall(CeedRealloc(num_eval_mode_out + 1, &eval_mode_out));
1121           eval_mode_out[num_eval_mode_out] = eval_mode;
1122           num_eval_mode_out += 1;
1123           break;
1124         case CEED_EVAL_GRAD:
1125           CeedCall(CeedRealloc(num_eval_mode_out + dim, &eval_mode_out));
1126           for (CeedInt d = 0; d < dim; d++) {
1127             eval_mode_out[num_eval_mode_out + d] = eval_mode;
1128           }
1129           num_eval_mode_out += dim;
1130           break;
1131         case CEED_EVAL_WEIGHT:
1132         case CEED_EVAL_DIV:
1133         case CEED_EVAL_CURL:
1134           break;  // Caught by QF Assembly
1135       }
1136     }
1137   }
1138   (*data)->num_eval_mode_out = num_eval_mode_out;
1139   (*data)->eval_mode_out     = eval_mode_out;
1140   CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->basis_out));
1141 
1142   return CEED_ERROR_SUCCESS;
1143 }
1144 
1145 /**
1146   @brief Get CeedOperator CeedEvalModes for assembly
1147 
1148   @param[in]  data              CeedOperatorAssemblyData
1149   @param[out] num_eval_mode_in  Pointer to hold number of input CeedEvalModes, or NULL
1150   @param[out] eval_mode_in      Pointer to hold input CeedEvalModes, or NULL
1151   @param[out] num_eval_mode_out Pointer to hold number of output CeedEvalModes, or NULL
1152   @param[out] eval_mode_out     Pointer to hold output CeedEvalModes, or NULL
1153 
1154   @return An error code: 0 - success, otherwise - failure
1155 
1156   @ref Backend
1157 **/
1158 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_eval_mode_in, const CeedEvalMode **eval_mode_in,
1159                                          CeedInt *num_eval_mode_out, const CeedEvalMode **eval_mode_out) {
1160   if (num_eval_mode_in) *num_eval_mode_in = data->num_eval_mode_in;
1161   if (eval_mode_in) *eval_mode_in = data->eval_mode_in;
1162   if (num_eval_mode_out) *num_eval_mode_out = data->num_eval_mode_out;
1163   if (eval_mode_out) *eval_mode_out = data->eval_mode_out;
1164 
1165   return CEED_ERROR_SUCCESS;
1166 }
1167 
1168 /**
1169   @brief Get CeedOperator CeedBasis data for assembly
1170 
1171   @param[in]  data      CeedOperatorAssemblyData
1172   @param[out] basis_in  Pointer to hold active input CeedBasis, or NULL
1173   @param[out] B_in      Pointer to hold assembled active input B, or NULL
1174   @param[out] basis_out Pointer to hold active output CeedBasis, or NULL
1175   @param[out] B_out     Pointer to hold assembled active output B, or NULL
1176 
1177   @return An error code: 0 - success, otherwise - failure
1178 
1179   @ref Backend
1180 **/
1181 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedBasis *basis_in, const CeedScalar **B_in, CeedBasis *basis_out,
1182                                      const CeedScalar **B_out) {
1183   // Assemble B_in, B_out if needed
1184   if (B_in && !data->B_in) {
1185     CeedInt           num_qpts, elem_size;
1186     CeedScalar       *B_in, *identity = NULL;
1187     const CeedScalar *interp_in, *grad_in;
1188     bool              has_eval_none = false;
1189 
1190     CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_in, &num_qpts));
1191     CeedCall(CeedBasisGetNumNodes(data->basis_in, &elem_size));
1192     CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_in, &B_in));
1193 
1194     for (CeedInt i = 0; i < data->num_eval_mode_in; i++) {
1195       has_eval_none = has_eval_none || (data->eval_mode_in[i] == CEED_EVAL_NONE);
1196     }
1197     if (has_eval_none) {
1198       CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1199       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1200         identity[i * elem_size + i] = 1.0;
1201       }
1202     }
1203     CeedCall(CeedBasisGetInterp(data->basis_in, &interp_in));
1204     CeedCall(CeedBasisGetGrad(data->basis_in, &grad_in));
1205 
1206     for (CeedInt q = 0; q < num_qpts; q++) {
1207       for (CeedInt n = 0; n < elem_size; n++) {
1208         CeedInt d_in = -1;
1209         for (CeedInt e_in = 0; e_in < data->num_eval_mode_in; e_in++) {
1210           const CeedInt     qq = data->num_eval_mode_in * q;
1211           const CeedScalar *b  = NULL;
1212 
1213           if (data->eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in++;
1214           CeedOperatorGetBasisPointer(data->eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &b);
1215           B_in[(qq + e_in) * elem_size + n] = b[q * elem_size + n];
1216         }
1217       }
1218     }
1219     data->B_in = B_in;
1220   }
1221 
1222   if (B_out && !data->B_out) {
1223     CeedInt           num_qpts, elem_size;
1224     CeedScalar       *B_out, *identity = NULL;
1225     const CeedScalar *interp_out, *grad_out;
1226     bool              has_eval_none = false;
1227 
1228     CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_out, &num_qpts));
1229     CeedCall(CeedBasisGetNumNodes(data->basis_out, &elem_size));
1230     CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_out, &B_out));
1231 
1232     for (CeedInt i = 0; i < data->num_eval_mode_out; i++) {
1233       has_eval_none = has_eval_none || (data->eval_mode_out[i] == CEED_EVAL_NONE);
1234     }
1235     if (has_eval_none) {
1236       CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1237       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1238         identity[i * elem_size + i] = 1.0;
1239       }
1240     }
1241     CeedCall(CeedBasisGetInterp(data->basis_out, &interp_out));
1242     CeedCall(CeedBasisGetGrad(data->basis_out, &grad_out));
1243 
1244     for (CeedInt q = 0; q < num_qpts; q++) {
1245       for (CeedInt n = 0; n < elem_size; n++) {
1246         CeedInt d_out = -1;
1247         for (CeedInt e_out = 0; e_out < data->num_eval_mode_out; e_out++) {
1248           const CeedInt     qq = data->num_eval_mode_out * q;
1249           const CeedScalar *b  = NULL;
1250 
1251           if (data->eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out++;
1252           CeedOperatorGetBasisPointer(data->eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &b);
1253           B_out[(qq + e_out) * elem_size + n] = b[q * elem_size + n];
1254         }
1255       }
1256     }
1257     data->B_out = B_out;
1258   }
1259 
1260   if (basis_in) *basis_in = data->basis_in;
1261   if (B_in) *B_in = data->B_in;
1262   if (basis_out) *basis_out = data->basis_out;
1263   if (B_out) *B_out = data->B_out;
1264 
1265   return CEED_ERROR_SUCCESS;
1266 }
1267 
1268 /**
1269   @brief Destroy CeedOperatorAssemblyData
1270 
1271   @param[in,out] data CeedOperatorAssemblyData to destroy
1272 
1273   @return An error code: 0 - success, otherwise - failure
1274 
1275   @ref Backend
1276 **/
1277 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1278   if (!*data) return CEED_ERROR_SUCCESS;
1279 
1280   CeedCall(CeedDestroy(&(*data)->ceed));
1281   CeedCall(CeedBasisDestroy(&(*data)->basis_in));
1282   CeedCall(CeedBasisDestroy(&(*data)->basis_out));
1283   CeedCall(CeedFree(&(*data)->eval_mode_in));
1284   CeedCall(CeedFree(&(*data)->eval_mode_out));
1285   CeedCall(CeedFree(&(*data)->B_in));
1286   CeedCall(CeedFree(&(*data)->B_out));
1287 
1288   CeedCall(CeedFree(data));
1289   return CEED_ERROR_SUCCESS;
1290 }
1291 
1292 /// @}
1293 
1294 /// ----------------------------------------------------------------------------
1295 /// CeedOperator Public API
1296 /// ----------------------------------------------------------------------------
1297 /// @addtogroup CeedOperatorUser
1298 /// @{
1299 
1300 /**
1301   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1302 
1303   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1304     The vector 'assembled' is of shape [num_elements, num_input_fields, num_output_fields, num_quad_points] and contains column-major matrices
1305 representing the action of the CeedQFunction for a corresponding quadrature point on an element. Inputs and outputs are in the order provided by the
1306 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order,
1307 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1]
1308 and producing the output [dv_0, dv_1, v].
1309 
1310   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1311 
1312   @param[in]  op        CeedOperator to assemble CeedQFunction
1313   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1314   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1315   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1316 
1317   @return An error code: 0 - success, otherwise - failure
1318 
1319   @ref User
1320 **/
1321 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1322   CeedCall(CeedOperatorCheckReady(op));
1323 
1324   if (op->LinearAssembleQFunction) {
1325     // Backend version
1326     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1327   } else {
1328     // Operator fallback
1329     CeedOperator op_fallback;
1330 
1331     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1332     if (op_fallback) {
1333       CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1334     } else {
1335       // LCOV_EXCL_START
1336       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1337       // LCOV_EXCL_STOP
1338     }
1339   }
1340   return CEED_ERROR_SUCCESS;
1341 }
1342 
1343 /**
1344   @brief Assemble CeedQFunction and store result internally.
1345            Return copied references of stored data to the caller.
1346            Caller is responsible for ownership and destruction of the copied references.
1347            See also @ref CeedOperatorLinearAssembleQFunction
1348 
1349   @param[in]  op        CeedOperator to assemble CeedQFunction
1350   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1351   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1352   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1353 
1354   @return An error code: 0 - success, otherwise - failure
1355 
1356   @ref User
1357 **/
1358 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1359   CeedCall(CeedOperatorCheckReady(op));
1360 
1361   if (op->LinearAssembleQFunctionUpdate) {
1362     // Backend version
1363     bool                qf_assembled_is_setup;
1364     CeedVector          assembled_vec  = NULL;
1365     CeedElemRestriction assembled_rstr = NULL;
1366 
1367     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1368     if (qf_assembled_is_setup) {
1369       bool update_needed;
1370 
1371       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1372       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1373       if (update_needed) {
1374         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1375       }
1376     } else {
1377       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1378       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1379     }
1380     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1381 
1382     // Copy reference from internally held copy
1383     *assembled = NULL;
1384     *rstr      = NULL;
1385     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1386     CeedCall(CeedVectorDestroy(&assembled_vec));
1387     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1388     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1389   } else {
1390     // Operator fallback
1391     CeedOperator op_fallback;
1392 
1393     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1394     if (op_fallback) {
1395       CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1396     } else {
1397       // LCOV_EXCL_START
1398       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1399       // LCOV_EXCL_STOP
1400     }
1401   }
1402 
1403   return CEED_ERROR_SUCCESS;
1404 }
1405 
1406 /**
1407   @brief Assemble the diagonal of a square linear CeedOperator
1408 
1409   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1410 
1411   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1412 
1413   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1414 
1415   @param[in]  op        CeedOperator to assemble CeedQFunction
1416   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1417   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1418 
1419   @return An error code: 0 - success, otherwise - failure
1420 
1421   @ref User
1422 **/
1423 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1424   CeedCall(CeedOperatorCheckReady(op));
1425 
1426   CeedSize input_size = 0, output_size = 0;
1427   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1428   if (input_size != output_size) {
1429     // LCOV_EXCL_START
1430     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1431     // LCOV_EXCL_STOP
1432   }
1433 
1434   if (op->LinearAssembleDiagonal) {
1435     // Backend version
1436     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1437     return CEED_ERROR_SUCCESS;
1438   } else if (op->LinearAssembleAddDiagonal) {
1439     // Backend version with zeroing first
1440     CeedCall(CeedVectorSetValue(assembled, 0.0));
1441     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1442     return CEED_ERROR_SUCCESS;
1443   } else {
1444     // Operator fallback
1445     CeedOperator op_fallback;
1446 
1447     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1448     if (op_fallback) {
1449       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1450       return CEED_ERROR_SUCCESS;
1451     }
1452   }
1453   // Default interface implementation
1454   CeedCall(CeedVectorSetValue(assembled, 0.0));
1455   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1456 
1457   return CEED_ERROR_SUCCESS;
1458 }
1459 
1460 /**
1461   @brief Assemble the diagonal of a square linear CeedOperator
1462 
1463   This sums into a CeedVector the diagonal of a linear CeedOperator.
1464 
1465   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1466 
1467   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1468 
1469   @param[in]  op        CeedOperator to assemble CeedQFunction
1470   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1471   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1472 
1473   @return An error code: 0 - success, otherwise - failure
1474 
1475   @ref User
1476 **/
1477 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1478   CeedCall(CeedOperatorCheckReady(op));
1479 
1480   CeedSize input_size = 0, output_size = 0;
1481   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1482   if (input_size != output_size) {
1483     // LCOV_EXCL_START
1484     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1485     // LCOV_EXCL_STOP
1486   }
1487 
1488   if (op->LinearAssembleAddDiagonal) {
1489     // Backend version
1490     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1491     return CEED_ERROR_SUCCESS;
1492   } else {
1493     // Operator fallback
1494     CeedOperator op_fallback;
1495 
1496     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1497     if (op_fallback) {
1498       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1499       return CEED_ERROR_SUCCESS;
1500     }
1501   }
1502   // Default interface implementation
1503   bool is_composite;
1504   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1505   if (is_composite) {
1506     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1507   } else {
1508     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1509   }
1510 
1511   return CEED_ERROR_SUCCESS;
1512 }
1513 
1514 /**
1515   @brief Assemble the point block diagonal of a square linear CeedOperator
1516 
1517   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1518 
1519   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1520 
1521   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1522 
1523   @param[in]  op        CeedOperator to assemble CeedQFunction
1524   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1525 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1526 component in].
1527   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1528 
1529   @return An error code: 0 - success, otherwise - failure
1530 
1531   @ref User
1532 **/
1533 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1534   CeedCall(CeedOperatorCheckReady(op));
1535 
1536   CeedSize input_size = 0, output_size = 0;
1537   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1538   if (input_size != output_size) {
1539     // LCOV_EXCL_START
1540     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1541     // LCOV_EXCL_STOP
1542   }
1543 
1544   if (op->LinearAssemblePointBlockDiagonal) {
1545     // Backend version
1546     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1547     return CEED_ERROR_SUCCESS;
1548   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1549     // Backend version with zeroing first
1550     CeedCall(CeedVectorSetValue(assembled, 0.0));
1551     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1552     return CEED_ERROR_SUCCESS;
1553   } else {
1554     // Operator fallback
1555     CeedOperator op_fallback;
1556 
1557     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1558     if (op_fallback) {
1559       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1560       return CEED_ERROR_SUCCESS;
1561     }
1562   }
1563   // Default interface implementation
1564   CeedCall(CeedVectorSetValue(assembled, 0.0));
1565   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1566 
1567   return CEED_ERROR_SUCCESS;
1568 }
1569 
1570 /**
1571   @brief Assemble the point block diagonal of a square linear CeedOperator
1572 
1573   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1574 
1575   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1576 
1577   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1578 
1579   @param[in]  op        CeedOperator to assemble CeedQFunction
1580   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1581 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1582 component in].
1583   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1584 
1585   @return An error code: 0 - success, otherwise - failure
1586 
1587   @ref User
1588 **/
1589 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1590   CeedCall(CeedOperatorCheckReady(op));
1591 
1592   CeedSize input_size = 0, output_size = 0;
1593   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1594   if (input_size != output_size) {
1595     // LCOV_EXCL_START
1596     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1597     // LCOV_EXCL_STOP
1598   }
1599 
1600   if (op->LinearAssembleAddPointBlockDiagonal) {
1601     // Backend version
1602     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1603     return CEED_ERROR_SUCCESS;
1604   } else {
1605     // Operator fallback
1606     CeedOperator op_fallback;
1607 
1608     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1609     if (op_fallback) {
1610       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1611       return CEED_ERROR_SUCCESS;
1612     }
1613   }
1614   // Default interface implementation
1615   bool is_composite;
1616   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1617   if (is_composite) {
1618     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1619   } else {
1620     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1621   }
1622 
1623   return CEED_ERROR_SUCCESS;
1624 }
1625 
1626 /**
1627    @brief Fully assemble the nonzero pattern of a linear operator.
1628 
1629    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1630 
1631    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1632 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j)
1633 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1634 
1635    This will generally be slow unless your operator is low-order.
1636 
1637    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1638 
1639    @param[in]  op          CeedOperator to assemble
1640    @param[out] num_entries Number of entries in coordinate nonzero pattern
1641    @param[out] rows        Row number for each entry
1642    @param[out] cols        Column number for each entry
1643 
1644    @ref User
1645 **/
1646 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1647   CeedInt       num_suboperators, single_entries;
1648   CeedOperator *sub_operators;
1649   bool          is_composite;
1650   CeedCall(CeedOperatorCheckReady(op));
1651 
1652   if (op->LinearAssembleSymbolic) {
1653     // Backend version
1654     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1655     return CEED_ERROR_SUCCESS;
1656   } else {
1657     // Operator fallback
1658     CeedOperator op_fallback;
1659 
1660     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1661     if (op_fallback) {
1662       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1663       return CEED_ERROR_SUCCESS;
1664     }
1665   }
1666 
1667   // Default interface implementation
1668 
1669   // count entries and allocate rows, cols arrays
1670   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1671   *num_entries = 0;
1672   if (is_composite) {
1673     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1674     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1675     for (CeedInt k = 0; k < num_suboperators; ++k) {
1676       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1677       *num_entries += single_entries;
1678     }
1679   } else {
1680     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1681     *num_entries += single_entries;
1682   }
1683   CeedCall(CeedCalloc(*num_entries, rows));
1684   CeedCall(CeedCalloc(*num_entries, cols));
1685 
1686   // assemble nonzero locations
1687   CeedInt offset = 0;
1688   if (is_composite) {
1689     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1690     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1691     for (CeedInt k = 0; k < num_suboperators; ++k) {
1692       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1693       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1694       offset += single_entries;
1695     }
1696   } else {
1697     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1698   }
1699 
1700   return CEED_ERROR_SUCCESS;
1701 }
1702 
1703 /**
1704    @brief Fully assemble the nonzero entries of a linear operator.
1705 
1706    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1707 
1708    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1709 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added,
1710 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1711 
1712    This will generally be slow unless your operator is low-order.
1713 
1714    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1715 
1716    @param[in]  op     CeedOperator to assemble
1717    @param[out] values Values to assemble into matrix
1718 
1719    @ref User
1720 **/
1721 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1722   CeedInt       num_suboperators, single_entries = 0;
1723   CeedOperator *sub_operators;
1724   CeedCall(CeedOperatorCheckReady(op));
1725 
1726   if (op->LinearAssemble) {
1727     // Backend version
1728     CeedCall(op->LinearAssemble(op, values));
1729     return CEED_ERROR_SUCCESS;
1730   } else {
1731     // Operator fallback
1732     CeedOperator op_fallback;
1733 
1734     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1735     if (op_fallback) {
1736       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
1737       return CEED_ERROR_SUCCESS;
1738     }
1739   }
1740 
1741   // Default interface implementation
1742   bool is_composite;
1743   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1744 
1745   CeedInt offset = 0;
1746   if (is_composite) {
1747     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1748     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1749     for (CeedInt k = 0; k < num_suboperators; k++) {
1750       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
1751       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1752       offset += single_entries;
1753     }
1754   } else {
1755     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
1756   }
1757 
1758   return CEED_ERROR_SUCCESS;
1759 }
1760 
1761 /**
1762   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
1763 
1764   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1765 
1766   @param[in]  op               Composite CeedOperator
1767   @param[in]  num_skip_indices Number of suboperators to skip
1768   @param[in]  skip_indices     Array of indices of suboperators to skip
1769   @param[out] mult             Vector to store multiplicity (of size l_size)
1770 
1771   @return An error code: 0 - success, otherwise - failure
1772 
1773   @ref User
1774 **/
1775 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
1776   CeedCall(CeedOperatorCheckReady(op));
1777 
1778   Ceed                ceed;
1779   CeedInt             num_sub_ops;
1780   CeedSize            l_vec_len;
1781   CeedScalar         *mult_array;
1782   CeedVector          ones_l_vec;
1783   CeedElemRestriction elem_restr;
1784   CeedOperator       *sub_ops;
1785 
1786   CeedCall(CeedOperatorGetCeed(op, &ceed));
1787 
1788   // Zero mult vector
1789   CeedCall(CeedVectorSetValue(mult, 0.0));
1790 
1791   // Get suboperators
1792   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_ops));
1793   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_ops));
1794   if (num_sub_ops == 0) return CEED_ERROR_SUCCESS;
1795 
1796   // Work vector
1797   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
1798   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
1799   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
1800   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
1801 
1802   // Compute multiplicity across suboperators
1803   for (CeedInt i = 0; i < num_sub_ops; i++) {
1804     const CeedScalar *sub_mult_array;
1805     CeedVector        sub_mult_l_vec, ones_e_vec;
1806 
1807     // -- Check for suboperator to skip
1808     for (CeedInt j = 0; j < num_skip_indices; j++) {
1809       if (skip_indices[j] == i) continue;
1810     }
1811 
1812     // -- Sub operator multiplicity
1813     CeedCall(CeedOperatorGetActiveElemRestriction(sub_ops[i], &elem_restr));
1814     CeedCall(CeedElemRestrictionCreateVector(elem_restr, &sub_mult_l_vec, &ones_e_vec));
1815     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
1816     CeedCall(CeedElemRestrictionApply(elem_restr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
1817     CeedCall(CeedElemRestrictionApply(elem_restr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
1818     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
1819     // ---- Flag every node present in the current suboperator
1820     for (CeedInt j = 0; j < l_vec_len; j++) {
1821       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
1822     }
1823     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
1824     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
1825     CeedCall(CeedVectorDestroy(&ones_e_vec));
1826   }
1827   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
1828   CeedCall(CeedVectorDestroy(&ones_l_vec));
1829 
1830   return CEED_ERROR_SUCCESS;
1831 }
1832 
1833 /**
1834   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
1835 grid interpolation
1836 
1837   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1838 
1839   @param[in]  op_fine      Fine grid operator
1840   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter
1841   @param[in]  rstr_coarse  Coarse grid restriction
1842   @param[in]  basis_coarse Coarse grid active vector basis
1843   @param[out] op_coarse    Coarse grid operator
1844   @param[out] op_prolong   Coarse to fine operator
1845   @param[out] op_restrict  Fine to coarse operator
1846 
1847   @return An error code: 0 - success, otherwise - failure
1848 
1849   @ref User
1850 **/
1851 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1852                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
1853   CeedCall(CeedOperatorCheckReady(op_fine));
1854 
1855   // Build prolongation matrix
1856   CeedBasis basis_fine, basis_c_to_f;
1857   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1858   CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
1859 
1860   // Core code
1861   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1862 
1863   return CEED_ERROR_SUCCESS;
1864 }
1865 
1866 /**
1867   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
1868 
1869   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1870 
1871   @param[in]  op_fine       Fine grid operator
1872   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
1873   @param[in]  rstr_coarse   Coarse grid restriction
1874   @param[in]  basis_coarse  Coarse grid active vector basis
1875   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation
1876   @param[out] op_coarse     Coarse grid operator
1877   @param[out] op_prolong    Coarse to fine operator
1878   @param[out] op_restrict   Fine to coarse operator
1879 
1880   @return An error code: 0 - success, otherwise - failure
1881 
1882   @ref User
1883 **/
1884 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1885                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
1886                                              CeedOperator *op_restrict) {
1887   CeedCall(CeedOperatorCheckReady(op_fine));
1888   Ceed ceed;
1889   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
1890 
1891   // Check for compatible quadrature spaces
1892   CeedBasis basis_fine;
1893   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1894   CeedInt Q_f, Q_c;
1895   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
1896   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
1897   if (Q_f != Q_c) {
1898     // LCOV_EXCL_START
1899     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
1900     // LCOV_EXCL_STOP
1901   }
1902 
1903   // Coarse to fine basis
1904   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
1905   CeedCall(CeedBasisGetDimension(basis_fine, &dim));
1906   CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
1907   CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
1908   CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
1909   P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
1910   CeedScalar *q_ref, *q_weight, *grad;
1911   CeedCall(CeedCalloc(P_1d_f, &q_ref));
1912   CeedCall(CeedCalloc(P_1d_f, &q_weight));
1913   CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
1914   CeedBasis basis_c_to_f;
1915   CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
1916   CeedCall(CeedFree(&q_ref));
1917   CeedCall(CeedFree(&q_weight));
1918   CeedCall(CeedFree(&grad));
1919 
1920   // Core code
1921   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1922   return CEED_ERROR_SUCCESS;
1923 }
1924 
1925 /**
1926   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
1927 
1928   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1929 
1930   @param[in]  op_fine       Fine grid operator
1931   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
1932   @param[in]  rstr_coarse   Coarse grid restriction
1933   @param[in]  basis_coarse  Coarse grid active vector basis
1934   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation
1935   @param[out] op_coarse     Coarse grid operator
1936   @param[out] op_prolong    Coarse to fine operator
1937   @param[out] op_restrict   Fine to coarse operator
1938 
1939   @return An error code: 0 - success, otherwise - failure
1940 
1941   @ref User
1942 **/
1943 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1944                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
1945                                        CeedOperator *op_restrict) {
1946   CeedCall(CeedOperatorCheckReady(op_fine));
1947   Ceed ceed;
1948   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
1949 
1950   // Check for compatible quadrature spaces
1951   CeedBasis basis_fine;
1952   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1953   CeedInt Q_f, Q_c;
1954   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
1955   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
1956   if (Q_f != Q_c) {
1957     // LCOV_EXCL_START
1958     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
1959     // LCOV_EXCL_STOP
1960   }
1961 
1962   // Coarse to fine basis
1963   CeedElemTopology topo;
1964   CeedCall(CeedBasisGetTopology(basis_fine, &topo));
1965   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
1966   CeedCall(CeedBasisGetDimension(basis_fine, &dim));
1967   CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
1968   CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
1969   CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
1970   CeedScalar *q_ref, *q_weight, *grad;
1971   CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
1972   CeedCall(CeedCalloc(num_nodes_f, &q_weight));
1973   CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
1974   CeedBasis basis_c_to_f;
1975   CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
1976   CeedCall(CeedFree(&q_ref));
1977   CeedCall(CeedFree(&q_weight));
1978   CeedCall(CeedFree(&grad));
1979 
1980   // Core code
1981   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1982   return CEED_ERROR_SUCCESS;
1983 }
1984 
1985 /**
1986   @brief Build a FDM based approximate inverse for each element for a CeedOperator
1987 
1988   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
1989     This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, M = V^T V, K = V^T S V.
1990     The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form V^T
1991 S^hat V. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear.
1992 
1993   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1994 
1995   @param[in]  op      CeedOperator to create element inverses
1996   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
1997   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1998 
1999   @return An error code: 0 - success, otherwise - failure
2000 
2001   @ref User
2002 **/
2003 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2004   CeedCall(CeedOperatorCheckReady(op));
2005 
2006   if (op->CreateFDMElementInverse) {
2007     // Backend version
2008     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2009     return CEED_ERROR_SUCCESS;
2010   } else {
2011     // Operator fallback
2012     CeedOperator op_fallback;
2013 
2014     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2015     if (op_fallback) {
2016       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2017       return CEED_ERROR_SUCCESS;
2018     }
2019   }
2020 
2021   // Default interface implementation
2022   Ceed ceed, ceed_parent;
2023   CeedCall(CeedOperatorGetCeed(op, &ceed));
2024   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2025   ceed_parent = ceed_parent ? ceed_parent : ceed;
2026   CeedQFunction qf;
2027   CeedCall(CeedOperatorGetQFunction(op, &qf));
2028 
2029   // Determine active input basis
2030   bool                interp = false, grad = false;
2031   CeedBasis           basis = NULL;
2032   CeedElemRestriction rstr  = NULL;
2033   CeedOperatorField  *op_fields;
2034   CeedQFunctionField *qf_fields;
2035   CeedInt             num_input_fields;
2036   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2037   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2038   for (CeedInt i = 0; i < num_input_fields; i++) {
2039     CeedVector vec;
2040     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2041     if (vec == CEED_VECTOR_ACTIVE) {
2042       CeedEvalMode eval_mode;
2043       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2044       interp = interp || eval_mode == CEED_EVAL_INTERP;
2045       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2046       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2047       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2048     }
2049   }
2050   if (!basis) {
2051     // LCOV_EXCL_START
2052     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2053     // LCOV_EXCL_STOP
2054   }
2055   CeedSize l_size = 1;
2056   CeedInt  P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
2057   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2058   CeedCall(CeedBasisGetNumNodes(basis, &elem_size));
2059   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2060   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2061   CeedCall(CeedBasisGetDimension(basis, &dim));
2062   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2063   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2064   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2065 
2066   // Build and diagonalize 1D Mass and Laplacian
2067   bool tensor_basis;
2068   CeedCall(CeedBasisIsTensor(basis, &tensor_basis));
2069   if (!tensor_basis) {
2070     // LCOV_EXCL_START
2071     return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2072     // LCOV_EXCL_STOP
2073   }
2074   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2075   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2076   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2077   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2078   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2079   CeedCall(CeedCalloc(P_1d, &lambda));
2080   // -- Build matrices
2081   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2082   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2083   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2084   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2085   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2086 
2087   // -- Diagonalize
2088   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2089   CeedCall(CeedFree(&mass));
2090   CeedCall(CeedFree(&laplace));
2091   for (CeedInt i = 0; i < P_1d; i++) {
2092     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2093   }
2094   CeedCall(CeedFree(&x));
2095 
2096   // Assemble QFunction
2097   CeedVector          assembled;
2098   CeedElemRestriction rstr_qf;
2099   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2100   CeedInt layout[3];
2101   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2102   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2103   CeedScalar max_norm = 0;
2104   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2105 
2106   // Calculate element averages
2107   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2108   CeedScalar       *elem_avg;
2109   const CeedScalar *assembled_array, *q_weight_array;
2110   CeedVector        q_weight;
2111   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2112   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2113   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2114   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2115   CeedCall(CeedCalloc(num_elem, &elem_avg));
2116   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2117   for (CeedInt e = 0; e < num_elem; e++) {
2118     CeedInt count = 0;
2119     for (CeedInt q = 0; q < num_qpts; q++) {
2120       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2121         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2122           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2123           count++;
2124         }
2125       }
2126     }
2127     if (count) {
2128       elem_avg[e] /= count;
2129     } else {
2130       elem_avg[e] = 1.0;
2131     }
2132   }
2133   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2134   CeedCall(CeedVectorDestroy(&assembled));
2135   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2136   CeedCall(CeedVectorDestroy(&q_weight));
2137 
2138   // Build FDM diagonal
2139   CeedVector  q_data;
2140   CeedScalar *q_data_array, *fdm_diagonal;
2141   CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal));
2142   const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON;
2143   for (CeedInt c = 0; c < num_comp; c++) {
2144     for (CeedInt n = 0; n < elem_size; n++) {
2145       if (interp) fdm_diagonal[c * elem_size + n] = 1.0;
2146       if (grad) {
2147         for (CeedInt d = 0; d < dim; d++) {
2148           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2149           fdm_diagonal[c * elem_size + n] += lambda[i];
2150         }
2151       }
2152       if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound;
2153     }
2154   }
2155   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data));
2156   CeedCall(CeedVectorSetValue(q_data, 0.0));
2157   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2158   for (CeedInt e = 0; e < num_elem; e++) {
2159     for (CeedInt c = 0; c < num_comp; c++) {
2160       for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]);
2161     }
2162   }
2163   CeedCall(CeedFree(&elem_avg));
2164   CeedCall(CeedFree(&fdm_diagonal));
2165   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2166 
2167   // Setup FDM operator
2168   // -- Basis
2169   CeedBasis   fdm_basis;
2170   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2171   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2172   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2173   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2174   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2175   CeedCall(CeedFree(&fdm_interp));
2176   CeedCall(CeedFree(&grad_dummy));
2177   CeedCall(CeedFree(&q_ref_dummy));
2178   CeedCall(CeedFree(&q_weight_dummy));
2179   CeedCall(CeedFree(&lambda));
2180 
2181   // -- Restriction
2182   CeedElemRestriction rstr_qd_i;
2183   CeedInt             strides[3] = {1, elem_size, elem_size * num_comp};
2184   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i));
2185   // -- QFunction
2186   CeedQFunction qf_fdm;
2187   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2188   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2189   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2190   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2191   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2192   // -- QFunction context
2193   CeedInt *num_comp_data;
2194   CeedCall(CeedCalloc(1, &num_comp_data));
2195   num_comp_data[0] = num_comp;
2196   CeedQFunctionContext ctx_fdm;
2197   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2198   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2199   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2200   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2201   // -- Operator
2202   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2203   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2204   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2205   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2206 
2207   // Cleanup
2208   CeedCall(CeedVectorDestroy(&q_data));
2209   CeedCall(CeedBasisDestroy(&fdm_basis));
2210   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2211   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2212 
2213   return CEED_ERROR_SUCCESS;
2214 }
2215 
2216 /// @}
2217