xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision 58e4b056e7b8c075111600b0ac7bf1fde94b8059)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <assert.h>
9 #include <ceed-impl.h>
10 #include <ceed/backend.h>
11 #include <ceed/ceed.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   Ceed ceed_fallback;
84 
85   // Check not already created
86   if (op->op_fallback) return CEED_ERROR_SUCCESS;
87 
88   // Fallback Ceed
89   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
90   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
91 
92   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
93   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
94 
95   // Clone Op
96   CeedOperator op_fallback;
97   if (op->is_composite) {
98     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
99     for (CeedInt i = 0; i < op->num_suboperators; i++) {
100       CeedOperator op_sub_fallback;
101 
102       CeedCall(CeedOperatorGetFallback(op->sub_operators[i], &op_sub_fallback));
103       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
104     }
105   } else {
106     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
107     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
108     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
109     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
110     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
111     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
112       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_restr, op->input_fields[i]->basis,
113                                     op->input_fields[i]->vec));
114     }
115     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
116       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_restr, op->output_fields[i]->basis,
117                                     op->output_fields[i]->vec));
118     }
119     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
120     if (op_fallback->num_qpts == 0) {
121       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
122     }
123     // Cleanup
124     CeedCall(CeedQFunctionDestroy(&qf_fallback));
125     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
126     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
127   }
128   CeedCall(CeedOperatorSetName(op_fallback, op->name));
129   CeedCall(CeedOperatorCheckReady(op_fallback));
130   op->op_fallback = op_fallback;
131 
132   return CEED_ERROR_SUCCESS;
133 }
134 
135 /**
136   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
137 
138   @param[in]  op          CeedOperator to retrieve fallback for
139   @param[out] op_fallback Fallback CeedOperator
140 
141   @return An error code: 0 - success, otherwise - failure
142 
143   @ref Developer
144 **/
145 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
146   // Create if needed
147   if (!op->op_fallback) {
148     CeedCall(CeedOperatorCreateFallback(op));
149   }
150   if (op->op_fallback) {
151     bool is_debug;
152 
153     CeedCall(CeedIsDebug(op->ceed, &is_debug));
154     if (is_debug) {
155       Ceed        ceed_fallback;
156       const char *resource, *resource_fallback;
157 
158       CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
159       CeedCall(CeedGetResource(op->ceed, &resource));
160       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
161 
162       CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
163       CeedDebug(op->ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
164                 op->op_fallback);
165     }
166   }
167   *op_fallback = op->op_fallback;
168 
169   return CEED_ERROR_SUCCESS;
170 }
171 
172 /**
173   @brief Select correct basis matrix pointer based on CeedEvalMode
174 
175   @param[in]  eval_mode Current basis evaluation mode
176   @param[in]  identity  Pointer to identity matrix
177   @param[in]  interp    Pointer to interpolation matrix
178   @param[in]  grad      Pointer to gradient matrix
179   @param[out] basis_ptr Basis pointer to set
180 
181   @ref Developer
182 **/
183 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad,
184                                                const CeedScalar **basis_ptr) {
185   switch (eval_mode) {
186     case CEED_EVAL_NONE:
187       *basis_ptr = identity;
188       break;
189     case CEED_EVAL_INTERP:
190       *basis_ptr = interp;
191       break;
192     case CEED_EVAL_GRAD:
193       *basis_ptr = grad;
194       break;
195     case CEED_EVAL_WEIGHT:
196     case CEED_EVAL_DIV:
197     case CEED_EVAL_CURL:
198       break;  // Caught by QF Assembly
199   }
200   assert(*basis_ptr != NULL);
201 }
202 
203 /**
204   @brief Create point block restriction for active operator field
205 
206   @param[in]  rstr            Original CeedElemRestriction for active field
207   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
208 
209   @return An error code: 0 - success, otherwise - failure
210 
211   @ref Developer
212 **/
213 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
214   Ceed ceed;
215   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
216   const CeedInt *offsets;
217   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
218 
219   // Expand offsets
220   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
221   CeedSize l_size;
222   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
223   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
224   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
225   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
226   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
227   CeedInt shift = num_comp;
228   if (comp_stride != 1) shift *= num_comp;
229   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
230   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
231     pointblock_offsets[i] = offsets[i] * shift;
232   }
233 
234   // Create new restriction
235   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
236                                      pointblock_offsets, pointblock_rstr));
237 
238   // Cleanup
239   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
240 
241   return CEED_ERROR_SUCCESS;
242 }
243 
244 /**
245   @brief Core logic for assembling operator diagonal or point block diagonal
246 
247   @param[in]  op            CeedOperator to assemble point block diagonal
248   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
249   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
250   @param[out] assembled     CeedVector to store assembled diagonal
251 
252   @return An error code: 0 - success, otherwise - failure
253 
254   @ref Developer
255 **/
256 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
257   Ceed ceed;
258   CeedCall(CeedOperatorGetCeed(op, &ceed));
259 
260   // Assemble QFunction
261   CeedQFunction qf;
262   CeedCall(CeedOperatorGetQFunction(op, &qf));
263   CeedInt num_input_fields, num_output_fields;
264   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
265   CeedVector          assembled_qf;
266   CeedElemRestriction rstr;
267   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr, request));
268   CeedInt layout[3];
269   CeedCall(CeedElemRestrictionGetELayout(rstr, &layout));
270   CeedCall(CeedElemRestrictionDestroy(&rstr));
271 
272   // Get assembly data
273   CeedOperatorAssemblyData data;
274   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
275   const CeedEvalMode *eval_mode_in, *eval_mode_out;
276   CeedInt             num_eval_mode_in, num_eval_mode_out;
277   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out));
278   CeedBasis basis_in, basis_out;
279   CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL));
280   CeedInt num_comp;
281   CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
282 
283   // Assemble point block diagonal restriction, if needed
284   CeedElemRestriction diag_rstr;
285   CeedCall(CeedOperatorGetActiveElemRestriction(op, &diag_rstr));
286   if (is_pointblock) {
287     CeedElemRestriction point_block_rstr;
288     CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_rstr, &point_block_rstr));
289     diag_rstr = point_block_rstr;
290   }
291 
292   // Create diagonal vector
293   CeedVector elem_diag;
294   CeedCall(CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag));
295 
296   // Assemble element operator diagonals
297   CeedScalar       *elem_diag_array;
298   const CeedScalar *assembled_qf_array;
299   CeedCall(CeedVectorSetValue(elem_diag, 0.0));
300   CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
301   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
302   CeedInt num_elem, num_nodes, num_qpts;
303   CeedCall(CeedElemRestrictionGetNumElements(diag_rstr, &num_elem));
304   CeedCall(CeedBasisGetNumNodes(basis_in, &num_nodes));
305   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
306 
307   // Basis matrices
308   const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
309   CeedScalar       *identity      = NULL;
310   bool              has_eval_none = false;
311   for (CeedInt i = 0; i < num_eval_mode_in; i++) {
312     has_eval_none = has_eval_none || (eval_mode_in[i] == CEED_EVAL_NONE);
313   }
314   for (CeedInt i = 0; i < num_eval_mode_out; i++) {
315     has_eval_none = has_eval_none || (eval_mode_out[i] == CEED_EVAL_NONE);
316   }
317   if (has_eval_none) {
318     CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
319     for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
320   }
321   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
322   CeedCall(CeedBasisGetInterp(basis_out, &interp_out));
323   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
324   CeedCall(CeedBasisGetGrad(basis_out, &grad_out));
325   // Compute the diagonal of B^T D B
326   // Each element
327   for (CeedInt e = 0; e < num_elem; e++) {
328     CeedInt d_out = -1;
329     // Each basis eval mode pair
330     for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
331       const CeedScalar *bt = NULL;
332       if (eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out += 1;
333       CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * num_nodes], &bt);
334       CeedInt d_in = -1;
335       for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
336         const CeedScalar *b = NULL;
337         if (eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in += 1;
338         CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * num_nodes], &b);
339         // Each component
340         for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
341           // Each qpoint/node pair
342           for (CeedInt q = 0; q < num_qpts; q++) {
343             if (is_pointblock) {
344               // Point Block Diagonal
345               for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
346                 const CeedScalar qf_value =
347                     assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_in) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] +
348                                        e * layout[2]];
349                 for (CeedInt n = 0; n < num_nodes; n++) {
350                   elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
351                       bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n];
352                 }
353               }
354             } else {
355               // Diagonal Only
356               const CeedScalar qf_value =
357                   assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_out) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] +
358                                      e * layout[2]];
359               for (CeedInt n = 0; n < num_nodes; n++) {
360                 elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n];
361               }
362             }
363           }
364         }
365       }
366     }
367   }
368   CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
369   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
370 
371   // Assemble local operator diagonal
372   CeedCall(CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
373 
374   // Cleanup
375   if (is_pointblock) {
376     CeedCall(CeedElemRestrictionDestroy(&diag_rstr));
377   }
378   CeedCall(CeedVectorDestroy(&assembled_qf));
379   CeedCall(CeedVectorDestroy(&elem_diag));
380   CeedCall(CeedFree(&identity));
381 
382   return CEED_ERROR_SUCCESS;
383 }
384 
385 /**
386   @brief Core logic for assembling composite operator diagonal
387 
388   @param[in]  op            CeedOperator to assemble point block diagonal
389   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
390   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
391   @param[out] assembled     CeedVector to store assembled diagonal
392 
393   @return An error code: 0 - success, otherwise - failure
394 
395   @ref Developer
396 **/
397 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
398                                                                  CeedVector assembled) {
399   CeedInt       num_sub;
400   CeedOperator *suboperators;
401   CeedCall(CeedOperatorGetNumSub(op, &num_sub));
402   CeedCall(CeedOperatorGetSubList(op, &suboperators));
403   for (CeedInt i = 0; i < num_sub; i++) {
404     if (is_pointblock) {
405       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
406     } else {
407       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
408     }
409   }
410   return CEED_ERROR_SUCCESS;
411 }
412 
413 /**
414   @brief Build nonzero pattern for non-composite operator
415 
416   Users should generally use CeedOperatorLinearAssembleSymbolic()
417 
418   @param[in]  op     CeedOperator to assemble nonzero pattern
419   @param[in]  offset Offset for number of entries
420   @param[out] rows   Row number for each entry
421   @param[out] cols   Column number for each entry
422 
423   @return An error code: 0 - success, otherwise - failure
424 
425   @ref Developer
426 **/
427 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
428   Ceed ceed = op->ceed;
429   if (op->is_composite) {
430     // LCOV_EXCL_START
431     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
432     // LCOV_EXCL_STOP
433   }
434 
435   CeedSize num_nodes;
436   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
437   CeedElemRestriction rstr_in;
438   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
439   CeedInt num_elem, elem_size, num_comp;
440   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
441   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
442   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
443   CeedInt layout_er[3];
444   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
445 
446   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
447 
448   // Determine elem_dof relation
449   CeedVector index_vec;
450   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
451   CeedScalar *array;
452   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
453   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
454   CeedCall(CeedVectorRestoreArray(index_vec, &array));
455   CeedVector elem_dof;
456   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
457   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
458   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
459   const CeedScalar *elem_dof_a;
460   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
461   CeedCall(CeedVectorDestroy(&index_vec));
462 
463   // Determine i, j locations for element matrices
464   CeedInt count = 0;
465   for (CeedInt e = 0; e < num_elem; e++) {
466     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
467       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
468         for (CeedInt i = 0; i < elem_size; i++) {
469           for (CeedInt j = 0; j < elem_size; j++) {
470             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
471             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
472 
473             const CeedInt row = elem_dof_a[elem_dof_index_row];
474             const CeedInt col = elem_dof_a[elem_dof_index_col];
475 
476             rows[offset + count] = row;
477             cols[offset + count] = col;
478             count++;
479           }
480         }
481       }
482     }
483   }
484   if (count != local_num_entries) {
485     // LCOV_EXCL_START
486     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
487     // LCOV_EXCL_STOP
488   }
489   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
490   CeedCall(CeedVectorDestroy(&elem_dof));
491 
492   return CEED_ERROR_SUCCESS;
493 }
494 
495 /**
496   @brief Assemble nonzero entries for non-composite operator
497 
498   Users should generally use CeedOperatorLinearAssemble()
499 
500   @param[in]  op     CeedOperator to assemble
501   @param[in]  offset Offset for number of entries
502   @param[out] values Values to assemble into matrix
503 
504   @return An error code: 0 - success, otherwise - failure
505 
506   @ref Developer
507 **/
508 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
509   Ceed ceed = op->ceed;
510   if (op->is_composite) {
511     // LCOV_EXCL_START
512     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
513     // LCOV_EXCL_STOP
514   }
515   if (op->num_elem == 0) return CEED_ERROR_SUCCESS;
516 
517   if (op->LinearAssembleSingle) {
518     // Backend version
519     CeedCall(op->LinearAssembleSingle(op, offset, values));
520     return CEED_ERROR_SUCCESS;
521   } else {
522     // Operator fallback
523     CeedOperator op_fallback;
524 
525     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
526     if (op_fallback) {
527       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
528       return CEED_ERROR_SUCCESS;
529     }
530   }
531 
532   // Assemble QFunction
533   CeedQFunction qf;
534   CeedCall(CeedOperatorGetQFunction(op, &qf));
535   CeedVector          assembled_qf;
536   CeedElemRestriction rstr_q;
537   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
538   CeedSize qf_length;
539   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
540 
541   CeedInt            num_input_fields, num_output_fields;
542   CeedOperatorField *input_fields;
543   CeedOperatorField *output_fields;
544   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
545 
546   // Get assembly data
547   CeedOperatorAssemblyData data;
548   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
549   const CeedEvalMode *eval_mode_in, *eval_mode_out;
550   CeedInt             num_eval_mode_in, num_eval_mode_out;
551   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out));
552   CeedBasis basis_in, basis_out;
553   CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL));
554 
555   if (num_eval_mode_in == 0 || num_eval_mode_out == 0) {
556     // LCOV_EXCL_START
557     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
558     // LCOV_EXCL_STOP
559   }
560 
561   CeedElemRestriction active_rstr;
562   CeedInt             num_elem, elem_size, num_qpts, num_comp;
563   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
564   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
565   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
566   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
567   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
568 
569   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
570 
571   // loop over elements and put in data structure
572   const CeedScalar *interp_in, *grad_in;
573   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
574   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
575 
576   const CeedScalar *assembled_qf_array;
577   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
578 
579   CeedInt layout_qf[3];
580   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
581   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
582 
583   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
584   const CeedScalar *B_mat_in, *B_mat_out;
585   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &B_mat_in, NULL, &B_mat_out));
586   CeedScalar  BTD_mat[elem_size * num_qpts * num_eval_mode_in];
587   CeedScalar  elem_mat[elem_size * elem_size];
588   CeedInt     count = 0;
589   CeedScalar *vals;
590   CeedCall(CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals));
591   for (CeedInt e = 0; e < num_elem; e++) {
592     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
593       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
594         // Compute B^T*D
595         for (CeedInt n = 0; n < elem_size; n++) {
596           for (CeedInt q = 0; q < num_qpts; q++) {
597             for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
598               const CeedInt btd_index = n * (num_qpts * num_eval_mode_in) + (num_eval_mode_in * q + e_in);
599               CeedScalar    sum       = 0.0;
600               for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
601                 const CeedInt b_out_index     = (num_eval_mode_out * q + e_out) * elem_size + n;
602                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_mode_out + e_out) * num_comp + comp_out;
603                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
604                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
605               }
606               BTD_mat[btd_index] = sum;
607             }
608           }
609         }
610         // form element matrix itself (for each block component)
611         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_mode_in));
612 
613         // put element matrix in coordinate data structure
614         for (CeedInt i = 0; i < elem_size; i++) {
615           for (CeedInt j = 0; j < elem_size; j++) {
616             vals[offset + count] = elem_mat[i * elem_size + j];
617             count++;
618           }
619         }
620       }
621     }
622   }
623   if (count != local_num_entries) {
624     // LCOV_EXCL_START
625     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
626     // LCOV_EXCL_STOP
627   }
628   CeedCall(CeedVectorRestoreArray(values, &vals));
629 
630   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
631   CeedCall(CeedVectorDestroy(&assembled_qf));
632 
633   return CEED_ERROR_SUCCESS;
634 }
635 
636 /**
637   @brief Count number of entries for assembled CeedOperator
638 
639   @param[in]  op          CeedOperator to assemble
640   @param[out] num_entries Number of entries in assembled representation
641 
642   @return An error code: 0 - success, otherwise - failure
643 
644   @ref Utility
645 **/
646 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
647   CeedElemRestriction rstr;
648   CeedInt             num_elem, elem_size, num_comp;
649 
650   if (op->is_composite) {
651     // LCOV_EXCL_START
652     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
653     // LCOV_EXCL_STOP
654   }
655   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
656   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
657   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
658   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
659   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
660 
661   return CEED_ERROR_SUCCESS;
662 }
663 
664 /**
665   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
666 
667   @param[in]  op_fine      Fine grid operator
668   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter
669   @param[in]  rstr_coarse  Coarse grid restriction
670   @param[in]  basis_coarse Coarse grid active vector basis
671   @param[in]  basis_c_to_f Basis for coarse to fine interpolation
672   @param[out] op_coarse    Coarse grid operator
673   @param[out] op_prolong   Coarse to fine operator
674   @param[out] op_restrict  Fine to coarse operator
675 
676   @return An error code: 0 - success, otherwise - failure
677 
678   @ref Developer
679 **/
680 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
681                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
682   Ceed ceed;
683   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
684 
685   // Check for composite operator
686   bool is_composite;
687   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
688   if (is_composite) {
689     // LCOV_EXCL_START
690     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
691     // LCOV_EXCL_STOP
692   }
693 
694   // Coarse Grid
695   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
696   CeedElemRestriction rstr_fine = NULL;
697   // -- Clone input fields
698   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
699     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
700       rstr_fine = op_fine->input_fields[i]->elem_restr;
701       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
702     } else {
703       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_restr,
704                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
705     }
706   }
707   // -- Clone output fields
708   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
709     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
710       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
711     } else {
712       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_restr,
713                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
714     }
715   }
716   // -- Clone QFunctionAssemblyData
717   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
718 
719   // Multiplicity vector
720   CeedVector mult_vec, mult_e_vec;
721   CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
722   CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
723   CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
724   CeedCall(CeedVectorSetValue(mult_vec, 0.0));
725   CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
726   CeedCall(CeedVectorDestroy(&mult_e_vec));
727   CeedCall(CeedVectorReciprocal(mult_vec));
728 
729   // Restriction
730   CeedInt num_comp;
731   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
732   CeedQFunction qf_restrict;
733   CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
734   CeedInt *num_comp_r_data;
735   CeedCall(CeedCalloc(1, &num_comp_r_data));
736   num_comp_r_data[0] = num_comp;
737   CeedQFunctionContext ctx_r;
738   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
739   CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
740   CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
741   CeedCall(CeedQFunctionContextDestroy(&ctx_r));
742   CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
743   CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
744   CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
745   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
746 
747   CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
748   CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
749   CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
750   CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
751 
752   // Prolongation
753   CeedQFunction qf_prolong;
754   CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
755   CeedInt *num_comp_p_data;
756   CeedCall(CeedCalloc(1, &num_comp_p_data));
757   num_comp_p_data[0] = num_comp;
758   CeedQFunctionContext ctx_p;
759   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
760   CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
761   CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
762   CeedCall(CeedQFunctionContextDestroy(&ctx_p));
763   CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
764   CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
765   CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
766   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
767 
768   CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
769   CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
770   CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
771   CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
772 
773   // Clone name
774   bool   has_name = op_fine->name;
775   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
776   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
777   {
778     char *prolongation_name;
779     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
780     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
781     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
782     CeedCall(CeedFree(&prolongation_name));
783   }
784   {
785     char *restriction_name;
786     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
787     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
788     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
789     CeedCall(CeedFree(&restriction_name));
790   }
791 
792   // Check
793   CeedCall(CeedOperatorCheckReady(*op_coarse));
794   CeedCall(CeedOperatorCheckReady(*op_prolong));
795   CeedCall(CeedOperatorCheckReady(*op_restrict));
796 
797   // Cleanup
798   CeedCall(CeedVectorDestroy(&mult_vec));
799   CeedCall(CeedBasisDestroy(&basis_c_to_f));
800   CeedCall(CeedQFunctionDestroy(&qf_restrict));
801   CeedCall(CeedQFunctionDestroy(&qf_prolong));
802 
803   return CEED_ERROR_SUCCESS;
804 }
805 
806 /**
807   @brief Build 1D mass matrix and Laplacian with perturbation
808 
809   @param[in]  interp_1d   Interpolation matrix in one dimension
810   @param[in]  grad_1d     Gradient matrix in one dimension
811   @param[in]  q_weight_1d Quadrature weights in one dimension
812   @param[in]  P_1d        Number of basis nodes in one dimension
813   @param[in]  Q_1d        Number of quadrature points in one dimension
814   @param[in]  dim         Dimension of basis
815   @param[out] mass        Assembled mass matrix in one dimension
816   @param[out] laplace     Assembled perturbed Laplacian in one dimension
817 
818   @return An error code: 0 - success, otherwise - failure
819 
820   @ref Developer
821 **/
822 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d,
823                                                       CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
824   for (CeedInt i = 0; i < P_1d; i++) {
825     for (CeedInt j = 0; j < P_1d; j++) {
826       CeedScalar sum = 0.0;
827       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
828       mass[i + j * P_1d] = sum;
829     }
830   }
831   // -- Laplacian
832   for (CeedInt i = 0; i < P_1d; i++) {
833     for (CeedInt j = 0; j < P_1d; j++) {
834       CeedScalar sum = 0.0;
835       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
836       laplace[i + j * P_1d] = sum;
837     }
838   }
839   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
840   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
841   return CEED_ERROR_SUCCESS;
842 }
843 CeedPragmaOptimizeOn;
844 
845 /// @}
846 
847 /// ----------------------------------------------------------------------------
848 /// CeedOperator Backend API
849 /// ----------------------------------------------------------------------------
850 /// @addtogroup CeedOperatorBackend
851 /// @{
852 
853 /**
854   @brief Create object holding CeedQFunction assembly data for CeedOperator
855 
856   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
857   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
858 
859   @return An error code: 0 - success, otherwise - failure
860 
861   @ref Backend
862 **/
863 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
864   CeedCall(CeedCalloc(1, data));
865   (*data)->ref_count = 1;
866   (*data)->ceed      = ceed;
867   CeedCall(CeedReference(ceed));
868 
869   return CEED_ERROR_SUCCESS;
870 }
871 
872 /**
873   @brief Increment the reference counter for a CeedQFunctionAssemblyData
874 
875   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
876 
877   @return An error code: 0 - success, otherwise - failure
878 
879   @ref Backend
880 **/
881 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
882   data->ref_count++;
883   return CEED_ERROR_SUCCESS;
884 }
885 
886 /**
887   @brief Set re-use of CeedQFunctionAssemblyData
888 
889   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
890   @param[in]     reuse_data Boolean flag indicating data re-use
891 
892   @return An error code: 0 - success, otherwise - failure
893 
894   @ref Backend
895 **/
896 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
897   data->reuse_data        = reuse_data;
898   data->needs_data_update = true;
899   return CEED_ERROR_SUCCESS;
900 }
901 
902 /**
903   @brief Mark QFunctionAssemblyData as stale
904 
905   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
906   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
907 
908   @return An error code: 0 - success, otherwise - failure
909 
910   @ref Backend
911 **/
912 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
913   data->needs_data_update = needs_data_update;
914   return CEED_ERROR_SUCCESS;
915 }
916 
917 /**
918   @brief Determine if QFunctionAssemblyData needs update
919 
920   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
921   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
922 
923   @return An error code: 0 - success, otherwise - failure
924 
925   @ref Backend
926 **/
927 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
928   *is_update_needed = !data->reuse_data || data->needs_data_update;
929   return CEED_ERROR_SUCCESS;
930 }
931 
932 /**
933   @brief Copy the pointer to a CeedQFunctionAssemblyData.
934            Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
935            Note: If `*data_copy` is non-NULL, then it is assumed that `*data_copy` is a pointer to a CeedQFunctionAssemblyData.
936              This CeedQFunctionAssemblyData will be destroyed if `*data_copy` is the only reference to this CeedQFunctionAssemblyData.
937 
938   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
939   @param[in,out] data_copy Variable to store copied reference
940 
941   @return An error code: 0 - success, otherwise - failure
942 
943   @ref Backend
944 **/
945 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
946   CeedCall(CeedQFunctionAssemblyDataReference(data));
947   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
948   *data_copy = data;
949   return CEED_ERROR_SUCCESS;
950 }
951 
952 /**
953   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
954 
955   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
956   @param[out] is_setup Boolean flag for setup status
957 
958   @return An error code: 0 - success, otherwise - failure
959 
960   @ref Backend
961 **/
962 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
963   *is_setup = data->is_setup;
964   return CEED_ERROR_SUCCESS;
965 }
966 
967 /**
968   @brief Set internal objects for CeedQFunctionAssemblyData
969 
970   @param[in,out] data CeedQFunctionAssemblyData to set objects
971   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
972   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
973 
974   @return An error code: 0 - success, otherwise - failure
975 
976   @ref Backend
977 **/
978 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
979   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
980   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
981 
982   data->is_setup = true;
983   return CEED_ERROR_SUCCESS;
984 }
985 
986 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
987   if (!data->is_setup) {
988     // LCOV_EXCL_START
989     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
990     // LCOV_EXCL_STOP
991   }
992 
993   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
994   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
995 
996   return CEED_ERROR_SUCCESS;
997 }
998 
999 /**
1000   @brief Destroy CeedQFunctionAssemblyData
1001 
1002   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1003 
1004   @return An error code: 0 - success, otherwise - failure
1005 
1006   @ref Backend
1007 **/
1008 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1009   if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;
1010 
1011   CeedCall(CeedDestroy(&(*data)->ceed));
1012   CeedCall(CeedVectorDestroy(&(*data)->vec));
1013   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1014 
1015   CeedCall(CeedFree(data));
1016   return CEED_ERROR_SUCCESS;
1017 }
1018 
1019 /**
1020   @brief Get CeedOperatorAssemblyData
1021 
1022   @param[in]  op   CeedOperator to assemble
1023   @param[out] data CeedQFunctionAssemblyData
1024 
1025   @return An error code: 0 - success, otherwise - failure
1026 
1027   @ref Backend
1028 **/
1029 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1030   if (!op->op_assembled) {
1031     CeedOperatorAssemblyData data;
1032 
1033     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1034     op->op_assembled = data;
1035   }
1036   *data = op->op_assembled;
1037 
1038   return CEED_ERROR_SUCCESS;
1039 }
1040 
1041 /**
1042   @brief Create object holding CeedOperator assembly data
1043 
1044   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1045   @param[in]  op   CeedOperator to be assembled
1046   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1047 
1048   @return An error code: 0 - success, otherwise - failure
1049 
1050   @ref Backend
1051 **/
1052 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1053   CeedCall(CeedCalloc(1, data));
1054   (*data)->ceed = ceed;
1055   CeedCall(CeedReference(ceed));
1056 
1057   // Build OperatorAssembly data
1058   CeedQFunction       qf;
1059   CeedQFunctionField *qf_fields;
1060   CeedOperatorField  *op_fields;
1061   CeedInt             num_input_fields;
1062   CeedCall(CeedOperatorGetQFunction(op, &qf));
1063   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1064   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1065 
1066   // Determine active input basis
1067   CeedInt       num_eval_mode_in = 0, dim = 1;
1068   CeedEvalMode *eval_mode_in = NULL;
1069   CeedBasis     basis_in     = NULL;
1070   for (CeedInt i = 0; i < num_input_fields; i++) {
1071     CeedVector vec;
1072     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1073     if (vec == CEED_VECTOR_ACTIVE) {
1074       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1075       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1076       CeedEvalMode eval_mode;
1077       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1078       switch (eval_mode) {
1079         case CEED_EVAL_NONE:
1080         case CEED_EVAL_INTERP:
1081           CeedCall(CeedRealloc(num_eval_mode_in + 1, &eval_mode_in));
1082           eval_mode_in[num_eval_mode_in] = eval_mode;
1083           num_eval_mode_in += 1;
1084           break;
1085         case CEED_EVAL_GRAD:
1086           CeedCall(CeedRealloc(num_eval_mode_in + dim, &eval_mode_in));
1087           for (CeedInt d = 0; d < dim; d++) {
1088             eval_mode_in[num_eval_mode_in + d] = eval_mode;
1089           }
1090           num_eval_mode_in += dim;
1091           break;
1092         case CEED_EVAL_WEIGHT:
1093         case CEED_EVAL_DIV:
1094         case CEED_EVAL_CURL:
1095           break;  // Caught by QF Assembly
1096       }
1097     }
1098   }
1099   (*data)->num_eval_mode_in = num_eval_mode_in;
1100   (*data)->eval_mode_in     = eval_mode_in;
1101   CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->basis_in));
1102 
1103   // Determine active output basis
1104   CeedInt num_output_fields;
1105   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1106   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1107   CeedInt       num_eval_mode_out = 0;
1108   CeedEvalMode *eval_mode_out     = NULL;
1109   CeedBasis     basis_out         = NULL;
1110   for (CeedInt i = 0; i < num_output_fields; i++) {
1111     CeedVector vec;
1112     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1113     if (vec == CEED_VECTOR_ACTIVE) {
1114       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1115       CeedEvalMode eval_mode;
1116       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1117       switch (eval_mode) {
1118         case CEED_EVAL_NONE:
1119         case CEED_EVAL_INTERP:
1120           CeedCall(CeedRealloc(num_eval_mode_out + 1, &eval_mode_out));
1121           eval_mode_out[num_eval_mode_out] = eval_mode;
1122           num_eval_mode_out += 1;
1123           break;
1124         case CEED_EVAL_GRAD:
1125           CeedCall(CeedRealloc(num_eval_mode_out + dim, &eval_mode_out));
1126           for (CeedInt d = 0; d < dim; d++) {
1127             eval_mode_out[num_eval_mode_out + d] = eval_mode;
1128           }
1129           num_eval_mode_out += dim;
1130           break;
1131         case CEED_EVAL_WEIGHT:
1132         case CEED_EVAL_DIV:
1133         case CEED_EVAL_CURL:
1134           break;  // Caught by QF Assembly
1135       }
1136     }
1137   }
1138   (*data)->num_eval_mode_out = num_eval_mode_out;
1139   (*data)->eval_mode_out     = eval_mode_out;
1140   CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->basis_out));
1141 
1142   return CEED_ERROR_SUCCESS;
1143 }
1144 
1145 /**
1146   @brief Get CeedOperator CeedEvalModes for assembly
1147 
1148   @param[in]  data              CeedOperatorAssemblyData
1149   @param[out] num_eval_mode_in  Pointer to hold number of input CeedEvalModes, or NULL
1150   @param[out] eval_mode_in      Pointer to hold input CeedEvalModes, or NULL
1151   @param[out] num_eval_mode_out Pointer to hold number of output CeedEvalModes, or NULL
1152   @param[out] eval_mode_out     Pointer to hold output CeedEvalModes, or NULL
1153 
1154   @return An error code: 0 - success, otherwise - failure
1155 
1156   @ref Backend
1157 **/
1158 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_eval_mode_in, const CeedEvalMode **eval_mode_in,
1159                                          CeedInt *num_eval_mode_out, const CeedEvalMode **eval_mode_out) {
1160   if (num_eval_mode_in) *num_eval_mode_in = data->num_eval_mode_in;
1161   if (eval_mode_in) *eval_mode_in = data->eval_mode_in;
1162   if (num_eval_mode_out) *num_eval_mode_out = data->num_eval_mode_out;
1163   if (eval_mode_out) *eval_mode_out = data->eval_mode_out;
1164 
1165   return CEED_ERROR_SUCCESS;
1166 }
1167 
1168 /**
1169   @brief Get CeedOperator CeedBasis data for assembly
1170 
1171   @param[in]  data      CeedOperatorAssemblyData
1172   @param[out] basis_in  Pointer to hold active input CeedBasis, or NULL
1173   @param[out] B_in      Pointer to hold assembled active input B, or NULL
1174   @param[out] basis_out Pointer to hold active output CeedBasis, or NULL
1175   @param[out] B_out     Pointer to hold assembled active output B, or NULL
1176 
1177   @return An error code: 0 - success, otherwise - failure
1178 
1179   @ref Backend
1180 **/
1181 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedBasis *basis_in, const CeedScalar **B_in, CeedBasis *basis_out,
1182                                      const CeedScalar **B_out) {
1183   // Assemble B_in, B_out if needed
1184   if (B_in && !data->B_in) {
1185     CeedInt           num_qpts, elem_size;
1186     CeedScalar       *B_in, *identity = NULL;
1187     const CeedScalar *interp_in, *grad_in;
1188     bool              has_eval_none = false;
1189 
1190     CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_in, &num_qpts));
1191     CeedCall(CeedBasisGetNumNodes(data->basis_in, &elem_size));
1192     CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_in, &B_in));
1193 
1194     for (CeedInt i = 0; i < data->num_eval_mode_in; i++) {
1195       has_eval_none = has_eval_none || (data->eval_mode_in[i] == CEED_EVAL_NONE);
1196     }
1197     if (has_eval_none) {
1198       CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1199       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1200         identity[i * elem_size + i] = 1.0;
1201       }
1202     }
1203     CeedCall(CeedBasisGetInterp(data->basis_in, &interp_in));
1204     CeedCall(CeedBasisGetGrad(data->basis_in, &grad_in));
1205 
1206     for (CeedInt q = 0; q < num_qpts; q++) {
1207       for (CeedInt n = 0; n < elem_size; n++) {
1208         CeedInt d_in = -1;
1209         for (CeedInt e_in = 0; e_in < data->num_eval_mode_in; e_in++) {
1210           const CeedInt     qq = data->num_eval_mode_in * q;
1211           const CeedScalar *b  = NULL;
1212 
1213           if (data->eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in++;
1214           CeedOperatorGetBasisPointer(data->eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &b);
1215           B_in[(qq + e_in) * elem_size + n] = b[q * elem_size + n];
1216         }
1217       }
1218     }
1219     data->B_in = B_in;
1220   }
1221 
1222   if (B_out && !data->B_out) {
1223     CeedInt           num_qpts, elem_size;
1224     CeedScalar       *B_out, *identity = NULL;
1225     const CeedScalar *interp_out, *grad_out;
1226     bool              has_eval_none = false;
1227 
1228     CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_out, &num_qpts));
1229     CeedCall(CeedBasisGetNumNodes(data->basis_out, &elem_size));
1230     CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_out, &B_out));
1231 
1232     for (CeedInt i = 0; i < data->num_eval_mode_out; i++) {
1233       has_eval_none = has_eval_none || (data->eval_mode_out[i] == CEED_EVAL_NONE);
1234     }
1235     if (has_eval_none) {
1236       CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1237       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1238         identity[i * elem_size + i] = 1.0;
1239       }
1240     }
1241     CeedCall(CeedBasisGetInterp(data->basis_out, &interp_out));
1242     CeedCall(CeedBasisGetGrad(data->basis_out, &grad_out));
1243 
1244     for (CeedInt q = 0; q < num_qpts; q++) {
1245       for (CeedInt n = 0; n < elem_size; n++) {
1246         CeedInt d_out = -1;
1247         for (CeedInt e_out = 0; e_out < data->num_eval_mode_out; e_out++) {
1248           const CeedInt     qq = data->num_eval_mode_out * q;
1249           const CeedScalar *b  = NULL;
1250 
1251           if (data->eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out++;
1252           CeedOperatorGetBasisPointer(data->eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &b);
1253           B_out[(qq + e_out) * elem_size + n] = b[q * elem_size + n];
1254         }
1255       }
1256     }
1257     data->B_out = B_out;
1258   }
1259 
1260   if (basis_in) *basis_in = data->basis_in;
1261   if (B_in) *B_in = data->B_in;
1262   if (basis_out) *basis_out = data->basis_out;
1263   if (B_out) *B_out = data->B_out;
1264 
1265   return CEED_ERROR_SUCCESS;
1266 }
1267 
1268 /**
1269   @brief Destroy CeedOperatorAssemblyData
1270 
1271   @param[in,out] data CeedOperatorAssemblyData to destroy
1272 
1273   @return An error code: 0 - success, otherwise - failure
1274 
1275   @ref Backend
1276 **/
1277 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1278   if (!*data) return CEED_ERROR_SUCCESS;
1279 
1280   CeedCall(CeedDestroy(&(*data)->ceed));
1281   CeedCall(CeedBasisDestroy(&(*data)->basis_in));
1282   CeedCall(CeedBasisDestroy(&(*data)->basis_out));
1283   CeedCall(CeedFree(&(*data)->eval_mode_in));
1284   CeedCall(CeedFree(&(*data)->eval_mode_out));
1285   CeedCall(CeedFree(&(*data)->B_in));
1286   CeedCall(CeedFree(&(*data)->B_out));
1287 
1288   CeedCall(CeedFree(data));
1289   return CEED_ERROR_SUCCESS;
1290 }
1291 
1292 /// @}
1293 
1294 /// ----------------------------------------------------------------------------
1295 /// CeedOperator Public API
1296 /// ----------------------------------------------------------------------------
1297 /// @addtogroup CeedOperatorUser
1298 /// @{
1299 
1300 /**
1301   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1302 
1303   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1304     The vector 'assembled' is of shape [num_elements, num_input_fields, num_output_fields, num_quad_points] and contains column-major matrices
1305 representing the action of the CeedQFunction for a corresponding quadrature point on an element. Inputs and outputs are in the order provided by the
1306 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order,
1307 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1]
1308 and producing the output [dv_0, dv_1, v].
1309 
1310   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1311 
1312   @param[in]  op        CeedOperator to assemble CeedQFunction
1313   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1314   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1315   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1316 
1317   @return An error code: 0 - success, otherwise - failure
1318 
1319   @ref User
1320 **/
1321 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1322   CeedCall(CeedOperatorCheckReady(op));
1323 
1324   if (op->LinearAssembleQFunction) {
1325     // Backend version
1326     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1327   } else {
1328     // Operator fallback
1329     CeedOperator op_fallback;
1330 
1331     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1332     if (op_fallback) {
1333       CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1334     } else {
1335       // LCOV_EXCL_START
1336       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1337       // LCOV_EXCL_STOP
1338     }
1339   }
1340   return CEED_ERROR_SUCCESS;
1341 }
1342 
1343 /**
1344   @brief Assemble CeedQFunction and store result internally.
1345            Return copied references of stored data to the caller.
1346            Caller is responsible for ownership and destruction of the copied references.
1347            See also @ref CeedOperatorLinearAssembleQFunction
1348 
1349   @param[in]  op        CeedOperator to assemble CeedQFunction
1350   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1351   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1352   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1353 
1354   @return An error code: 0 - success, otherwise - failure
1355 
1356   @ref User
1357 **/
1358 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1359   CeedCall(CeedOperatorCheckReady(op));
1360 
1361   if (op->LinearAssembleQFunctionUpdate) {
1362     // Backend version
1363     bool                qf_assembled_is_setup;
1364     CeedVector          assembled_vec  = NULL;
1365     CeedElemRestriction assembled_rstr = NULL;
1366 
1367     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1368     if (qf_assembled_is_setup) {
1369       bool update_needed;
1370 
1371       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1372       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1373       if (update_needed) {
1374         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1375       }
1376     } else {
1377       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1378       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1379     }
1380     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1381 
1382     // Copy reference from internally held copy
1383     *assembled = NULL;
1384     *rstr      = NULL;
1385     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1386     CeedCall(CeedVectorDestroy(&assembled_vec));
1387     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1388     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1389   } else {
1390     // Operator fallback
1391     CeedOperator op_fallback;
1392 
1393     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1394     if (op_fallback) {
1395       CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1396     } else {
1397       // LCOV_EXCL_START
1398       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1399       // LCOV_EXCL_STOP
1400     }
1401   }
1402 
1403   return CEED_ERROR_SUCCESS;
1404 }
1405 
1406 /**
1407   @brief Assemble the diagonal of a square linear CeedOperator
1408 
1409   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1410 
1411   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1412 
1413   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1414 
1415   @param[in]  op        CeedOperator to assemble CeedQFunction
1416   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1417   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1418 
1419   @return An error code: 0 - success, otherwise - failure
1420 
1421   @ref User
1422 **/
1423 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1424   CeedCall(CeedOperatorCheckReady(op));
1425 
1426   CeedSize input_size = 0, output_size = 0;
1427   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1428   if (input_size != output_size) {
1429     // LCOV_EXCL_START
1430     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1431     // LCOV_EXCL_STOP
1432   }
1433 
1434   if (op->LinearAssembleDiagonal) {
1435     // Backend version
1436     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1437     return CEED_ERROR_SUCCESS;
1438   } else if (op->LinearAssembleAddDiagonal) {
1439     // Backend version with zeroing first
1440     CeedCall(CeedVectorSetValue(assembled, 0.0));
1441     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1442     return CEED_ERROR_SUCCESS;
1443   } else {
1444     // Operator fallback
1445     CeedOperator op_fallback;
1446 
1447     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1448     if (op_fallback) {
1449       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1450       return CEED_ERROR_SUCCESS;
1451     }
1452   }
1453   // Default interface implementation
1454   CeedCall(CeedVectorSetValue(assembled, 0.0));
1455   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1456 
1457   return CEED_ERROR_SUCCESS;
1458 }
1459 
1460 /**
1461   @brief Assemble the diagonal of a square linear CeedOperator
1462 
1463   This sums into a CeedVector the diagonal of a linear CeedOperator.
1464 
1465   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1466 
1467   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1468 
1469   @param[in]  op        CeedOperator to assemble CeedQFunction
1470   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1471   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1472 
1473   @return An error code: 0 - success, otherwise - failure
1474 
1475   @ref User
1476 **/
1477 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1478   CeedCall(CeedOperatorCheckReady(op));
1479 
1480   CeedSize input_size = 0, output_size = 0;
1481   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1482   if (input_size != output_size) {
1483     // LCOV_EXCL_START
1484     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1485     // LCOV_EXCL_STOP
1486   }
1487 
1488   if (op->LinearAssembleAddDiagonal) {
1489     // Backend version
1490     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1491     return CEED_ERROR_SUCCESS;
1492   } else {
1493     // Operator fallback
1494     CeedOperator op_fallback;
1495 
1496     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1497     if (op_fallback) {
1498       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1499       return CEED_ERROR_SUCCESS;
1500     }
1501   }
1502   // Default interface implementation
1503   bool is_composite;
1504   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1505   if (is_composite) {
1506     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1507   } else {
1508     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1509   }
1510 
1511   return CEED_ERROR_SUCCESS;
1512 }
1513 
1514 /**
1515   @brief Assemble the point block diagonal of a square linear CeedOperator
1516 
1517   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1518 
1519   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1520 
1521   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1522 
1523   @param[in]  op        CeedOperator to assemble CeedQFunction
1524   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1525 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1526 component in].
1527   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1528 
1529   @return An error code: 0 - success, otherwise - failure
1530 
1531   @ref User
1532 **/
1533 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1534   CeedCall(CeedOperatorCheckReady(op));
1535 
1536   CeedSize input_size = 0, output_size = 0;
1537   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1538   if (input_size != output_size) {
1539     // LCOV_EXCL_START
1540     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1541     // LCOV_EXCL_STOP
1542   }
1543 
1544   if (op->LinearAssemblePointBlockDiagonal) {
1545     // Backend version
1546     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1547     return CEED_ERROR_SUCCESS;
1548   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1549     // Backend version with zeroing first
1550     CeedCall(CeedVectorSetValue(assembled, 0.0));
1551     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1552     return CEED_ERROR_SUCCESS;
1553   } else {
1554     // Operator fallback
1555     CeedOperator op_fallback;
1556 
1557     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1558     if (op_fallback) {
1559       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1560       return CEED_ERROR_SUCCESS;
1561     }
1562   }
1563   // Default interface implementation
1564   CeedCall(CeedVectorSetValue(assembled, 0.0));
1565   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1566 
1567   return CEED_ERROR_SUCCESS;
1568 }
1569 
1570 /**
1571   @brief Assemble the point block diagonal of a square linear CeedOperator
1572 
1573   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1574 
1575   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1576 
1577   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1578 
1579   @param[in]  op        CeedOperator to assemble CeedQFunction
1580   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1581 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1582 component in].
1583   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1584 
1585   @return An error code: 0 - success, otherwise - failure
1586 
1587   @ref User
1588 **/
1589 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1590   CeedCall(CeedOperatorCheckReady(op));
1591 
1592   CeedSize input_size = 0, output_size = 0;
1593   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1594   if (input_size != output_size) {
1595     // LCOV_EXCL_START
1596     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1597     // LCOV_EXCL_STOP
1598   }
1599 
1600   if (op->LinearAssembleAddPointBlockDiagonal) {
1601     // Backend version
1602     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1603     return CEED_ERROR_SUCCESS;
1604   } else {
1605     // Operator fallback
1606     CeedOperator op_fallback;
1607 
1608     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1609     if (op_fallback) {
1610       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1611       return CEED_ERROR_SUCCESS;
1612     }
1613   }
1614   // Default interface implementation
1615   bool is_composite;
1616   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1617   if (is_composite) {
1618     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1619   } else {
1620     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1621   }
1622 
1623   return CEED_ERROR_SUCCESS;
1624 }
1625 
1626 /**
1627    @brief Fully assemble the nonzero pattern of a linear operator.
1628 
1629    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1630 
1631    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1632 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j)
1633 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1634 
1635    This will generally be slow unless your operator is low-order.
1636 
1637    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1638 
1639    @param[in]  op          CeedOperator to assemble
1640    @param[out] num_entries Number of entries in coordinate nonzero pattern
1641    @param[out] rows        Row number for each entry
1642    @param[out] cols        Column number for each entry
1643 
1644    @ref User
1645 **/
1646 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1647   CeedInt       num_suboperators, single_entries;
1648   CeedOperator *sub_operators;
1649   bool          is_composite;
1650   CeedCall(CeedOperatorCheckReady(op));
1651 
1652   if (op->LinearAssembleSymbolic) {
1653     // Backend version
1654     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1655     return CEED_ERROR_SUCCESS;
1656   } else {
1657     // Operator fallback
1658     CeedOperator op_fallback;
1659 
1660     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1661     if (op_fallback) {
1662       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1663       return CEED_ERROR_SUCCESS;
1664     }
1665   }
1666 
1667   // Default interface implementation
1668 
1669   // count entries and allocate rows, cols arrays
1670   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1671   *num_entries = 0;
1672   if (is_composite) {
1673     CeedCall(CeedOperatorGetNumSub(op, &num_suboperators));
1674     CeedCall(CeedOperatorGetSubList(op, &sub_operators));
1675     for (CeedInt k = 0; k < num_suboperators; ++k) {
1676       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1677       *num_entries += single_entries;
1678     }
1679   } else {
1680     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1681     *num_entries += single_entries;
1682   }
1683   CeedCall(CeedCalloc(*num_entries, rows));
1684   CeedCall(CeedCalloc(*num_entries, cols));
1685 
1686   // assemble nonzero locations
1687   CeedInt offset = 0;
1688   if (is_composite) {
1689     CeedCall(CeedOperatorGetNumSub(op, &num_suboperators));
1690     CeedCall(CeedOperatorGetSubList(op, &sub_operators));
1691     for (CeedInt k = 0; k < num_suboperators; ++k) {
1692       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1693       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1694       offset += single_entries;
1695     }
1696   } else {
1697     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1698   }
1699 
1700   return CEED_ERROR_SUCCESS;
1701 }
1702 
1703 /**
1704    @brief Fully assemble the nonzero entries of a linear operator.
1705 
1706    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1707 
1708    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1709 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added,
1710 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1711 
1712    This will generally be slow unless your operator is low-order.
1713 
1714    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1715 
1716    @param[in]  op     CeedOperator to assemble
1717    @param[out] values Values to assemble into matrix
1718 
1719    @ref User
1720 **/
1721 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1722   CeedInt       num_suboperators, single_entries = 0;
1723   CeedOperator *sub_operators;
1724   CeedCall(CeedOperatorCheckReady(op));
1725 
1726   if (op->LinearAssemble) {
1727     // Backend version
1728     CeedCall(op->LinearAssemble(op, values));
1729     return CEED_ERROR_SUCCESS;
1730   } else {
1731     // Operator fallback
1732     CeedOperator op_fallback;
1733 
1734     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1735     if (op_fallback) {
1736       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
1737       return CEED_ERROR_SUCCESS;
1738     }
1739   }
1740 
1741   // Default interface implementation
1742   bool is_composite;
1743   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1744 
1745   CeedInt offset = 0;
1746   if (is_composite) {
1747     CeedCall(CeedOperatorGetNumSub(op, &num_suboperators));
1748     CeedCall(CeedOperatorGetSubList(op, &sub_operators));
1749     for (CeedInt k = 0; k < num_suboperators; k++) {
1750       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
1751       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1752       offset += single_entries;
1753     }
1754   } else {
1755     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
1756   }
1757 
1758   return CEED_ERROR_SUCCESS;
1759 }
1760 
1761 /**
1762   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
1763 grid interpolation
1764 
1765   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1766 
1767   @param[in]  op_fine      Fine grid operator
1768   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter
1769   @param[in]  rstr_coarse  Coarse grid restriction
1770   @param[in]  basis_coarse Coarse grid active vector basis
1771   @param[out] op_coarse    Coarse grid operator
1772   @param[out] op_prolong   Coarse to fine operator
1773   @param[out] op_restrict  Fine to coarse operator
1774 
1775   @return An error code: 0 - success, otherwise - failure
1776 
1777   @ref User
1778 **/
1779 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1780                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
1781   CeedCall(CeedOperatorCheckReady(op_fine));
1782 
1783   // Build prolongation matrix
1784   CeedBasis basis_fine, basis_c_to_f;
1785   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1786   CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
1787 
1788   // Core code
1789   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1790 
1791   return CEED_ERROR_SUCCESS;
1792 }
1793 
1794 /**
1795   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
1796 
1797   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1798 
1799   @param[in]  op_fine       Fine grid operator
1800   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
1801   @param[in]  rstr_coarse   Coarse grid restriction
1802   @param[in]  basis_coarse  Coarse grid active vector basis
1803   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation
1804   @param[out] op_coarse     Coarse grid operator
1805   @param[out] op_prolong    Coarse to fine operator
1806   @param[out] op_restrict   Fine to coarse operator
1807 
1808   @return An error code: 0 - success, otherwise - failure
1809 
1810   @ref User
1811 **/
1812 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1813                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
1814                                              CeedOperator *op_restrict) {
1815   CeedCall(CeedOperatorCheckReady(op_fine));
1816   Ceed ceed;
1817   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
1818 
1819   // Check for compatible quadrature spaces
1820   CeedBasis basis_fine;
1821   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1822   CeedInt Q_f, Q_c;
1823   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
1824   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
1825   if (Q_f != Q_c) {
1826     // LCOV_EXCL_START
1827     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
1828     // LCOV_EXCL_STOP
1829   }
1830 
1831   // Coarse to fine basis
1832   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
1833   CeedCall(CeedBasisGetDimension(basis_fine, &dim));
1834   CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
1835   CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
1836   CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
1837   P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
1838   CeedScalar *q_ref, *q_weight, *grad;
1839   CeedCall(CeedCalloc(P_1d_f, &q_ref));
1840   CeedCall(CeedCalloc(P_1d_f, &q_weight));
1841   CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
1842   CeedBasis basis_c_to_f;
1843   CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
1844   CeedCall(CeedFree(&q_ref));
1845   CeedCall(CeedFree(&q_weight));
1846   CeedCall(CeedFree(&grad));
1847 
1848   // Core code
1849   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1850   return CEED_ERROR_SUCCESS;
1851 }
1852 
1853 /**
1854   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
1855 
1856   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1857 
1858   @param[in]  op_fine       Fine grid operator
1859   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
1860   @param[in]  rstr_coarse   Coarse grid restriction
1861   @param[in]  basis_coarse  Coarse grid active vector basis
1862   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation
1863   @param[out] op_coarse     Coarse grid operator
1864   @param[out] op_prolong    Coarse to fine operator
1865   @param[out] op_restrict   Fine to coarse operator
1866 
1867   @return An error code: 0 - success, otherwise - failure
1868 
1869   @ref User
1870 **/
1871 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1872                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
1873                                        CeedOperator *op_restrict) {
1874   CeedCall(CeedOperatorCheckReady(op_fine));
1875   Ceed ceed;
1876   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
1877 
1878   // Check for compatible quadrature spaces
1879   CeedBasis basis_fine;
1880   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1881   CeedInt Q_f, Q_c;
1882   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
1883   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
1884   if (Q_f != Q_c) {
1885     // LCOV_EXCL_START
1886     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
1887     // LCOV_EXCL_STOP
1888   }
1889 
1890   // Coarse to fine basis
1891   CeedElemTopology topo;
1892   CeedCall(CeedBasisGetTopology(basis_fine, &topo));
1893   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
1894   CeedCall(CeedBasisGetDimension(basis_fine, &dim));
1895   CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
1896   CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
1897   CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
1898   CeedScalar *q_ref, *q_weight, *grad;
1899   CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
1900   CeedCall(CeedCalloc(num_nodes_f, &q_weight));
1901   CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
1902   CeedBasis basis_c_to_f;
1903   CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
1904   CeedCall(CeedFree(&q_ref));
1905   CeedCall(CeedFree(&q_weight));
1906   CeedCall(CeedFree(&grad));
1907 
1908   // Core code
1909   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1910   return CEED_ERROR_SUCCESS;
1911 }
1912 
1913 /**
1914   @brief Build a FDM based approximate inverse for each element for a CeedOperator
1915 
1916   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
1917     This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, M = V^T V, K = V^T S V.
1918     The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form V^T
1919 S^hat V. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear.
1920 
1921   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1922 
1923   @param[in]  op      CeedOperator to create element inverses
1924   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
1925   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1926 
1927   @return An error code: 0 - success, otherwise - failure
1928 
1929   @ref User
1930 **/
1931 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
1932   CeedCall(CeedOperatorCheckReady(op));
1933 
1934   if (op->CreateFDMElementInverse) {
1935     // Backend version
1936     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
1937     return CEED_ERROR_SUCCESS;
1938   } else {
1939     // Operator fallback
1940     CeedOperator op_fallback;
1941 
1942     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1943     if (op_fallback) {
1944       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
1945       return CEED_ERROR_SUCCESS;
1946     }
1947   }
1948 
1949   // Default interface implementation
1950   Ceed ceed, ceed_parent;
1951   CeedCall(CeedOperatorGetCeed(op, &ceed));
1952   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
1953   ceed_parent = ceed_parent ? ceed_parent : ceed;
1954   CeedQFunction qf;
1955   CeedCall(CeedOperatorGetQFunction(op, &qf));
1956 
1957   // Determine active input basis
1958   bool                interp = false, grad = false;
1959   CeedBasis           basis = NULL;
1960   CeedElemRestriction rstr  = NULL;
1961   CeedOperatorField  *op_fields;
1962   CeedQFunctionField *qf_fields;
1963   CeedInt             num_input_fields;
1964   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
1965   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
1966   for (CeedInt i = 0; i < num_input_fields; i++) {
1967     CeedVector vec;
1968     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1969     if (vec == CEED_VECTOR_ACTIVE) {
1970       CeedEvalMode eval_mode;
1971       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1972       interp = interp || eval_mode == CEED_EVAL_INTERP;
1973       grad   = grad || eval_mode == CEED_EVAL_GRAD;
1974       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
1975       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
1976     }
1977   }
1978   if (!basis) {
1979     // LCOV_EXCL_START
1980     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
1981     // LCOV_EXCL_STOP
1982   }
1983   CeedSize l_size = 1;
1984   CeedInt  P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
1985   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
1986   CeedCall(CeedBasisGetNumNodes(basis, &elem_size));
1987   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
1988   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
1989   CeedCall(CeedBasisGetDimension(basis, &dim));
1990   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
1991   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
1992   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
1993 
1994   // Build and diagonalize 1D Mass and Laplacian
1995   bool tensor_basis;
1996   CeedCall(CeedBasisIsTensor(basis, &tensor_basis));
1997   if (!tensor_basis) {
1998     // LCOV_EXCL_START
1999     return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2000     // LCOV_EXCL_STOP
2001   }
2002   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2003   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2004   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2005   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2006   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2007   CeedCall(CeedCalloc(P_1d, &lambda));
2008   // -- Build matrices
2009   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2010   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2011   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2012   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2013   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2014 
2015   // -- Diagonalize
2016   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2017   CeedCall(CeedFree(&mass));
2018   CeedCall(CeedFree(&laplace));
2019   for (CeedInt i = 0; i < P_1d; i++) {
2020     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2021   }
2022   CeedCall(CeedFree(&x));
2023 
2024   // Assemble QFunction
2025   CeedVector          assembled;
2026   CeedElemRestriction rstr_qf;
2027   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2028   CeedInt layout[3];
2029   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2030   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2031   CeedScalar max_norm = 0;
2032   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2033 
2034   // Calculate element averages
2035   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2036   CeedScalar       *elem_avg;
2037   const CeedScalar *assembled_array, *q_weight_array;
2038   CeedVector        q_weight;
2039   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2040   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2041   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2042   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2043   CeedCall(CeedCalloc(num_elem, &elem_avg));
2044   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2045   for (CeedInt e = 0; e < num_elem; e++) {
2046     CeedInt count = 0;
2047     for (CeedInt q = 0; q < num_qpts; q++) {
2048       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2049         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2050           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2051           count++;
2052         }
2053       }
2054     }
2055     if (count) {
2056       elem_avg[e] /= count;
2057     } else {
2058       elem_avg[e] = 1.0;
2059     }
2060   }
2061   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2062   CeedCall(CeedVectorDestroy(&assembled));
2063   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2064   CeedCall(CeedVectorDestroy(&q_weight));
2065 
2066   // Build FDM diagonal
2067   CeedVector  q_data;
2068   CeedScalar *q_data_array, *fdm_diagonal;
2069   CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal));
2070   const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON;
2071   for (CeedInt c = 0; c < num_comp; c++) {
2072     for (CeedInt n = 0; n < elem_size; n++) {
2073       if (interp) fdm_diagonal[c * elem_size + n] = 1.0;
2074       if (grad) {
2075         for (CeedInt d = 0; d < dim; d++) {
2076           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2077           fdm_diagonal[c * elem_size + n] += lambda[i];
2078         }
2079       }
2080       if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound;
2081     }
2082   }
2083   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data));
2084   CeedCall(CeedVectorSetValue(q_data, 0.0));
2085   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2086   for (CeedInt e = 0; e < num_elem; e++) {
2087     for (CeedInt c = 0; c < num_comp; c++) {
2088       for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]);
2089     }
2090   }
2091   CeedCall(CeedFree(&elem_avg));
2092   CeedCall(CeedFree(&fdm_diagonal));
2093   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2094 
2095   // Setup FDM operator
2096   // -- Basis
2097   CeedBasis   fdm_basis;
2098   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2099   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2100   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2101   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2102   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2103   CeedCall(CeedFree(&fdm_interp));
2104   CeedCall(CeedFree(&grad_dummy));
2105   CeedCall(CeedFree(&q_ref_dummy));
2106   CeedCall(CeedFree(&q_weight_dummy));
2107   CeedCall(CeedFree(&lambda));
2108 
2109   // -- Restriction
2110   CeedElemRestriction rstr_qd_i;
2111   CeedInt             strides[3] = {1, elem_size, elem_size * num_comp};
2112   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i));
2113   // -- QFunction
2114   CeedQFunction qf_fdm;
2115   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2116   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2117   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2118   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2119   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2120   // -- QFunction context
2121   CeedInt *num_comp_data;
2122   CeedCall(CeedCalloc(1, &num_comp_data));
2123   num_comp_data[0] = num_comp;
2124   CeedQFunctionContext ctx_fdm;
2125   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2126   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2127   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2128   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2129   // -- Operator
2130   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2131   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2132   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2133   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2134 
2135   // Cleanup
2136   CeedCall(CeedVectorDestroy(&q_data));
2137   CeedCall(CeedBasisDestroy(&fdm_basis));
2138   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2139   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2140 
2141   return CEED_ERROR_SUCCESS;
2142 }
2143 
2144 /// @}
2145