xref: /libCEED/interface/ceed-preconditioning.c (revision ea61e9ac44808524e4667c1525a05976f536c19c)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <assert.h>
9 #include <ceed-impl.h>
10 #include <ceed/backend.h>
11 #include <ceed/ceed.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   Ceed ceed_fallback;
84 
85   // Check not already created
86   if (op->op_fallback) return CEED_ERROR_SUCCESS;
87 
88   // Fallback Ceed
89   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
90   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
91 
92   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
93   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
94 
95   // Clone Op
96   CeedOperator op_fallback;
97   if (op->is_composite) {
98     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
99     for (CeedInt i = 0; i < op->num_suboperators; i++) {
100       CeedOperator op_sub_fallback;
101 
102       CeedCall(CeedOperatorGetFallback(op->sub_operators[i], &op_sub_fallback));
103       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
104     }
105   } else {
106     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
107     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
108     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
109     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
110     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
111     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
112       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_restr, op->input_fields[i]->basis,
113                                     op->input_fields[i]->vec));
114     }
115     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
116       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_restr, op->output_fields[i]->basis,
117                                     op->output_fields[i]->vec));
118     }
119     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
120     if (op_fallback->num_qpts == 0) {
121       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
122     }
123     // Cleanup
124     CeedCall(CeedQFunctionDestroy(&qf_fallback));
125     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
126     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
127   }
128   CeedCall(CeedOperatorSetName(op_fallback, op->name));
129   CeedCall(CeedOperatorCheckReady(op_fallback));
130   op->op_fallback = op_fallback;
131 
132   return CEED_ERROR_SUCCESS;
133 }
134 
135 /**
136   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
137 
138   @param[in]  op          CeedOperator to retrieve fallback for
139   @param[out] op_fallback Fallback CeedOperator
140 
141   @return An error code: 0 - success, otherwise - failure
142 
143   @ref Developer
144 **/
145 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
146   // Create if needed
147   if (!op->op_fallback) {
148     CeedCall(CeedOperatorCreateFallback(op));
149   }
150   if (op->op_fallback) {
151     bool is_debug;
152 
153     CeedCall(CeedIsDebug(op->ceed, &is_debug));
154     if (is_debug) {
155       Ceed        ceed_fallback;
156       const char *resource, *resource_fallback;
157 
158       CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
159       CeedCall(CeedGetResource(op->ceed, &resource));
160       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
161 
162       CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
163       CeedDebug(op->ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
164                 op->op_fallback);
165     }
166   }
167   *op_fallback = op->op_fallback;
168 
169   return CEED_ERROR_SUCCESS;
170 }
171 
172 /**
173   @brief Select correct basis matrix pointer based on CeedEvalMode
174 
175   @param[in]  eval_mode Current basis evaluation mode
176   @param[in]  identity  Pointer to identity matrix
177   @param[in]  interp    Pointer to interpolation matrix
178   @param[in]  grad      Pointer to gradient matrix
179   @param[out] basis_ptr Basis pointer to set
180 
181   @ref Developer
182 **/
183 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad,
184                                                const CeedScalar **basis_ptr) {
185   switch (eval_mode) {
186     case CEED_EVAL_NONE:
187       *basis_ptr = identity;
188       break;
189     case CEED_EVAL_INTERP:
190       *basis_ptr = interp;
191       break;
192     case CEED_EVAL_GRAD:
193       *basis_ptr = grad;
194       break;
195     case CEED_EVAL_WEIGHT:
196     case CEED_EVAL_DIV:
197     case CEED_EVAL_CURL:
198       break;  // Caught by QF Assembly
199   }
200   assert(*basis_ptr != NULL);
201 }
202 
203 /**
204   @brief Create point block restriction for active operator field
205 
206   @param[in]  rstr            Original CeedElemRestriction for active field
207   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
208 
209   @return An error code: 0 - success, otherwise - failure
210 
211   @ref Developer
212 **/
213 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
214   Ceed ceed;
215   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
216   const CeedInt *offsets;
217   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
218 
219   // Expand offsets
220   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
221   CeedSize l_size;
222   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
223   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
224   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
225   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
226   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
227   CeedInt shift = num_comp;
228   if (comp_stride != 1) shift *= num_comp;
229   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
230   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
231     pointblock_offsets[i] = offsets[i] * shift;
232   }
233 
234   // Create new restriction
235   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
236                                      pointblock_offsets, pointblock_rstr));
237 
238   // Cleanup
239   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
240 
241   return CEED_ERROR_SUCCESS;
242 }
243 
244 /**
245   @brief Core logic for assembling operator diagonal or point block diagonal
246 
247   @param[in]  op            CeedOperator to assemble point block diagonal
248   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
249   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
250   @param[out] assembled     CeedVector to store assembled diagonal
251 
252   @return An error code: 0 - success, otherwise - failure
253 
254   @ref Developer
255 **/
256 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
257   Ceed ceed;
258   CeedCall(CeedOperatorGetCeed(op, &ceed));
259 
260   // Assemble QFunction
261   CeedQFunction qf;
262   CeedCall(CeedOperatorGetQFunction(op, &qf));
263   CeedInt num_input_fields, num_output_fields;
264   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
265   CeedVector          assembled_qf;
266   CeedElemRestriction rstr;
267   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr, request));
268   CeedInt layout[3];
269   CeedCall(CeedElemRestrictionGetELayout(rstr, &layout));
270   CeedCall(CeedElemRestrictionDestroy(&rstr));
271 
272   // Get assembly data
273   CeedOperatorAssemblyData data;
274   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
275   const CeedEvalMode *eval_mode_in, *eval_mode_out;
276   CeedInt             num_eval_mode_in, num_eval_mode_out;
277   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out));
278   CeedBasis basis_in, basis_out;
279   CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL));
280   CeedInt num_comp;
281   CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
282 
283   // Assemble point block diagonal restriction, if needed
284   CeedElemRestriction diag_rstr;
285   CeedCall(CeedOperatorGetActiveElemRestriction(op, &diag_rstr));
286   if (is_pointblock) {
287     CeedElemRestriction point_block_rstr;
288     CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_rstr, &point_block_rstr));
289     diag_rstr = point_block_rstr;
290   }
291 
292   // Create diagonal vector
293   CeedVector elem_diag;
294   CeedCall(CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag));
295 
296   // Assemble element operator diagonals
297   CeedScalar       *elem_diag_array;
298   const CeedScalar *assembled_qf_array;
299   CeedCall(CeedVectorSetValue(elem_diag, 0.0));
300   CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
301   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
302   CeedInt num_elem, num_nodes, num_qpts;
303   CeedCall(CeedElemRestrictionGetNumElements(diag_rstr, &num_elem));
304   CeedCall(CeedBasisGetNumNodes(basis_in, &num_nodes));
305   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
306 
307   // Basis matrices
308   const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
309   CeedScalar       *identity      = NULL;
310   bool              has_eval_none = false;
311   for (CeedInt i = 0; i < num_eval_mode_in; i++) {
312     has_eval_none = has_eval_none || (eval_mode_in[i] == CEED_EVAL_NONE);
313   }
314   for (CeedInt i = 0; i < num_eval_mode_out; i++) {
315     has_eval_none = has_eval_none || (eval_mode_out[i] == CEED_EVAL_NONE);
316   }
317   if (has_eval_none) {
318     CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
319     for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
320   }
321   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
322   CeedCall(CeedBasisGetInterp(basis_out, &interp_out));
323   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
324   CeedCall(CeedBasisGetGrad(basis_out, &grad_out));
325   // Compute the diagonal of B^T D B
326   // Each element
327   for (CeedInt e = 0; e < num_elem; e++) {
328     CeedInt d_out = -1;
329     // Each basis eval mode pair
330     for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
331       const CeedScalar *bt = NULL;
332       if (eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out += 1;
333       CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * num_nodes], &bt);
334       CeedInt d_in = -1;
335       for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
336         const CeedScalar *b = NULL;
337         if (eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in += 1;
338         CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * num_nodes], &b);
339         // Each component
340         for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
341           // Each qpoint/node pair
342           for (CeedInt q = 0; q < num_qpts; q++) {
343             if (is_pointblock) {
344               // Point Block Diagonal
345               for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
346                 const CeedScalar qf_value =
347                     assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_in) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] +
348                                        e * layout[2]];
349                 for (CeedInt n = 0; n < num_nodes; n++) {
350                   elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
351                       bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n];
352                 }
353               }
354             } else {
355               // Diagonal Only
356               const CeedScalar qf_value =
357                   assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_out) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] +
358                                      e * layout[2]];
359               for (CeedInt n = 0; n < num_nodes; n++) {
360                 elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n];
361               }
362             }
363           }
364         }
365       }
366     }
367   }
368   CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
369   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
370 
371   // Assemble local operator diagonal
372   CeedCall(CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
373 
374   // Cleanup
375   if (is_pointblock) {
376     CeedCall(CeedElemRestrictionDestroy(&diag_rstr));
377   }
378   CeedCall(CeedVectorDestroy(&assembled_qf));
379   CeedCall(CeedVectorDestroy(&elem_diag));
380   CeedCall(CeedFree(&identity));
381 
382   return CEED_ERROR_SUCCESS;
383 }
384 
385 /**
386   @brief Core logic for assembling composite operator diagonal
387 
388   @param[in]  op            CeedOperator to assemble point block diagonal
389   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
390   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
391   @param[out] assembled     CeedVector to store assembled diagonal
392 
393   @return An error code: 0 - success, otherwise - failure
394 
395   @ref Developer
396 **/
397 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
398                                                                  CeedVector assembled) {
399   CeedInt       num_sub;
400   CeedOperator *suboperators;
401   CeedCall(CeedOperatorGetNumSub(op, &num_sub));
402   CeedCall(CeedOperatorGetSubList(op, &suboperators));
403   for (CeedInt i = 0; i < num_sub; i++) {
404     if (is_pointblock) {
405       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
406     } else {
407       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
408     }
409   }
410   return CEED_ERROR_SUCCESS;
411 }
412 
413 /**
414   @brief Build nonzero pattern for non-composite operator
415 
416   Users should generally use CeedOperatorLinearAssembleSymbolic()
417 
418   @param[in]  op     CeedOperator to assemble nonzero pattern
419   @param[in]  offset Offset for number of entries
420   @param[out] rows   Row number for each entry
421   @param[out] cols   Column number for each entry
422 
423   @return An error code: 0 - success, otherwise - failure
424 
425   @ref Developer
426 **/
427 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
428   Ceed ceed = op->ceed;
429   if (op->is_composite) {
430     // LCOV_EXCL_START
431     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
432     // LCOV_EXCL_STOP
433   }
434 
435   CeedSize num_nodes;
436   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
437   CeedElemRestriction rstr_in;
438   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
439   CeedInt num_elem, elem_size, num_comp;
440   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
441   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
442   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
443   CeedInt layout_er[3];
444   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
445 
446   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
447 
448   // Determine elem_dof relation
449   CeedVector index_vec;
450   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
451   CeedScalar *array;
452   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
453   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
454   CeedCall(CeedVectorRestoreArray(index_vec, &array));
455   CeedVector elem_dof;
456   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
457   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
458   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
459   const CeedScalar *elem_dof_a;
460   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
461   CeedCall(CeedVectorDestroy(&index_vec));
462 
463   // Determine i, j locations for element matrices
464   CeedInt count = 0;
465   for (CeedInt e = 0; e < num_elem; e++) {
466     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
467       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
468         for (CeedInt i = 0; i < elem_size; i++) {
469           for (CeedInt j = 0; j < elem_size; j++) {
470             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
471             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
472 
473             const CeedInt row = elem_dof_a[elem_dof_index_row];
474             const CeedInt col = elem_dof_a[elem_dof_index_col];
475 
476             rows[offset + count] = row;
477             cols[offset + count] = col;
478             count++;
479           }
480         }
481       }
482     }
483   }
484   if (count != local_num_entries) {
485     // LCOV_EXCL_START
486     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
487     // LCOV_EXCL_STOP
488   }
489   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
490   CeedCall(CeedVectorDestroy(&elem_dof));
491 
492   return CEED_ERROR_SUCCESS;
493 }
494 
495 /**
496   @brief Assemble nonzero entries for non-composite operator
497 
498   Users should generally use CeedOperatorLinearAssemble()
499 
500   @param[in]  op     CeedOperator to assemble
501   @param[in]  offset Offset for number of entries
502   @param[out] values Values to assemble into matrix
503 
504   @return An error code: 0 - success, otherwise - failure
505 
506   @ref Developer
507 **/
508 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
509   Ceed ceed = op->ceed;
510   if (op->is_composite) {
511     // LCOV_EXCL_START
512     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
513     // LCOV_EXCL_STOP
514   }
515   if (op->num_elem == 0) return CEED_ERROR_SUCCESS;
516 
517   if (op->LinearAssembleSingle) {
518     // Backend version
519     CeedCall(op->LinearAssembleSingle(op, offset, values));
520     return CEED_ERROR_SUCCESS;
521   } else {
522     // Operator fallback
523     CeedOperator op_fallback;
524 
525     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
526     if (op_fallback) {
527       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
528       return CEED_ERROR_SUCCESS;
529     }
530   }
531 
532   // Assemble QFunction
533   CeedQFunction qf;
534   CeedCall(CeedOperatorGetQFunction(op, &qf));
535   CeedVector          assembled_qf;
536   CeedElemRestriction rstr_q;
537   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
538   CeedSize qf_length;
539   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
540 
541   CeedInt            num_input_fields, num_output_fields;
542   CeedOperatorField *input_fields;
543   CeedOperatorField *output_fields;
544   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
545 
546   // Get assembly data
547   CeedOperatorAssemblyData data;
548   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
549   const CeedEvalMode *eval_mode_in, *eval_mode_out;
550   CeedInt             num_eval_mode_in, num_eval_mode_out;
551   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out));
552   CeedBasis basis_in, basis_out;
553   CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL));
554 
555   if (num_eval_mode_in == 0 || num_eval_mode_out == 0) {
556     // LCOV_EXCL_START
557     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
558     // LCOV_EXCL_STOP
559   }
560 
561   CeedElemRestriction active_rstr;
562   CeedInt             num_elem, elem_size, num_qpts, num_comp;
563   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
564   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
565   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
566   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
567   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
568 
569   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
570 
571   // loop over elements and put in data structure
572   const CeedScalar *interp_in, *grad_in;
573   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
574   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
575 
576   const CeedScalar *assembled_qf_array;
577   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
578 
579   CeedInt layout_qf[3];
580   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
581   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
582 
583   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
584   const CeedScalar *B_mat_in, *B_mat_out;
585   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &B_mat_in, NULL, &B_mat_out));
586   CeedScalar  BTD_mat[elem_size * num_qpts * num_eval_mode_in];
587   CeedScalar  elem_mat[elem_size * elem_size];
588   CeedInt     count = 0;
589   CeedScalar *vals;
590   CeedCall(CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals));
591   for (CeedInt e = 0; e < num_elem; e++) {
592     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
593       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
594         // Compute B^T*D
595         for (CeedInt n = 0; n < elem_size; n++) {
596           for (CeedInt q = 0; q < num_qpts; q++) {
597             for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
598               const CeedInt btd_index = n * (num_qpts * num_eval_mode_in) + (num_eval_mode_in * q + e_in);
599               CeedScalar    sum       = 0.0;
600               for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
601                 const CeedInt b_out_index     = (num_eval_mode_out * q + e_out) * elem_size + n;
602                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_mode_out + e_out) * num_comp + comp_out;
603                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
604                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
605               }
606               BTD_mat[btd_index] = sum;
607             }
608           }
609         }
610         // form element matrix itself (for each block component)
611         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_mode_in));
612 
613         // put element matrix in coordinate data structure
614         for (CeedInt i = 0; i < elem_size; i++) {
615           for (CeedInt j = 0; j < elem_size; j++) {
616             vals[offset + count] = elem_mat[i * elem_size + j];
617             count++;
618           }
619         }
620       }
621     }
622   }
623   if (count != local_num_entries) {
624     // LCOV_EXCL_START
625     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
626     // LCOV_EXCL_STOP
627   }
628   CeedCall(CeedVectorRestoreArray(values, &vals));
629 
630   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
631   CeedCall(CeedVectorDestroy(&assembled_qf));
632 
633   return CEED_ERROR_SUCCESS;
634 }
635 
636 /**
637   @brief Count number of entries for assembled CeedOperator
638 
639   @param[in]  op          CeedOperator to assemble
640   @param[out] num_entries Number of entries in assembled representation
641 
642   @return An error code: 0 - success, otherwise - failure
643 
644   @ref Utility
645 **/
646 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
647   CeedElemRestriction rstr;
648   CeedInt             num_elem, elem_size, num_comp;
649 
650   if (op->is_composite) {
651     // LCOV_EXCL_START
652     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
653     // LCOV_EXCL_STOP
654   }
655   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
656   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
657   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
658   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
659   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
660 
661   return CEED_ERROR_SUCCESS;
662 }
663 
664 /**
665   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
666 
667   @param[in]  op_fine      Fine grid operator
668   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter
669   @param[in]  rstr_coarse  Coarse grid restriction
670   @param[in]  basis_coarse Coarse grid active vector basis
671   @param[in]  basis_c_to_f Basis for coarse to fine interpolation
672   @param[out] op_coarse    Coarse grid operator
673   @param[out] op_prolong   Coarse to fine operator
674   @param[out] op_restrict  Fine to coarse operator
675 
676   @return An error code: 0 - success, otherwise - failure
677 
678   @ref Developer
679 **/
680 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
681                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
682   Ceed ceed;
683   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
684 
685   // Check for composite operator
686   bool is_composite;
687   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
688   if (is_composite) {
689     // LCOV_EXCL_START
690     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
691     // LCOV_EXCL_STOP
692   }
693 
694   // Coarse Grid
695   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
696   CeedElemRestriction rstr_fine = NULL;
697   // -- Clone input fields
698   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
699     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
700       rstr_fine = op_fine->input_fields[i]->elem_restr;
701       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
702     } else {
703       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_restr,
704                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
705     }
706   }
707   // -- Clone output fields
708   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
709     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
710       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
711     } else {
712       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_restr,
713                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
714     }
715   }
716   // -- Clone QFunctionAssemblyData
717   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
718 
719   // Multiplicity vector
720   CeedVector mult_vec, mult_e_vec;
721   CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
722   CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
723   CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
724   CeedCall(CeedVectorSetValue(mult_vec, 0.0));
725   CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
726   CeedCall(CeedVectorDestroy(&mult_e_vec));
727   CeedCall(CeedVectorReciprocal(mult_vec));
728 
729   // Restriction
730   CeedInt num_comp;
731   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
732   CeedQFunction qf_restrict;
733   CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
734   CeedInt *num_comp_r_data;
735   CeedCall(CeedCalloc(1, &num_comp_r_data));
736   num_comp_r_data[0] = num_comp;
737   CeedQFunctionContext ctx_r;
738   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
739   CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
740   CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
741   CeedCall(CeedQFunctionContextDestroy(&ctx_r));
742   CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
743   CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
744   CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
745   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
746 
747   CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
748   CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
749   CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
750   CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
751 
752   // Prolongation
753   CeedQFunction qf_prolong;
754   CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
755   CeedInt *num_comp_p_data;
756   CeedCall(CeedCalloc(1, &num_comp_p_data));
757   num_comp_p_data[0] = num_comp;
758   CeedQFunctionContext ctx_p;
759   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
760   CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
761   CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
762   CeedCall(CeedQFunctionContextDestroy(&ctx_p));
763   CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
764   CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
765   CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
766   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
767 
768   CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
769   CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
770   CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
771   CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
772 
773   // Clone name
774   bool   has_name = op_fine->name;
775   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
776   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
777   {
778     char *prolongation_name;
779     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
780     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
781     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
782     CeedCall(CeedFree(&prolongation_name));
783   }
784   {
785     char *restriction_name;
786     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
787     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
788     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
789     CeedCall(CeedFree(&restriction_name));
790   }
791 
792   // Cleanup
793   CeedCall(CeedVectorDestroy(&mult_vec));
794   CeedCall(CeedBasisDestroy(&basis_c_to_f));
795   CeedCall(CeedQFunctionDestroy(&qf_restrict));
796   CeedCall(CeedQFunctionDestroy(&qf_prolong));
797 
798   return CEED_ERROR_SUCCESS;
799 }
800 
801 /**
802   @brief Build 1D mass matrix and Laplacian with perturbation
803 
804   @param[in]  interp_1d   Interpolation matrix in one dimension
805   @param[in]  grad_1d     Gradient matrix in one dimension
806   @param[in]  q_weight_1d Quadrature weights in one dimension
807   @param[in]  P_1d        Number of basis nodes in one dimension
808   @param[in]  Q_1d        Number of quadrature points in one dimension
809   @param[in]  dim         Dimension of basis
810   @param[out] mass        Assembled mass matrix in one dimension
811   @param[out] laplace     Assembled perturbed Laplacian in one dimension
812 
813   @return An error code: 0 - success, otherwise - failure
814 
815   @ref Developer
816 **/
817 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d,
818                                                       CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
819   for (CeedInt i = 0; i < P_1d; i++) {
820     for (CeedInt j = 0; j < P_1d; j++) {
821       CeedScalar sum = 0.0;
822       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
823       mass[i + j * P_1d] = sum;
824     }
825   }
826   // -- Laplacian
827   for (CeedInt i = 0; i < P_1d; i++) {
828     for (CeedInt j = 0; j < P_1d; j++) {
829       CeedScalar sum = 0.0;
830       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
831       laplace[i + j * P_1d] = sum;
832     }
833   }
834   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
835   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
836   return CEED_ERROR_SUCCESS;
837 }
838 CeedPragmaOptimizeOn;
839 
840 /// @}
841 
842 /// ----------------------------------------------------------------------------
843 /// CeedOperator Backend API
844 /// ----------------------------------------------------------------------------
845 /// @addtogroup CeedOperatorBackend
846 /// @{
847 
848 /**
849   @brief Create object holding CeedQFunction assembly data for CeedOperator
850 
851   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
852   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
853 
854   @return An error code: 0 - success, otherwise - failure
855 
856   @ref Backend
857 **/
858 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
859   CeedCall(CeedCalloc(1, data));
860   (*data)->ref_count = 1;
861   (*data)->ceed      = ceed;
862   CeedCall(CeedReference(ceed));
863 
864   return CEED_ERROR_SUCCESS;
865 }
866 
867 /**
868   @brief Increment the reference counter for a CeedQFunctionAssemblyData
869 
870   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
871 
872   @return An error code: 0 - success, otherwise - failure
873 
874   @ref Backend
875 **/
876 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
877   data->ref_count++;
878   return CEED_ERROR_SUCCESS;
879 }
880 
881 /**
882   @brief Set re-use of CeedQFunctionAssemblyData
883 
884   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
885   @param[in]     reuse_data Boolean flag indicating data re-use
886 
887   @return An error code: 0 - success, otherwise - failure
888 
889   @ref Backend
890 **/
891 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
892   data->reuse_data        = reuse_data;
893   data->needs_data_update = true;
894   return CEED_ERROR_SUCCESS;
895 }
896 
897 /**
898   @brief Mark QFunctionAssemblyData as stale
899 
900   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
901   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
902 
903   @return An error code: 0 - success, otherwise - failure
904 
905   @ref Backend
906 **/
907 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
908   data->needs_data_update = needs_data_update;
909   return CEED_ERROR_SUCCESS;
910 }
911 
912 /**
913   @brief Determine if QFunctionAssemblyData needs update
914 
915   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
916   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
917 
918   @return An error code: 0 - success, otherwise - failure
919 
920   @ref Backend
921 **/
922 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
923   *is_update_needed = !data->reuse_data || data->needs_data_update;
924   return CEED_ERROR_SUCCESS;
925 }
926 
927 /**
928   @brief Copy the pointer to a CeedQFunctionAssemblyData.
929            Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
930            Note: If `*data_copy` is non-NULL, then it is assumed that `*data_copy` is a pointer to a CeedQFunctionAssemblyData.
931              This CeedQFunctionAssemblyData will be destroyed if `*data_copy` is the only reference to this CeedQFunctionAssemblyData.
932 
933   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
934   @param[in,out] data_copy Variable to store copied reference
935 
936   @return An error code: 0 - success, otherwise - failure
937 
938   @ref Backend
939 **/
940 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
941   CeedCall(CeedQFunctionAssemblyDataReference(data));
942   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
943   *data_copy = data;
944   return CEED_ERROR_SUCCESS;
945 }
946 
947 /**
948   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
949 
950   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
951   @param[out] is_setup Boolean flag for setup status
952 
953   @return An error code: 0 - success, otherwise - failure
954 
955   @ref Backend
956 **/
957 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
958   *is_setup = data->is_setup;
959   return CEED_ERROR_SUCCESS;
960 }
961 
962 /**
963   @brief Set internal objects for CeedQFunctionAssemblyData
964 
965   @param[in,out] data CeedQFunctionAssemblyData to set objects
966   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
967   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
968 
969   @return An error code: 0 - success, otherwise - failure
970 
971   @ref Backend
972 **/
973 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
974   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
975   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
976 
977   data->is_setup = true;
978   return CEED_ERROR_SUCCESS;
979 }
980 
981 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
982   if (!data->is_setup) {
983     // LCOV_EXCL_START
984     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
985     // LCOV_EXCL_STOP
986   }
987 
988   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
989   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
990 
991   return CEED_ERROR_SUCCESS;
992 }
993 
994 /**
995   @brief Destroy CeedQFunctionAssemblyData
996 
997   @param[in,out] data  CeedQFunctionAssemblyData to destroy
998 
999   @return An error code: 0 - success, otherwise - failure
1000 
1001   @ref Backend
1002 **/
1003 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1004   if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;
1005 
1006   CeedCall(CeedDestroy(&(*data)->ceed));
1007   CeedCall(CeedVectorDestroy(&(*data)->vec));
1008   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1009 
1010   CeedCall(CeedFree(data));
1011   return CEED_ERROR_SUCCESS;
1012 }
1013 
1014 /**
1015   @brief Get CeedOperatorAssemblyData
1016 
1017   @param[in]  op   CeedOperator to assemble
1018   @param[out] data CeedQFunctionAssemblyData
1019 
1020   @return An error code: 0 - success, otherwise - failure
1021 
1022   @ref Backend
1023 **/
1024 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1025   if (!op->op_assembled) {
1026     CeedOperatorAssemblyData data;
1027 
1028     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1029     op->op_assembled = data;
1030   }
1031   *data = op->op_assembled;
1032 
1033   return CEED_ERROR_SUCCESS;
1034 }
1035 
1036 /**
1037   @brief Create object holding CeedOperator assembly data
1038 
1039   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1040   @param[in]  op   CeedOperator to be assembled
1041   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1042 
1043   @return An error code: 0 - success, otherwise - failure
1044 
1045   @ref Backend
1046 **/
1047 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1048   CeedCall(CeedCalloc(1, data));
1049   (*data)->ceed = ceed;
1050   CeedCall(CeedReference(ceed));
1051 
1052   // Build OperatorAssembly data
1053   CeedQFunction       qf;
1054   CeedQFunctionField *qf_fields;
1055   CeedOperatorField  *op_fields;
1056   CeedInt             num_input_fields;
1057   CeedCall(CeedOperatorGetQFunction(op, &qf));
1058   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1059   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1060 
1061   // Determine active input basis
1062   CeedInt       num_eval_mode_in = 0, dim = 1;
1063   CeedEvalMode *eval_mode_in = NULL;
1064   CeedBasis     basis_in     = NULL;
1065   for (CeedInt i = 0; i < num_input_fields; i++) {
1066     CeedVector vec;
1067     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1068     if (vec == CEED_VECTOR_ACTIVE) {
1069       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1070       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1071       CeedEvalMode eval_mode;
1072       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1073       switch (eval_mode) {
1074         case CEED_EVAL_NONE:
1075         case CEED_EVAL_INTERP:
1076           CeedCall(CeedRealloc(num_eval_mode_in + 1, &eval_mode_in));
1077           eval_mode_in[num_eval_mode_in] = eval_mode;
1078           num_eval_mode_in += 1;
1079           break;
1080         case CEED_EVAL_GRAD:
1081           CeedCall(CeedRealloc(num_eval_mode_in + dim, &eval_mode_in));
1082           for (CeedInt d = 0; d < dim; d++) {
1083             eval_mode_in[num_eval_mode_in + d] = eval_mode;
1084           }
1085           num_eval_mode_in += dim;
1086           break;
1087         case CEED_EVAL_WEIGHT:
1088         case CEED_EVAL_DIV:
1089         case CEED_EVAL_CURL:
1090           break;  // Caught by QF Assembly
1091       }
1092     }
1093   }
1094   (*data)->num_eval_mode_in = num_eval_mode_in;
1095   (*data)->eval_mode_in     = eval_mode_in;
1096   CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->basis_in));
1097 
1098   // Determine active output basis
1099   CeedInt num_output_fields;
1100   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1101   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1102   CeedInt       num_eval_mode_out = 0;
1103   CeedEvalMode *eval_mode_out     = NULL;
1104   CeedBasis     basis_out         = NULL;
1105   for (CeedInt i = 0; i < num_output_fields; i++) {
1106     CeedVector vec;
1107     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1108     if (vec == CEED_VECTOR_ACTIVE) {
1109       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1110       CeedEvalMode eval_mode;
1111       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1112       switch (eval_mode) {
1113         case CEED_EVAL_NONE:
1114         case CEED_EVAL_INTERP:
1115           CeedCall(CeedRealloc(num_eval_mode_out + 1, &eval_mode_out));
1116           eval_mode_out[num_eval_mode_out] = eval_mode;
1117           num_eval_mode_out += 1;
1118           break;
1119         case CEED_EVAL_GRAD:
1120           CeedCall(CeedRealloc(num_eval_mode_out + dim, &eval_mode_out));
1121           for (CeedInt d = 0; d < dim; d++) {
1122             eval_mode_out[num_eval_mode_out + d] = eval_mode;
1123           }
1124           num_eval_mode_out += dim;
1125           break;
1126         case CEED_EVAL_WEIGHT:
1127         case CEED_EVAL_DIV:
1128         case CEED_EVAL_CURL:
1129           break;  // Caught by QF Assembly
1130       }
1131     }
1132   }
1133   (*data)->num_eval_mode_out = num_eval_mode_out;
1134   (*data)->eval_mode_out     = eval_mode_out;
1135   CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->basis_out));
1136 
1137   return CEED_ERROR_SUCCESS;
1138 }
1139 
1140 /**
1141   @brief Get CeedOperator CeedEvalModes for assembly
1142 
1143   @param[in]  data              CeedOperatorAssemblyData
1144   @param[out] num_eval_mode_in  Pointer to hold number of input CeedEvalModes, or NULL
1145   @param[out] eval_mode_in      Pointer to hold input CeedEvalModes, or NULL
1146   @param[out] num_eval_mode_out Pointer to hold number of output CeedEvalModes, or NULL
1147   @param[out] eval_mode_out     Pointer to hold output CeedEvalModes, or NULL
1148 
1149   @return An error code: 0 - success, otherwise - failure
1150 
1151   @ref Backend
1152 **/
1153 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_eval_mode_in, const CeedEvalMode **eval_mode_in,
1154                                          CeedInt *num_eval_mode_out, const CeedEvalMode **eval_mode_out) {
1155   if (num_eval_mode_in) *num_eval_mode_in = data->num_eval_mode_in;
1156   if (eval_mode_in) *eval_mode_in = data->eval_mode_in;
1157   if (num_eval_mode_out) *num_eval_mode_out = data->num_eval_mode_out;
1158   if (eval_mode_out) *eval_mode_out = data->eval_mode_out;
1159 
1160   return CEED_ERROR_SUCCESS;
1161 }
1162 
1163 /**
1164   @brief Get CeedOperator CeedBasis data for assembly
1165 
1166   @param[in]  data      CeedOperatorAssemblyData
1167   @param[out] basis_in  Pointer to hold active input CeedBasis, or NULL
1168   @param[out] B_in      Pointer to hold assembled active input B, or NULL
1169   @param[out] basis_out Pointer to hold active output CeedBasis, or NULL
1170   @param[out] B_out     Pointer to hold assembled active output B, or NULL
1171 
1172   @return An error code: 0 - success, otherwise - failure
1173 
1174   @ref Backend
1175 **/
1176 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedBasis *basis_in, const CeedScalar **B_in, CeedBasis *basis_out,
1177                                      const CeedScalar **B_out) {
1178   // Assemble B_in, B_out if needed
1179   if (B_in && !data->B_in) {
1180     CeedInt           num_qpts, elem_size;
1181     CeedScalar       *B_in, *identity = NULL;
1182     const CeedScalar *interp_in, *grad_in;
1183     bool              has_eval_none = false;
1184 
1185     CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_in, &num_qpts));
1186     CeedCall(CeedBasisGetNumNodes(data->basis_in, &elem_size));
1187     CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_in, &B_in));
1188 
1189     for (CeedInt i = 0; i < data->num_eval_mode_in; i++) {
1190       has_eval_none = has_eval_none || (data->eval_mode_in[i] == CEED_EVAL_NONE);
1191     }
1192     if (has_eval_none) {
1193       CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1194       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1195         identity[i * elem_size + i] = 1.0;
1196       }
1197     }
1198     CeedCall(CeedBasisGetInterp(data->basis_in, &interp_in));
1199     CeedCall(CeedBasisGetGrad(data->basis_in, &grad_in));
1200 
1201     for (CeedInt q = 0; q < num_qpts; q++) {
1202       for (CeedInt n = 0; n < elem_size; n++) {
1203         CeedInt d_in = -1;
1204         for (CeedInt e_in = 0; e_in < data->num_eval_mode_in; e_in++) {
1205           const CeedInt     qq = data->num_eval_mode_in * q;
1206           const CeedScalar *b  = NULL;
1207 
1208           if (data->eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in++;
1209           CeedOperatorGetBasisPointer(data->eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &b);
1210           B_in[(qq + e_in) * elem_size + n] = b[q * elem_size + n];
1211         }
1212       }
1213     }
1214     data->B_in = B_in;
1215   }
1216 
1217   if (B_out && !data->B_out) {
1218     CeedInt           num_qpts, elem_size;
1219     CeedScalar       *B_out, *identity = NULL;
1220     const CeedScalar *interp_out, *grad_out;
1221     bool              has_eval_none = false;
1222 
1223     CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_out, &num_qpts));
1224     CeedCall(CeedBasisGetNumNodes(data->basis_out, &elem_size));
1225     CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_out, &B_out));
1226 
1227     for (CeedInt i = 0; i < data->num_eval_mode_out; i++) {
1228       has_eval_none = has_eval_none || (data->eval_mode_out[i] == CEED_EVAL_NONE);
1229     }
1230     if (has_eval_none) {
1231       CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1232       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1233         identity[i * elem_size + i] = 1.0;
1234       }
1235     }
1236     CeedCall(CeedBasisGetInterp(data->basis_out, &interp_out));
1237     CeedCall(CeedBasisGetGrad(data->basis_out, &grad_out));
1238 
1239     for (CeedInt q = 0; q < num_qpts; q++) {
1240       for (CeedInt n = 0; n < elem_size; n++) {
1241         CeedInt d_out = -1;
1242         for (CeedInt e_out = 0; e_out < data->num_eval_mode_out; e_out++) {
1243           const CeedInt     qq = data->num_eval_mode_out * q;
1244           const CeedScalar *b  = NULL;
1245 
1246           if (data->eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out++;
1247           CeedOperatorGetBasisPointer(data->eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &b);
1248           B_out[(qq + e_out) * elem_size + n] = b[q * elem_size + n];
1249         }
1250       }
1251     }
1252     data->B_out = B_out;
1253   }
1254 
1255   if (basis_in) *basis_in = data->basis_in;
1256   if (B_in) *B_in = data->B_in;
1257   if (basis_out) *basis_out = data->basis_out;
1258   if (B_out) *B_out = data->B_out;
1259 
1260   return CEED_ERROR_SUCCESS;
1261 }
1262 
1263 /**
1264   @brief Destroy CeedOperatorAssemblyData
1265 
1266   @param[in,out] data CeedOperatorAssemblyData to destroy
1267 
1268   @return An error code: 0 - success, otherwise - failure
1269 
1270   @ref Backend
1271 **/
1272 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1273   if (!*data) return CEED_ERROR_SUCCESS;
1274 
1275   CeedCall(CeedDestroy(&(*data)->ceed));
1276   CeedCall(CeedBasisDestroy(&(*data)->basis_in));
1277   CeedCall(CeedBasisDestroy(&(*data)->basis_out));
1278   CeedCall(CeedFree(&(*data)->eval_mode_in));
1279   CeedCall(CeedFree(&(*data)->eval_mode_out));
1280   CeedCall(CeedFree(&(*data)->B_in));
1281   CeedCall(CeedFree(&(*data)->B_out));
1282 
1283   CeedCall(CeedFree(data));
1284   return CEED_ERROR_SUCCESS;
1285 }
1286 
1287 /// @}
1288 
1289 /// ----------------------------------------------------------------------------
1290 /// CeedOperator Public API
1291 /// ----------------------------------------------------------------------------
1292 /// @addtogroup CeedOperatorUser
1293 /// @{
1294 
1295 /**
1296   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1297 
1298   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1299     The vector 'assembled' is of shape [num_elements, num_input_fields, num_output_fields, num_quad_points] and contains column-major matrices
1300 representing the action of the CeedQFunction for a corresponding quadrature point on an element. Inputs and outputs are in the order provided by the
1301 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order,
1302 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1]
1303 and producing the output [dv_0, dv_1, v].
1304 
1305   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1306 
1307   @param[in]  op        CeedOperator to assemble CeedQFunction
1308   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1309   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1310   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1311 
1312   @return An error code: 0 - success, otherwise - failure
1313 
1314   @ref User
1315 **/
1316 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1317   CeedCall(CeedOperatorCheckReady(op));
1318 
1319   if (op->LinearAssembleQFunction) {
1320     // Backend version
1321     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1322   } else {
1323     // Operator fallback
1324     CeedOperator op_fallback;
1325 
1326     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1327     if (op_fallback) {
1328       CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1329     } else {
1330       // LCOV_EXCL_START
1331       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1332       // LCOV_EXCL_STOP
1333     }
1334   }
1335   return CEED_ERROR_SUCCESS;
1336 }
1337 
1338 /**
1339   @brief Assemble CeedQFunction and store result internally.
1340            Return copied references of stored data to the caller.
1341            Caller is responsible for ownership and destruction of the copied references.
1342            See also @ref CeedOperatorLinearAssembleQFunction
1343 
1344   @param[in]  op        CeedOperator to assemble CeedQFunction
1345   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1346   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1347   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1348 
1349   @return An error code: 0 - success, otherwise - failure
1350 
1351   @ref User
1352 **/
1353 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1354   CeedCall(CeedOperatorCheckReady(op));
1355 
1356   if (op->LinearAssembleQFunctionUpdate) {
1357     // Backend version
1358     bool                qf_assembled_is_setup;
1359     CeedVector          assembled_vec  = NULL;
1360     CeedElemRestriction assembled_rstr = NULL;
1361 
1362     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1363     if (qf_assembled_is_setup) {
1364       bool update_needed;
1365 
1366       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1367       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1368       if (update_needed) {
1369         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1370       }
1371     } else {
1372       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1373       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1374     }
1375     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1376 
1377     // Copy reference from internally held copy
1378     *assembled = NULL;
1379     *rstr      = NULL;
1380     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1381     CeedCall(CeedVectorDestroy(&assembled_vec));
1382     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1383     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1384   } else {
1385     // Operator fallback
1386     CeedOperator op_fallback;
1387 
1388     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1389     if (op_fallback) {
1390       CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1391     } else {
1392       // LCOV_EXCL_START
1393       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1394       // LCOV_EXCL_STOP
1395     }
1396   }
1397 
1398   return CEED_ERROR_SUCCESS;
1399 }
1400 
1401 /**
1402   @brief Assemble the diagonal of a square linear CeedOperator
1403 
1404   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1405 
1406   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1407 
1408   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1409 
1410   @param[in]  op        CeedOperator to assemble CeedQFunction
1411   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1412   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1413 
1414   @return An error code: 0 - success, otherwise - failure
1415 
1416   @ref User
1417 **/
1418 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1419   CeedCall(CeedOperatorCheckReady(op));
1420 
1421   CeedSize input_size = 0, output_size = 0;
1422   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1423   if (input_size != output_size) {
1424     // LCOV_EXCL_START
1425     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1426     // LCOV_EXCL_STOP
1427   }
1428 
1429   if (op->LinearAssembleDiagonal) {
1430     // Backend version
1431     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1432     return CEED_ERROR_SUCCESS;
1433   } else if (op->LinearAssembleAddDiagonal) {
1434     // Backend version with zeroing first
1435     CeedCall(CeedVectorSetValue(assembled, 0.0));
1436     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1437     return CEED_ERROR_SUCCESS;
1438   } else {
1439     // Operator fallback
1440     CeedOperator op_fallback;
1441 
1442     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1443     if (op_fallback) {
1444       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1445       return CEED_ERROR_SUCCESS;
1446     }
1447   }
1448   // Default interface implementation
1449   CeedCall(CeedVectorSetValue(assembled, 0.0));
1450   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1451 
1452   return CEED_ERROR_SUCCESS;
1453 }
1454 
1455 /**
1456   @brief Assemble the diagonal of a square linear CeedOperator
1457 
1458   This sums into a CeedVector the diagonal of a linear CeedOperator.
1459 
1460   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1461 
1462   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1463 
1464   @param[in]  op        CeedOperator to assemble CeedQFunction
1465   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1466   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1467 
1468   @return An error code: 0 - success, otherwise - failure
1469 
1470   @ref User
1471 **/
1472 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1473   CeedCall(CeedOperatorCheckReady(op));
1474 
1475   CeedSize input_size = 0, output_size = 0;
1476   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1477   if (input_size != output_size) {
1478     // LCOV_EXCL_START
1479     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1480     // LCOV_EXCL_STOP
1481   }
1482 
1483   if (op->LinearAssembleAddDiagonal) {
1484     // Backend version
1485     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1486     return CEED_ERROR_SUCCESS;
1487   } else {
1488     // Operator fallback
1489     CeedOperator op_fallback;
1490 
1491     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1492     if (op_fallback) {
1493       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1494       return CEED_ERROR_SUCCESS;
1495     }
1496   }
1497   // Default interface implementation
1498   bool is_composite;
1499   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1500   if (is_composite) {
1501     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1502   } else {
1503     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1504   }
1505 
1506   return CEED_ERROR_SUCCESS;
1507 }
1508 
1509 /**
1510   @brief Assemble the point block diagonal of a square linear CeedOperator
1511 
1512   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1513 
1514   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1515 
1516   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1517 
1518   @param[in]  op        CeedOperator to assemble CeedQFunction
1519   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1520 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1521 component in].
1522   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1523 
1524   @return An error code: 0 - success, otherwise - failure
1525 
1526   @ref User
1527 **/
1528 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1529   CeedCall(CeedOperatorCheckReady(op));
1530 
1531   CeedSize input_size = 0, output_size = 0;
1532   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1533   if (input_size != output_size) {
1534     // LCOV_EXCL_START
1535     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1536     // LCOV_EXCL_STOP
1537   }
1538 
1539   if (op->LinearAssemblePointBlockDiagonal) {
1540     // Backend version
1541     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1542     return CEED_ERROR_SUCCESS;
1543   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1544     // Backend version with zeroing first
1545     CeedCall(CeedVectorSetValue(assembled, 0.0));
1546     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1547     return CEED_ERROR_SUCCESS;
1548   } else {
1549     // Operator fallback
1550     CeedOperator op_fallback;
1551 
1552     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1553     if (op_fallback) {
1554       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1555       return CEED_ERROR_SUCCESS;
1556     }
1557   }
1558   // Default interface implementation
1559   CeedCall(CeedVectorSetValue(assembled, 0.0));
1560   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1561 
1562   return CEED_ERROR_SUCCESS;
1563 }
1564 
1565 /**
1566   @brief Assemble the point block diagonal of a square linear CeedOperator
1567 
1568   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1569 
1570   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1571 
1572   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1573 
1574   @param[in]  op        CeedOperator to assemble CeedQFunction
1575   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1576 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1577 component in].
1578   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1579 
1580   @return An error code: 0 - success, otherwise - failure
1581 
1582   @ref User
1583 **/
1584 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1585   CeedCall(CeedOperatorCheckReady(op));
1586 
1587   CeedSize input_size = 0, output_size = 0;
1588   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1589   if (input_size != output_size) {
1590     // LCOV_EXCL_START
1591     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1592     // LCOV_EXCL_STOP
1593   }
1594 
1595   if (op->LinearAssembleAddPointBlockDiagonal) {
1596     // Backend version
1597     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1598     return CEED_ERROR_SUCCESS;
1599   } else {
1600     // Operator fallback
1601     CeedOperator op_fallback;
1602 
1603     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1604     if (op_fallback) {
1605       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1606       return CEED_ERROR_SUCCESS;
1607     }
1608   }
1609   // Default interface implementation
1610   bool is_composite;
1611   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1612   if (is_composite) {
1613     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1614   } else {
1615     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1616   }
1617 
1618   return CEED_ERROR_SUCCESS;
1619 }
1620 
1621 /**
1622    @brief Fully assemble the nonzero pattern of a linear operator.
1623 
1624    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1625 
1626    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1627 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j)
1628 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1629 
1630    This will generally be slow unless your operator is low-order.
1631 
1632    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1633 
1634    @param[in]  op          CeedOperator to assemble
1635    @param[out] num_entries Number of entries in coordinate nonzero pattern
1636    @param[out] rows        Row number for each entry
1637    @param[out] cols        Column number for each entry
1638 
1639    @ref User
1640 **/
1641 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1642   CeedInt       num_suboperators, single_entries;
1643   CeedOperator *sub_operators;
1644   bool          is_composite;
1645   CeedCall(CeedOperatorCheckReady(op));
1646 
1647   if (op->LinearAssembleSymbolic) {
1648     // Backend version
1649     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1650     return CEED_ERROR_SUCCESS;
1651   } else {
1652     // Operator fallback
1653     CeedOperator op_fallback;
1654 
1655     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1656     if (op_fallback) {
1657       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1658       return CEED_ERROR_SUCCESS;
1659     }
1660   }
1661 
1662   // Default interface implementation
1663 
1664   // count entries and allocate rows, cols arrays
1665   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1666   *num_entries = 0;
1667   if (is_composite) {
1668     CeedCall(CeedOperatorGetNumSub(op, &num_suboperators));
1669     CeedCall(CeedOperatorGetSubList(op, &sub_operators));
1670     for (CeedInt k = 0; k < num_suboperators; ++k) {
1671       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1672       *num_entries += single_entries;
1673     }
1674   } else {
1675     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1676     *num_entries += single_entries;
1677   }
1678   CeedCall(CeedCalloc(*num_entries, rows));
1679   CeedCall(CeedCalloc(*num_entries, cols));
1680 
1681   // assemble nonzero locations
1682   CeedInt offset = 0;
1683   if (is_composite) {
1684     CeedCall(CeedOperatorGetNumSub(op, &num_suboperators));
1685     CeedCall(CeedOperatorGetSubList(op, &sub_operators));
1686     for (CeedInt k = 0; k < num_suboperators; ++k) {
1687       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1688       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1689       offset += single_entries;
1690     }
1691   } else {
1692     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1693   }
1694 
1695   return CEED_ERROR_SUCCESS;
1696 }
1697 
1698 /**
1699    @brief Fully assemble the nonzero entries of a linear operator.
1700 
1701    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1702 
1703    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1704 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added,
1705 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1706 
1707    This will generally be slow unless your operator is low-order.
1708 
1709    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1710 
1711    @param[in]  op     CeedOperator to assemble
1712    @param[out] values Values to assemble into matrix
1713 
1714    @ref User
1715 **/
1716 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1717   CeedInt       num_suboperators, single_entries = 0;
1718   CeedOperator *sub_operators;
1719   CeedCall(CeedOperatorCheckReady(op));
1720 
1721   if (op->LinearAssemble) {
1722     // Backend version
1723     CeedCall(op->LinearAssemble(op, values));
1724     return CEED_ERROR_SUCCESS;
1725   } else {
1726     // Operator fallback
1727     CeedOperator op_fallback;
1728 
1729     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1730     if (op_fallback) {
1731       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
1732       return CEED_ERROR_SUCCESS;
1733     }
1734   }
1735 
1736   // Default interface implementation
1737   bool is_composite;
1738   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1739 
1740   CeedInt offset = 0;
1741   if (is_composite) {
1742     CeedCall(CeedOperatorGetNumSub(op, &num_suboperators));
1743     CeedCall(CeedOperatorGetSubList(op, &sub_operators));
1744     for (CeedInt k = 0; k < num_suboperators; k++) {
1745       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
1746       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1747       offset += single_entries;
1748     }
1749   } else {
1750     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
1751   }
1752 
1753   return CEED_ERROR_SUCCESS;
1754 }
1755 
1756 /**
1757   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
1758 grid interpolation
1759 
1760   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1761 
1762   @param[in]  op_fine      Fine grid operator
1763   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter
1764   @param[in]  rstr_coarse  Coarse grid restriction
1765   @param[in]  basis_coarse Coarse grid active vector basis
1766   @param[out] op_coarse    Coarse grid operator
1767   @param[out] op_prolong   Coarse to fine operator
1768   @param[out] op_restrict  Fine to coarse operator
1769 
1770   @return An error code: 0 - success, otherwise - failure
1771 
1772   @ref User
1773 **/
1774 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1775                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
1776   CeedCall(CeedOperatorCheckReady(op_fine));
1777 
1778   // Build prolongation matrix
1779   CeedBasis basis_fine, basis_c_to_f;
1780   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1781   CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
1782 
1783   // Core code
1784   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1785 
1786   return CEED_ERROR_SUCCESS;
1787 }
1788 
1789 /**
1790   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
1791 
1792   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1793 
1794   @param[in]  op_fine       Fine grid operator
1795   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
1796   @param[in]  rstr_coarse   Coarse grid restriction
1797   @param[in]  basis_coarse  Coarse grid active vector basis
1798   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation
1799   @param[out] op_coarse     Coarse grid operator
1800   @param[out] op_prolong    Coarse to fine operator
1801   @param[out] op_restrict   Fine to coarse operator
1802 
1803   @return An error code: 0 - success, otherwise - failure
1804 
1805   @ref User
1806 **/
1807 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1808                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
1809                                              CeedOperator *op_restrict) {
1810   CeedCall(CeedOperatorCheckReady(op_fine));
1811   Ceed ceed;
1812   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
1813 
1814   // Check for compatible quadrature spaces
1815   CeedBasis basis_fine;
1816   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1817   CeedInt Q_f, Q_c;
1818   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
1819   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
1820   if (Q_f != Q_c) {
1821     // LCOV_EXCL_START
1822     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
1823     // LCOV_EXCL_STOP
1824   }
1825 
1826   // Coarse to fine basis
1827   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
1828   CeedCall(CeedBasisGetDimension(basis_fine, &dim));
1829   CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
1830   CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
1831   CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
1832   P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
1833   CeedScalar *q_ref, *q_weight, *grad;
1834   CeedCall(CeedCalloc(P_1d_f, &q_ref));
1835   CeedCall(CeedCalloc(P_1d_f, &q_weight));
1836   CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
1837   CeedBasis basis_c_to_f;
1838   CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
1839   CeedCall(CeedFree(&q_ref));
1840   CeedCall(CeedFree(&q_weight));
1841   CeedCall(CeedFree(&grad));
1842 
1843   // Core code
1844   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1845   return CEED_ERROR_SUCCESS;
1846 }
1847 
1848 /**
1849   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
1850 
1851   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1852 
1853   @param[in]  op_fine       Fine grid operator
1854   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
1855   @param[in]  rstr_coarse   Coarse grid restriction
1856   @param[in]  basis_coarse  Coarse grid active vector basis
1857   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation
1858   @param[out] op_coarse     Coarse grid operator
1859   @param[out] op_prolong    Coarse to fine operator
1860   @param[out] op_restrict   Fine to coarse operator
1861 
1862   @return An error code: 0 - success, otherwise - failure
1863 
1864   @ref User
1865 **/
1866 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1867                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
1868                                        CeedOperator *op_restrict) {
1869   CeedCall(CeedOperatorCheckReady(op_fine));
1870   Ceed ceed;
1871   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
1872 
1873   // Check for compatible quadrature spaces
1874   CeedBasis basis_fine;
1875   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1876   CeedInt Q_f, Q_c;
1877   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
1878   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
1879   if (Q_f != Q_c) {
1880     // LCOV_EXCL_START
1881     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
1882     // LCOV_EXCL_STOP
1883   }
1884 
1885   // Coarse to fine basis
1886   CeedElemTopology topo;
1887   CeedCall(CeedBasisGetTopology(basis_fine, &topo));
1888   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
1889   CeedCall(CeedBasisGetDimension(basis_fine, &dim));
1890   CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
1891   CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
1892   CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
1893   CeedScalar *q_ref, *q_weight, *grad;
1894   CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
1895   CeedCall(CeedCalloc(num_nodes_f, &q_weight));
1896   CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
1897   CeedBasis basis_c_to_f;
1898   CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
1899   CeedCall(CeedFree(&q_ref));
1900   CeedCall(CeedFree(&q_weight));
1901   CeedCall(CeedFree(&grad));
1902 
1903   // Core code
1904   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1905   return CEED_ERROR_SUCCESS;
1906 }
1907 
1908 /**
1909   @brief Build a FDM based approximate inverse for each element for a CeedOperator
1910 
1911   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
1912     This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, M = V^T V, K = V^T S V.
1913     The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form V^T
1914 S^hat V. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear.
1915 
1916   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1917 
1918   @param[in]  op      CeedOperator to create element inverses
1919   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
1920   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1921 
1922   @return An error code: 0 - success, otherwise - failure
1923 
1924   @ref User
1925 **/
1926 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
1927   CeedCall(CeedOperatorCheckReady(op));
1928 
1929   if (op->CreateFDMElementInverse) {
1930     // Backend version
1931     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
1932     return CEED_ERROR_SUCCESS;
1933   } else {
1934     // Operator fallback
1935     CeedOperator op_fallback;
1936 
1937     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1938     if (op_fallback) {
1939       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
1940       return CEED_ERROR_SUCCESS;
1941     }
1942   }
1943 
1944   // Default interface implementation
1945   Ceed ceed, ceed_parent;
1946   CeedCall(CeedOperatorGetCeed(op, &ceed));
1947   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
1948   ceed_parent = ceed_parent ? ceed_parent : ceed;
1949   CeedQFunction qf;
1950   CeedCall(CeedOperatorGetQFunction(op, &qf));
1951 
1952   // Determine active input basis
1953   bool                interp = false, grad = false;
1954   CeedBasis           basis = NULL;
1955   CeedElemRestriction rstr  = NULL;
1956   CeedOperatorField  *op_fields;
1957   CeedQFunctionField *qf_fields;
1958   CeedInt             num_input_fields;
1959   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
1960   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
1961   for (CeedInt i = 0; i < num_input_fields; i++) {
1962     CeedVector vec;
1963     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1964     if (vec == CEED_VECTOR_ACTIVE) {
1965       CeedEvalMode eval_mode;
1966       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1967       interp = interp || eval_mode == CEED_EVAL_INTERP;
1968       grad   = grad || eval_mode == CEED_EVAL_GRAD;
1969       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
1970       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
1971     }
1972   }
1973   if (!basis) {
1974     // LCOV_EXCL_START
1975     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
1976     // LCOV_EXCL_STOP
1977   }
1978   CeedSize l_size = 1;
1979   CeedInt  P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
1980   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
1981   CeedCall(CeedBasisGetNumNodes(basis, &elem_size));
1982   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
1983   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
1984   CeedCall(CeedBasisGetDimension(basis, &dim));
1985   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
1986   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
1987   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
1988 
1989   // Build and diagonalize 1D Mass and Laplacian
1990   bool tensor_basis;
1991   CeedCall(CeedBasisIsTensor(basis, &tensor_basis));
1992   if (!tensor_basis) {
1993     // LCOV_EXCL_START
1994     return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
1995     // LCOV_EXCL_STOP
1996   }
1997   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
1998   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
1999   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2000   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2001   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2002   CeedCall(CeedCalloc(P_1d, &lambda));
2003   // -- Build matrices
2004   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2005   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2006   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2007   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2008   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2009 
2010   // -- Diagonalize
2011   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2012   CeedCall(CeedFree(&mass));
2013   CeedCall(CeedFree(&laplace));
2014   for (CeedInt i = 0; i < P_1d; i++) {
2015     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2016   }
2017   CeedCall(CeedFree(&x));
2018 
2019   // Assemble QFunction
2020   CeedVector          assembled;
2021   CeedElemRestriction rstr_qf;
2022   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2023   CeedInt layout[3];
2024   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2025   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2026   CeedScalar max_norm = 0;
2027   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2028 
2029   // Calculate element averages
2030   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2031   CeedScalar       *elem_avg;
2032   const CeedScalar *assembled_array, *q_weight_array;
2033   CeedVector        q_weight;
2034   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2035   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2036   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2037   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2038   CeedCall(CeedCalloc(num_elem, &elem_avg));
2039   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2040   for (CeedInt e = 0; e < num_elem; e++) {
2041     CeedInt count = 0;
2042     for (CeedInt q = 0; q < num_qpts; q++) {
2043       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2044         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2045           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2046           count++;
2047         }
2048       }
2049     }
2050     if (count) {
2051       elem_avg[e] /= count;
2052     } else {
2053       elem_avg[e] = 1.0;
2054     }
2055   }
2056   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2057   CeedCall(CeedVectorDestroy(&assembled));
2058   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2059   CeedCall(CeedVectorDestroy(&q_weight));
2060 
2061   // Build FDM diagonal
2062   CeedVector  q_data;
2063   CeedScalar *q_data_array, *fdm_diagonal;
2064   CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal));
2065   const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON;
2066   for (CeedInt c = 0; c < num_comp; c++) {
2067     for (CeedInt n = 0; n < elem_size; n++) {
2068       if (interp) fdm_diagonal[c * elem_size + n] = 1.0;
2069       if (grad) {
2070         for (CeedInt d = 0; d < dim; d++) {
2071           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2072           fdm_diagonal[c * elem_size + n] += lambda[i];
2073         }
2074       }
2075       if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound;
2076     }
2077   }
2078   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data));
2079   CeedCall(CeedVectorSetValue(q_data, 0.0));
2080   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2081   for (CeedInt e = 0; e < num_elem; e++) {
2082     for (CeedInt c = 0; c < num_comp; c++) {
2083       for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]);
2084     }
2085   }
2086   CeedCall(CeedFree(&elem_avg));
2087   CeedCall(CeedFree(&fdm_diagonal));
2088   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2089 
2090   // Setup FDM operator
2091   // -- Basis
2092   CeedBasis   fdm_basis;
2093   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2094   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2095   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2096   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2097   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2098   CeedCall(CeedFree(&fdm_interp));
2099   CeedCall(CeedFree(&grad_dummy));
2100   CeedCall(CeedFree(&q_ref_dummy));
2101   CeedCall(CeedFree(&q_weight_dummy));
2102   CeedCall(CeedFree(&lambda));
2103 
2104   // -- Restriction
2105   CeedElemRestriction rstr_qd_i;
2106   CeedInt             strides[3] = {1, elem_size, elem_size * num_comp};
2107   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i));
2108   // -- QFunction
2109   CeedQFunction qf_fdm;
2110   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2111   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2112   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2113   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2114   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2115   // -- QFunction context
2116   CeedInt *num_comp_data;
2117   CeedCall(CeedCalloc(1, &num_comp_data));
2118   num_comp_data[0] = num_comp;
2119   CeedQFunctionContext ctx_fdm;
2120   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2121   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2122   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2123   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2124   // -- Operator
2125   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2126   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2127   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2128   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2129 
2130   // Cleanup
2131   CeedCall(CeedVectorDestroy(&q_data));
2132   CeedCall(CeedBasisDestroy(&fdm_basis));
2133   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2134   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2135 
2136   return CEED_ERROR_SUCCESS;
2137 }
2138 
2139 /// @}
2140