xref: /libCEED/interface/ceed-preconditioning.c (revision fd8f24fa4ef1026f43db0b90b0c68f0c3af48272)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed-impl.h>
9 #include <ceed.h>
10 #include <ceed/backend.h>
11 #include <assert.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = NULL;
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_rstr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_rstr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
128     // Cleanup
129     CeedCall(CeedQFunctionDestroy(&qf_fallback));
130     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
131     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
132   }
133   CeedCall(CeedOperatorSetName(op_fallback, op->name));
134   CeedCall(CeedOperatorCheckReady(op_fallback));
135   // Note: No ref-counting here so we don't get caught in a reference loop.
136   //       The op holds the only reference to op_fallback and is responsible for deleting itself and op_fallback.
137   op->op_fallback                 = op_fallback;
138   op_fallback->op_fallback_parent = op;
139 
140   return CEED_ERROR_SUCCESS;
141 }
142 
143 /**
144   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
145 
146   @param[in]  op          CeedOperator to retrieve fallback for
147   @param[out] op_fallback Fallback CeedOperator
148 
149   @return An error code: 0 - success, otherwise - failure
150 
151   @ref Developer
152 **/
153 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
154   // Create if needed
155   if (!op->op_fallback) {
156     CeedCall(CeedOperatorCreateFallback(op));
157   }
158   if (op->op_fallback) {
159     bool is_debug;
160 
161     CeedCall(CeedIsDebug(op->ceed, &is_debug));
162     if (is_debug) {
163       Ceed        ceed, ceed_fallback;
164       const char *resource, *resource_fallback;
165 
166       CeedCall(CeedOperatorGetCeed(op, &ceed));
167       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
168       CeedCall(CeedGetResource(ceed, &resource));
169       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
170 
171       CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- CeedOperator Fallback ----------\n");
172       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
173                 op->op_fallback);
174     }
175   }
176   *op_fallback = op->op_fallback;
177 
178   return CEED_ERROR_SUCCESS;
179 }
180 
181 /**
182   @brief Select correct basis matrix pointer based on CeedEvalMode
183 
184   @param[in]  basis     CeedBasis from which to get the basis matrix
185   @param[in]  eval_mode Current basis evaluation mode
186   @param[in]  identity  Pointer to identity matrix
187   @param[out] basis_ptr Basis pointer to set
188 
189   @ref Developer
190 **/
191 static inline int CeedOperatorGetBasisPointer(CeedBasis basis, CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar **basis_ptr) {
192   switch (eval_mode) {
193     case CEED_EVAL_NONE:
194       *basis_ptr = identity;
195       break;
196     case CEED_EVAL_INTERP:
197       CeedCall(CeedBasisGetInterp(basis, basis_ptr));
198       break;
199     case CEED_EVAL_GRAD:
200       CeedCall(CeedBasisGetGrad(basis, basis_ptr));
201       break;
202     case CEED_EVAL_DIV:
203       CeedCall(CeedBasisGetDiv(basis, basis_ptr));
204       break;
205     case CEED_EVAL_CURL:
206       CeedCall(CeedBasisGetCurl(basis, basis_ptr));
207       break;
208     case CEED_EVAL_WEIGHT:
209       break;  // Caught by QF Assembly
210   }
211   assert(*basis_ptr != NULL);
212 
213   return CEED_ERROR_SUCCESS;
214 }
215 
216 /**
217   @brief Create point block restriction for active operator field
218 
219   @param[in]  rstr            Original CeedElemRestriction for active field
220   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
221 
222   @return An error code: 0 - success, otherwise - failure
223 
224   @ref Developer
225 **/
226 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
227   Ceed ceed;
228   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
229   const CeedInt *offsets;
230   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
231 
232   // Expand offsets
233   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
234   CeedSize l_size;
235   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
236   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
237   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
238   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
239   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
240   CeedInt shift = num_comp;
241   if (comp_stride != 1) shift *= num_comp;
242   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
243   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
244     pointblock_offsets[i] = offsets[i] * shift;
245   }
246 
247   // Create new restriction
248   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
249                                      pointblock_offsets, pointblock_rstr));
250 
251   // Cleanup
252   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
253 
254   return CEED_ERROR_SUCCESS;
255 }
256 
257 /**
258   @brief Core logic for assembling operator diagonal or point block diagonal
259 
260   @param[in]  op            CeedOperator to assemble point block diagonal
261   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
262   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
263   @param[out] assembled     CeedVector to store assembled diagonal
264 
265   @return An error code: 0 - success, otherwise - failure
266 
267   @ref Developer
268 **/
269 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
270   Ceed ceed;
271   CeedCall(CeedOperatorGetCeed(op, &ceed));
272 
273   // Assemble QFunction
274   CeedQFunction       qf;
275   const CeedScalar   *assembled_qf_array;
276   CeedVector          assembled_qf        = NULL;
277   CeedElemRestriction assembled_elem_rstr = NULL;
278   CeedInt             num_input_fields, num_output_fields;
279   CeedInt             layout[3];
280 
281   CeedCall(CeedOperatorGetQFunction(op, &qf));
282   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
283   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &assembled_elem_rstr, request));
284   CeedCall(CeedElemRestrictionGetELayout(assembled_elem_rstr, &layout));
285   CeedCall(CeedElemRestrictionDestroy(&assembled_elem_rstr));
286   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
287 
288   // Get assembly data
289   CeedOperatorAssemblyData data;
290   const CeedEvalMode     **eval_modes_in, **eval_modes_out;
291   CeedInt                 *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
292   CeedSize               **eval_mode_offsets_in, **eval_mode_offsets_out, num_output_components;
293   CeedBasis               *active_bases;
294   CeedElemRestriction     *active_elem_rstrs;
295   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
296   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, &eval_mode_offsets_in,
297                                                 &num_eval_modes_out, &eval_modes_out, &eval_mode_offsets_out, &num_output_components));
298   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &active_bases, NULL, NULL));
299   CeedCall(CeedOperatorAssemblyDataGetElemRestrictions(data, NULL, &active_elem_rstrs));
300 
301   // Loop over all active bases
302   for (CeedInt b = 0; b < num_active_bases; b++) {
303     // Assemble point block diagonal restriction, if needed
304     CeedElemRestriction diag_elem_rstr;
305     if (is_pointblock) {
306       CeedCall(CeedOperatorCreateActivePointBlockRestriction(active_elem_rstrs[b], &diag_elem_rstr));
307     } else {
308       CeedCall(CeedElemRestrictionCreateUnsignedCopy(active_elem_rstrs[b], &diag_elem_rstr));
309     }
310 
311     // Create diagonal vector
312     CeedVector elem_diag;
313     CeedCall(CeedElemRestrictionCreateVector(diag_elem_rstr, NULL, &elem_diag));
314 
315     // Assemble element operator diagonals
316     CeedScalar *elem_diag_array;
317     CeedInt     num_elem, num_nodes, num_qpts, num_components;
318 
319     CeedCall(CeedVectorSetValue(elem_diag, 0.0));
320     CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
321     CeedCall(CeedElemRestrictionGetNumElements(diag_elem_rstr, &num_elem));
322     CeedCall(CeedBasisGetNumNodes(active_bases[b], &num_nodes));
323     CeedCall(CeedBasisGetNumComponents(active_bases[b], &num_components));
324     CeedCall(CeedBasisGetNumQuadraturePoints(active_bases[b], &num_qpts));
325 
326     // Construct identity matrix for basis if required
327     bool        has_eval_none = false;
328     CeedScalar *identity      = NULL;
329     for (CeedInt i = 0; i < num_eval_modes_in[b]; i++) {
330       has_eval_none = has_eval_none || (eval_modes_in[b][i] == CEED_EVAL_NONE);
331     }
332     for (CeedInt i = 0; i < num_eval_modes_out[b]; i++) {
333       has_eval_none = has_eval_none || (eval_modes_out[b][i] == CEED_EVAL_NONE);
334     }
335     if (has_eval_none) {
336       CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
337       for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
338     }
339 
340     // Compute the diagonal of B^T D B
341     // Each element
342     for (CeedSize e = 0; e < num_elem; e++) {
343       // Each basis eval mode pair
344       CeedInt      d_out              = 0, q_comp_out;
345       CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
346       for (CeedInt e_out = 0; e_out < num_eval_modes_out[b]; e_out++) {
347         const CeedScalar *B_t = NULL;
348         CeedOperatorGetBasisPointer(active_bases[b], eval_modes_out[b][e_out], identity, &B_t);
349         CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_out[b][e_out], &q_comp_out));
350         if (q_comp_out > 1) {
351           if (e_out == 0 || eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
352           else B_t = &B_t[(++d_out) * num_qpts * num_nodes];
353         }
354         eval_mode_out_prev = eval_modes_out[b][e_out];
355 
356         CeedInt      d_in              = 0, q_comp_in;
357         CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
358         for (CeedInt e_in = 0; e_in < num_eval_modes_in[b]; e_in++) {
359           const CeedScalar *B = NULL;
360           CeedOperatorGetBasisPointer(active_bases[b], eval_modes_in[b][e_in], identity, &B);
361           CeedCall(CeedBasisGetNumQuadratureComponents(active_bases[b], eval_modes_in[b][e_in], &q_comp_in));
362           if (q_comp_in > 1) {
363             if (e_in == 0 || eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
364             else B = &B[(++d_in) * num_qpts * num_nodes];
365           }
366           eval_mode_in_prev = eval_modes_in[b][e_in];
367 
368           // Each component
369           for (CeedInt c_out = 0; c_out < num_components; c_out++) {
370             // Each qpt/node pair
371             for (CeedInt q = 0; q < num_qpts; q++) {
372               if (is_pointblock) {
373                 // Point Block Diagonal
374                 for (CeedInt c_in = 0; c_in < num_components; c_in++) {
375                   const CeedSize c_offset = (eval_mode_offsets_in[b][e_in] + c_in) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
376                   const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
377                   for (CeedInt n = 0; n < num_nodes; n++) {
378                     elem_diag_array[((e * num_components + c_out) * num_components + c_in) * num_nodes + n] +=
379                         B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
380                   }
381                 }
382               } else {
383                 // Diagonal Only
384                 const CeedInt    c_offset = (eval_mode_offsets_in[b][e_in] + c_out) * num_output_components + eval_mode_offsets_out[b][e_out] + c_out;
385                 const CeedScalar qf_value = assembled_qf_array[q * layout[0] + c_offset * layout[1] + e * layout[2]];
386                 for (CeedInt n = 0; n < num_nodes; n++) {
387                   elem_diag_array[(e * num_components + c_out) * num_nodes + n] += B_t[q * num_nodes + n] * qf_value * B[q * num_nodes + n];
388                 }
389               }
390             }
391           }
392         }
393       }
394     }
395     CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
396 
397     // Assemble local operator diagonal
398     CeedCall(CeedElemRestrictionApply(diag_elem_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
399 
400     // Cleanup
401     CeedCall(CeedElemRestrictionDestroy(&diag_elem_rstr));
402     CeedCall(CeedVectorDestroy(&elem_diag));
403     CeedCall(CeedFree(&identity));
404   }
405   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
406   CeedCall(CeedVectorDestroy(&assembled_qf));
407 
408   return CEED_ERROR_SUCCESS;
409 }
410 
411 /**
412   @brief Core logic for assembling composite operator diagonal
413 
414   @param[in]  op            CeedOperator to assemble point block diagonal
415   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
416   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
417   @param[out] assembled     CeedVector to store assembled diagonal
418 
419   @return An error code: 0 - success, otherwise - failure
420 
421   @ref Developer
422 **/
423 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
424                                                                  CeedVector assembled) {
425   CeedInt       num_sub;
426   CeedOperator *suboperators;
427   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
428   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
429   for (CeedInt i = 0; i < num_sub; i++) {
430     if (is_pointblock) {
431       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
432     } else {
433       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
434     }
435   }
436   return CEED_ERROR_SUCCESS;
437 }
438 
439 /**
440   @brief Build nonzero pattern for non-composite operator
441 
442   Users should generally use CeedOperatorLinearAssembleSymbolic()
443 
444   @param[in]  op     CeedOperator to assemble nonzero pattern
445   @param[in]  offset Offset for number of entries
446   @param[out] rows   Row number for each entry
447   @param[out] cols   Column number for each entry
448 
449   @return An error code: 0 - success, otherwise - failure
450 
451   @ref Developer
452 **/
453 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
454   Ceed ceed;
455   bool is_composite;
456   CeedCall(CeedOperatorGetCeed(op, &ceed));
457   CeedCall(CeedOperatorIsComposite(op, &is_composite));
458   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
459 
460   CeedSize num_nodes;
461   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
462   CeedElemRestriction active_rstr;
463   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
464   CeedElemRestriction index_elem_rstr;
465   CeedCall(CeedElemRestrictionCreateUnorientedCopy(active_rstr, &index_elem_rstr));
466   CeedInt num_elem, elem_size, num_comp;
467   CeedCall(CeedElemRestrictionGetNumElements(index_elem_rstr, &num_elem));
468   CeedCall(CeedElemRestrictionGetElementSize(index_elem_rstr, &elem_size));
469   CeedCall(CeedElemRestrictionGetNumComponents(index_elem_rstr, &num_comp));
470   CeedInt layout_er[3];
471   CeedCall(CeedElemRestrictionGetELayout(index_elem_rstr, &layout_er));
472 
473   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
474 
475   // Determine elem_dof relation
476   CeedVector index_vec;
477   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
478   CeedScalar *array;
479   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
480   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
481   CeedCall(CeedVectorRestoreArray(index_vec, &array));
482   CeedVector elem_dof;
483   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
484   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
485   CeedCall(CeedElemRestrictionApply(index_elem_rstr, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
486   const CeedScalar *elem_dof_a;
487   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
488   CeedCall(CeedVectorDestroy(&index_vec));
489 
490   // Determine i, j locations for element matrices
491   CeedSize count = 0;
492   for (CeedInt e = 0; e < num_elem; e++) {
493     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
494       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
495         for (CeedInt i = 0; i < elem_size; i++) {
496           for (CeedInt j = 0; j < elem_size; j++) {
497             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
498             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
499 
500             const CeedInt row = elem_dof_a[elem_dof_index_row];
501             const CeedInt col = elem_dof_a[elem_dof_index_col];
502 
503             rows[offset + count] = row;
504             cols[offset + count] = col;
505             count++;
506           }
507         }
508       }
509     }
510   }
511   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
512   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
513   CeedCall(CeedVectorDestroy(&elem_dof));
514   CeedCall(CeedElemRestrictionDestroy(&index_elem_rstr));
515 
516   return CEED_ERROR_SUCCESS;
517 }
518 
519 /**
520   @brief Assemble nonzero entries for non-composite operator
521 
522   Users should generally use CeedOperatorLinearAssemble()
523 
524   @param[in]  op     CeedOperator to assemble
525   @param[in]  offset Offset for number of entries
526   @param[out] values Values to assemble into matrix
527 
528   @return An error code: 0 - success, otherwise - failure
529 
530   @ref Developer
531 **/
532 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
533   Ceed ceed;
534   bool is_composite;
535   CeedCall(CeedOperatorGetCeed(op, &ceed));
536   CeedCall(CeedOperatorIsComposite(op, &is_composite));
537 
538   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
539 
540   // Early exit for empty operator
541   {
542     CeedInt num_elem = 0;
543 
544     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
545     if (num_elem == 0) return CEED_ERROR_SUCCESS;
546   }
547 
548   if (op->LinearAssembleSingle) {
549     // Backend version
550     CeedCall(op->LinearAssembleSingle(op, offset, values));
551     return CEED_ERROR_SUCCESS;
552   } else {
553     // Operator fallback
554     CeedOperator op_fallback;
555 
556     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
557     if (op_fallback) {
558       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
559       return CEED_ERROR_SUCCESS;
560     }
561   }
562 
563   // Assemble QFunction
564   CeedQFunction qf;
565   CeedCall(CeedOperatorGetQFunction(op, &qf));
566   CeedVector          assembled_qf = NULL;
567   CeedElemRestriction rstr_q       = NULL;
568   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
569 
570   CeedInt            num_input_fields, num_output_fields;
571   CeedOperatorField *input_fields;
572   CeedOperatorField *output_fields;
573   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
574 
575   // Get assembly data
576   CeedOperatorAssemblyData data;
577   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
578   const CeedEvalMode **eval_modes_in, **eval_modes_out;
579   CeedInt             *num_eval_modes_in, *num_eval_modes_out, num_active_bases;
580   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_active_bases, &num_eval_modes_in, &eval_modes_in, NULL, &num_eval_modes_out,
581                                                 &eval_modes_out, NULL, NULL));
582   CeedBasis *bases;
583   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &bases, NULL, NULL));
584   CeedBasis basis_in = bases[0];
585 
586   CeedCheck(num_active_bases == 1, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with multiple active bases");
587   CeedCheck(num_eval_modes_in[0] > 0 && num_eval_modes_out[0] > 0, ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
588 
589   CeedElemRestriction active_rstr;
590   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
591   CeedInt num_elem, elem_size, num_qpts, num_comp;
592   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
593   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
594   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
595   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
596 
597   CeedRestrictionType rstr_type;
598   CeedCall(CeedElemRestrictionGetType(active_rstr, &rstr_type));
599   const bool     *orients      = NULL;
600   const CeedInt8 *curl_orients = NULL;
601   if (rstr_type == CEED_RESTRICTION_ORIENTED) {
602     CeedCall(CeedElemRestrictionGetOrientations(active_rstr, CEED_MEM_HOST, &orients));
603   } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) {
604     CeedCall(CeedElemRestrictionGetCurlOrientations(active_rstr, CEED_MEM_HOST, &curl_orients));
605   }
606 
607   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
608 
609   // Loop over elements and put in data structure
610   const CeedScalar *assembled_qf_array;
611   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
612 
613   CeedInt layout_qf[3];
614   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
615   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
616 
617   // We store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
618   const CeedScalar **B_mats_in, **B_mats_out;
619   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, NULL, &B_mats_in, &B_mats_out));
620   const CeedScalar *B_mat_in = B_mats_in[0], *B_mat_out = B_mats_out[0];
621   CeedScalar        BTD_mat[elem_size * num_qpts * num_eval_modes_in[0]];
622   CeedScalar        elem_mat[elem_size * elem_size];
623   CeedSize          count = 0;
624   CeedScalar       *vals;
625   CeedCall(CeedVectorGetArray(values, CEED_MEM_HOST, &vals));
626   for (CeedSize e = 0; e < num_elem; e++) {
627     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
628       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
629         // Compute B^T*D
630         for (CeedSize n = 0; n < elem_size; n++) {
631           for (CeedSize q = 0; q < num_qpts; q++) {
632             for (CeedInt e_in = 0; e_in < num_eval_modes_in[0]; e_in++) {
633               const CeedSize btd_index = n * (num_qpts * num_eval_modes_in[0]) + (num_eval_modes_in[0] * q + e_in);
634               CeedScalar     sum       = 0.0;
635               for (CeedInt e_out = 0; e_out < num_eval_modes_out[0]; e_out++) {
636                 const CeedSize b_out_index     = (num_eval_modes_out[0] * q + e_out) * elem_size + n;
637                 const CeedSize eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_modes_out[0] + e_out) * num_comp + comp_out;
638                 const CeedSize qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
639                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
640               }
641               BTD_mat[btd_index] = sum;
642             }
643           }
644         }
645 
646         // Form element matrix itself (for each block component)
647         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_modes_in[0]));
648 
649         // Transform the element matrix if required
650         if (orients) {
651           const bool *elem_orients = &orients[e * elem_size];
652           for (CeedInt i = 0; i < elem_size; i++) {
653             for (CeedInt j = 0; j < elem_size; j++) {
654               elem_mat[i * elem_size + j] *= elem_orients[i] ? -1.0 : 1.0;
655               elem_mat[i * elem_size + j] *= elem_orients[j] ? -1.0 : 1.0;
656             }
657           }
658         } else if (curl_orients) {
659           const CeedInt8 *elem_curl_orients = &curl_orients[e * 3 * elem_size];
660           CeedScalar      o_elem_mat[elem_size * elem_size];
661           // T^T*(B^T*D*B)
662           for (CeedInt i = 0; i < elem_size; i++) {
663             for (CeedInt j = 0; j < elem_size; j++) {
664               o_elem_mat[i * elem_size + j] = elem_mat[i * elem_size + j] * elem_curl_orients[3 * i + 1] +
665                                               (i > 0 ? elem_mat[(i - 1) * elem_size + j] * elem_curl_orients[3 * i - 1] : 0.0) +
666                                               (i < elem_size - 1 ? elem_mat[(i + 1) * elem_size + j] * elem_curl_orients[3 * i + 3] : 0.0);
667             }
668           }
669           // T^T*(B^T*D*B)*T
670           for (CeedInt i = 0; i < elem_size; i++) {
671             for (CeedInt j = 0; j < elem_size; j++) {
672               elem_mat[i * elem_size + j] = o_elem_mat[i * elem_size + j] * elem_curl_orients[3 * j + 1] +
673                                             (j > 0 ? o_elem_mat[i * elem_size + j - 1] * elem_curl_orients[3 * j - 1] : 0.0) +
674                                             (j < elem_size - 1 ? o_elem_mat[i * elem_size + j + 1] * elem_curl_orients[3 * j + 3] : 0.0);
675             }
676           }
677         }
678 
679         // Put element matrix in coordinate data structure
680         for (CeedInt i = 0; i < elem_size; i++) {
681           for (CeedInt j = 0; j < elem_size; j++) {
682             vals[offset + count] = elem_mat[i * elem_size + j];
683             count++;
684           }
685         }
686       }
687     }
688   }
689   CeedCheck(count == local_num_entries, ceed, CEED_ERROR_MAJOR, "Error computing entries");
690   CeedCall(CeedVectorRestoreArray(values, &vals));
691 
692   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
693   CeedCall(CeedVectorDestroy(&assembled_qf));
694 
695   if (rstr_type == CEED_RESTRICTION_ORIENTED) {
696     CeedCall(CeedElemRestrictionRestoreOrientations(active_rstr, &orients));
697   } else if (rstr_type == CEED_RESTRICTION_CURL_ORIENTED) {
698     CeedCall(CeedElemRestrictionRestoreCurlOrientations(active_rstr, &curl_orients));
699   }
700 
701   return CEED_ERROR_SUCCESS;
702 }
703 
704 /**
705   @brief Count number of entries for assembled CeedOperator
706 
707   @param[in]  op          CeedOperator to assemble
708   @param[out] num_entries Number of entries in assembled representation
709 
710   @return An error code: 0 - success, otherwise - failure
711 
712   @ref Utility
713 **/
714 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedSize *num_entries) {
715   bool                is_composite;
716   CeedElemRestriction rstr;
717   CeedInt             num_elem, elem_size, num_comp;
718 
719   CeedCall(CeedOperatorIsComposite(op, &is_composite));
720   CeedCheck(!is_composite, op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
721   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
722   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
723   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
724   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
725   *num_entries = (CeedSize)elem_size * num_comp * elem_size * num_comp * num_elem;
726 
727   return CEED_ERROR_SUCCESS;
728 }
729 
730 /**
731   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
732 
733   @param[in]  op_fine      Fine grid operator
734   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
735   @param[in]  rstr_coarse  Coarse grid restriction
736   @param[in]  basis_coarse Coarse grid active vector basis
737   @param[in]  basis_c_to_f Basis for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
738   @param[out] op_coarse    Coarse grid operator
739   @param[out] op_prolong   Coarse to fine operator, or NULL
740   @param[out] op_restrict  Fine to coarse operator, or NULL
741 
742   @return An error code: 0 - success, otherwise - failure
743 
744   @ref Developer
745 **/
746 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
747                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
748   Ceed                ceed;
749   CeedVector          mult_vec         = NULL;
750   CeedElemRestriction rstr_p_mult_fine = NULL;
751   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
752 
753   // Check for composite operator
754   bool is_composite;
755   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
756   CeedCheck(!is_composite, ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
757 
758   // Coarse Grid
759   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
760   CeedElemRestriction rstr_fine = NULL;
761   // -- Clone input fields
762   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
763     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
764       rstr_fine = op_fine->input_fields[i]->elem_rstr;
765       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
766     } else {
767       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_rstr,
768                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
769     }
770   }
771   // -- Clone output fields
772   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
773     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
774       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
775     } else {
776       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_rstr,
777                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
778     }
779   }
780   // -- Clone QFunctionAssemblyData
781   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
782 
783   // Multiplicity vector
784   if (op_restrict || op_prolong) {
785     CeedRestrictionType rstr_type;
786     CeedVector          mult_e_vec;
787 
788     CeedCall(CeedElemRestrictionGetType(rstr_fine, &rstr_type));
789     CeedCheck(rstr_type != CEED_RESTRICTION_CURL_ORIENTED, ceed, CEED_ERROR_UNSUPPORTED,
790               "Element restrictions created with CeedElemRestrictionCreateCurlOriented are not supported");
791     CeedCheck(p_mult_fine, ceed, CEED_ERROR_INCOMPATIBLE, "Prolongation or restriction operator creation requires fine grid multiplicity vector");
792     CeedCall(CeedElemRestrictionCreateUnsignedCopy(rstr_fine, &rstr_p_mult_fine));
793     CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
794     CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
795     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
796     CeedCall(CeedVectorSetValue(mult_vec, 0.0));
797     CeedCall(CeedElemRestrictionApply(rstr_p_mult_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
798     CeedCall(CeedVectorDestroy(&mult_e_vec));
799     CeedCall(CeedVectorReciprocal(mult_vec));
800   }
801 
802   // Clone name
803   bool   has_name = op_fine->name;
804   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
805   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
806 
807   // Check that coarse to fine basis is provided if prolong/restrict operators are requested
808   CeedCheck(basis_c_to_f || (!op_restrict && !op_prolong), ceed, CEED_ERROR_INCOMPATIBLE,
809             "Prolongation or restriction operator creation requires coarse-to-fine basis");
810 
811   // Restriction/Prolongation Operators
812   CeedInt num_comp;
813   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
814 
815   // Restriction
816   if (op_restrict) {
817     CeedInt             *num_comp_r_data;
818     CeedQFunction        qf_restrict;
819     CeedQFunctionContext ctx_r;
820 
821     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
822     CeedCall(CeedCalloc(1, &num_comp_r_data));
823     num_comp_r_data[0] = num_comp;
824     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
825     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
826     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
827     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
828     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
829     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
830     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
831     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
832 
833     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
834     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
835     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec));
836     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
837 
838     // Set name
839     char *restriction_name;
840     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
841     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
842     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
843     CeedCall(CeedFree(&restriction_name));
844 
845     // Check
846     CeedCall(CeedOperatorCheckReady(*op_restrict));
847 
848     // Cleanup
849     CeedCall(CeedQFunctionDestroy(&qf_restrict));
850   }
851 
852   // Prolongation
853   if (op_prolong) {
854     CeedInt             *num_comp_p_data;
855     CeedQFunction        qf_prolong;
856     CeedQFunctionContext ctx_p;
857 
858     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
859     CeedCall(CeedCalloc(1, &num_comp_p_data));
860     num_comp_p_data[0] = num_comp;
861     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
862     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
863     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
864     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
865     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
866     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
867     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
868     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
869 
870     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
871     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
872     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_p_mult_fine, CEED_BASIS_COLLOCATED, mult_vec));
873     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
874 
875     // Set name
876     char *prolongation_name;
877     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
878     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
879     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
880     CeedCall(CeedFree(&prolongation_name));
881 
882     // Check
883     CeedCall(CeedOperatorCheckReady(*op_prolong));
884 
885     // Cleanup
886     CeedCall(CeedQFunctionDestroy(&qf_prolong));
887   }
888 
889   // Check
890   CeedCall(CeedOperatorCheckReady(*op_coarse));
891 
892   // Cleanup
893   CeedCall(CeedVectorDestroy(&mult_vec));
894   CeedCall(CeedElemRestrictionDestroy(&rstr_p_mult_fine));
895   CeedCall(CeedBasisDestroy(&basis_c_to_f));
896 
897   return CEED_ERROR_SUCCESS;
898 }
899 
900 /**
901   @brief Build 1D mass matrix and Laplacian with perturbation
902 
903   @param[in]  interp_1d   Interpolation matrix in one dimension
904   @param[in]  grad_1d     Gradient matrix in one dimension
905   @param[in]  q_weight_1d Quadrature weights in one dimension
906   @param[in]  P_1d        Number of basis nodes in one dimension
907   @param[in]  Q_1d        Number of quadrature points in one dimension
908   @param[in]  dim         Dimension of basis
909   @param[out] mass        Assembled mass matrix in one dimension
910   @param[out] laplace     Assembled perturbed Laplacian in one dimension
911 
912   @return An error code: 0 - success, otherwise - failure
913 
914   @ref Developer
915 **/
916 CeedPragmaOptimizeOff
917 static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d, CeedInt P_1d, CeedInt Q_1d,
918                                 CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
919   for (CeedInt i = 0; i < P_1d; i++) {
920     for (CeedInt j = 0; j < P_1d; j++) {
921       CeedScalar sum = 0.0;
922       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
923       mass[i + j * P_1d] = sum;
924     }
925   }
926   // -- Laplacian
927   for (CeedInt i = 0; i < P_1d; i++) {
928     for (CeedInt j = 0; j < P_1d; j++) {
929       CeedScalar sum = 0.0;
930       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
931       laplace[i + j * P_1d] = sum;
932     }
933   }
934   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
935   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
936   return CEED_ERROR_SUCCESS;
937 }
938 CeedPragmaOptimizeOn
939 
940 /// @}
941 
942 /// ----------------------------------------------------------------------------
943 /// CeedOperator Backend API
944 /// ----------------------------------------------------------------------------
945 /// @addtogroup CeedOperatorBackend
946 /// @{
947 
948 /**
949   @brief Create object holding CeedQFunction assembly data for CeedOperator
950 
951   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
952   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
953 
954   @return An error code: 0 - success, otherwise - failure
955 
956   @ref Backend
957 **/
958 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
959   CeedCall(CeedCalloc(1, data));
960   (*data)->ref_count = 1;
961   (*data)->ceed      = ceed;
962   CeedCall(CeedReference(ceed));
963 
964   return CEED_ERROR_SUCCESS;
965 }
966 
967 /**
968   @brief Increment the reference counter for a CeedQFunctionAssemblyData
969 
970   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
971 
972   @return An error code: 0 - success, otherwise - failure
973 
974   @ref Backend
975 **/
976 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
977   data->ref_count++;
978   return CEED_ERROR_SUCCESS;
979 }
980 
981 /**
982   @brief Set re-use of CeedQFunctionAssemblyData
983 
984   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
985   @param[in]     reuse_data Boolean flag indicating data re-use
986 
987   @return An error code: 0 - success, otherwise - failure
988 
989   @ref Backend
990 **/
991 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
992   data->reuse_data        = reuse_data;
993   data->needs_data_update = true;
994   return CEED_ERROR_SUCCESS;
995 }
996 
997 /**
998   @brief Mark QFunctionAssemblyData as stale
999 
1000   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
1001   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
1002 
1003   @return An error code: 0 - success, otherwise - failure
1004 
1005   @ref Backend
1006 **/
1007 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
1008   data->needs_data_update = needs_data_update;
1009   return CEED_ERROR_SUCCESS;
1010 }
1011 
1012 /**
1013   @brief Determine if QFunctionAssemblyData needs update
1014 
1015   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
1016   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1017 
1018   @return An error code: 0 - success, otherwise - failure
1019 
1020   @ref Backend
1021 **/
1022 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
1023   *is_update_needed = !data->reuse_data || data->needs_data_update;
1024   return CEED_ERROR_SUCCESS;
1025 }
1026 
1027 /**
1028   @brief Copy the pointer to a CeedQFunctionAssemblyData.
1029 
1030   Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
1031 
1032   Note: If the value of `data_copy` passed to this function is non-NULL, then it is assumed that `*data_copy` is a pointer to a
1033         CeedQFunctionAssemblyData. This CeedQFunctionAssemblyData will be destroyed if `data_copy` is the only reference to this
1034         CeedQFunctionAssemblyData.
1035 
1036   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
1037   @param[in,out] data_copy Variable to store copied reference
1038 
1039   @return An error code: 0 - success, otherwise - failure
1040 
1041   @ref Backend
1042 **/
1043 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
1044   CeedCall(CeedQFunctionAssemblyDataReference(data));
1045   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
1046   *data_copy = data;
1047   return CEED_ERROR_SUCCESS;
1048 }
1049 
1050 /**
1051   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1052 
1053   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
1054   @param[out] is_setup Boolean flag for setup status
1055 
1056   @return An error code: 0 - success, otherwise - failure
1057 
1058   @ref Backend
1059 **/
1060 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1061   *is_setup = data->is_setup;
1062   return CEED_ERROR_SUCCESS;
1063 }
1064 
1065 /**
1066   @brief Set internal objects for CeedQFunctionAssemblyData
1067 
1068   @param[in,out] data CeedQFunctionAssemblyData to set objects
1069   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1070   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1071 
1072   @return An error code: 0 - success, otherwise - failure
1073 
1074   @ref Backend
1075 **/
1076 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1077   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1078   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1079 
1080   data->is_setup = true;
1081   return CEED_ERROR_SUCCESS;
1082 }
1083 
1084 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1085   CeedCheck(data->is_setup, data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1086 
1087   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1088   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1089 
1090   return CEED_ERROR_SUCCESS;
1091 }
1092 
1093 /**
1094   @brief Destroy CeedQFunctionAssemblyData
1095 
1096   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1097 
1098   @return An error code: 0 - success, otherwise - failure
1099 
1100   @ref Backend
1101 **/
1102 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1103   if (!*data || --(*data)->ref_count > 0) {
1104     *data = NULL;
1105     return CEED_ERROR_SUCCESS;
1106   }
1107   CeedCall(CeedDestroy(&(*data)->ceed));
1108   CeedCall(CeedVectorDestroy(&(*data)->vec));
1109   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1110 
1111   CeedCall(CeedFree(data));
1112   return CEED_ERROR_SUCCESS;
1113 }
1114 
1115 /**
1116   @brief Get CeedOperatorAssemblyData
1117 
1118   @param[in]  op   CeedOperator to assemble
1119   @param[out] data CeedQFunctionAssemblyData
1120 
1121   @return An error code: 0 - success, otherwise - failure
1122 
1123   @ref Backend
1124 **/
1125 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1126   if (!op->op_assembled) {
1127     CeedOperatorAssemblyData data;
1128 
1129     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1130     op->op_assembled = data;
1131   }
1132   *data = op->op_assembled;
1133 
1134   return CEED_ERROR_SUCCESS;
1135 }
1136 
1137 /**
1138   @brief Create object holding CeedOperator assembly data.
1139 
1140   The CeedOperatorAssemblyData holds an array with references to every active CeedBasis used in the CeedOperator.
1141   An array with references to the corresponding active CeedElemRestrictions is also stored.
1142   For each active CeedBasis, the CeedOperatorAssemblyData holds an array of all input and output CeedEvalModes for this CeedBasis.
1143   The CeedOperatorAssemblyData holds an array of offsets for indexing into the assembled CeedQFunction arrays to the row representing each
1144 CeedEvalMode.
1145   The number of input columns across all active bases for the assembled CeedQFunction is also stored.
1146   Lastly, the CeedOperatorAssembly data holds assembled matrices representing the full action of the CeedBasis for all CeedEvalModes.
1147 
1148   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1149   @param[in]  op   CeedOperator to be assembled
1150   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1151 
1152   @return An error code: 0 - success, otherwise - failure
1153 
1154   @ref Backend
1155 **/
1156 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1157   CeedInt num_active_bases = 0;
1158 
1159   // Allocate
1160   CeedCall(CeedCalloc(1, data));
1161   (*data)->ceed = ceed;
1162   CeedCall(CeedReference(ceed));
1163 
1164   // Build OperatorAssembly data
1165   CeedQFunction       qf;
1166   CeedQFunctionField *qf_fields;
1167   CeedOperatorField  *op_fields;
1168   CeedInt             num_input_fields;
1169   CeedCall(CeedOperatorGetQFunction(op, &qf));
1170   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1171   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1172 
1173   // Determine active input basis
1174   CeedInt       *num_eval_modes_in = NULL, *num_eval_modes_out = NULL, offset = 0;
1175   CeedEvalMode **eval_modes_in = NULL, **eval_modes_out = NULL;
1176   CeedSize     **eval_mode_offsets_in = NULL, **eval_mode_offsets_out = NULL;
1177   for (CeedInt i = 0; i < num_input_fields; i++) {
1178     CeedVector vec;
1179     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1180     if (vec == CEED_VECTOR_ACTIVE) {
1181       CeedBasis    basis_in = NULL;
1182       CeedEvalMode eval_mode;
1183       CeedInt      index = -1, num_comp, q_comp;
1184       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1185       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1186       CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
1187       CeedCall(CeedBasisGetNumQuadratureComponents(basis_in, eval_mode, &q_comp));
1188       for (CeedInt i = 0; i < num_active_bases; i++) {
1189         if ((*data)->active_bases[i] == basis_in) index = i;
1190       }
1191       if (index == -1) {
1192         CeedElemRestriction elem_rstr_in;
1193         index = num_active_bases;
1194         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1195         (*data)->active_bases[num_active_bases] = NULL;
1196         CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->active_bases[num_active_bases]));
1197         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1198         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1199         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_in));
1200         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_in, &(*data)->active_elem_rstrs[num_active_bases]));
1201         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1202         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1203         num_eval_modes_in[index]  = 0;
1204         num_eval_modes_out[index] = 0;
1205         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1206         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1207         eval_modes_in[index]  = NULL;
1208         eval_modes_out[index] = NULL;
1209         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1210         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1211         eval_mode_offsets_in[index]  = NULL;
1212         eval_mode_offsets_out[index] = NULL;
1213         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1214         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1215         (*data)->assembled_bases_in[index]  = NULL;
1216         (*data)->assembled_bases_out[index] = NULL;
1217         num_active_bases++;
1218       }
1219       if (eval_mode != CEED_EVAL_WEIGHT) {
1220         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1221         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_modes_in[index]));
1222         CeedCall(CeedRealloc(num_eval_modes_in[index] + q_comp, &eval_mode_offsets_in[index]));
1223         for (CeedInt d = 0; d < q_comp; d++) {
1224           eval_modes_in[index][num_eval_modes_in[index] + d]        = eval_mode;
1225           eval_mode_offsets_in[index][num_eval_modes_in[index] + d] = offset;
1226           offset += num_comp;
1227         }
1228         num_eval_modes_in[index] += q_comp;
1229       }
1230     }
1231   }
1232   (*data)->num_eval_modes_in    = num_eval_modes_in;
1233   (*data)->eval_modes_in        = eval_modes_in;
1234   (*data)->eval_mode_offsets_in = eval_mode_offsets_in;
1235 
1236   // Determine active output basis
1237   CeedInt num_output_fields;
1238   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1239   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1240   offset = 0;
1241   for (CeedInt i = 0; i < num_output_fields; i++) {
1242     CeedVector vec;
1243     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1244     if (vec == CEED_VECTOR_ACTIVE) {
1245       CeedBasis    basis_out = NULL;
1246       CeedEvalMode eval_mode;
1247       CeedInt      index = -1, num_comp, q_comp;
1248       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1249       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1250       CeedCall(CeedBasisGetNumComponents(basis_out, &num_comp));
1251       CeedCall(CeedBasisGetNumQuadratureComponents(basis_out, eval_mode, &q_comp));
1252       for (CeedInt i = 0; i < num_active_bases; i++) {
1253         if ((*data)->active_bases[i] == basis_out) index = i;
1254       }
1255       if (index == -1) {
1256         CeedElemRestriction elem_rstr_out;
1257         index = num_active_bases;
1258         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_bases));
1259         (*data)->active_bases[num_active_bases] = NULL;
1260         CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->active_bases[num_active_bases]));
1261         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->active_elem_rstrs));
1262         (*data)->active_elem_rstrs[num_active_bases] = NULL;
1263         CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &elem_rstr_out));
1264         CeedCall(CeedElemRestrictionReferenceCopy(elem_rstr_out, &(*data)->active_elem_rstrs[num_active_bases]));
1265         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_in));
1266         CeedCall(CeedRealloc(num_active_bases + 1, &num_eval_modes_out));
1267         num_eval_modes_in[index]  = 0;
1268         num_eval_modes_out[index] = 0;
1269         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_in));
1270         CeedCall(CeedRealloc(num_active_bases + 1, &eval_modes_out));
1271         eval_modes_in[index]  = NULL;
1272         eval_modes_out[index] = NULL;
1273         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_in));
1274         CeedCall(CeedRealloc(num_active_bases + 1, &eval_mode_offsets_out));
1275         eval_mode_offsets_in[index]  = NULL;
1276         eval_mode_offsets_out[index] = NULL;
1277         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_in));
1278         CeedCall(CeedRealloc(num_active_bases + 1, &(*data)->assembled_bases_out));
1279         (*data)->assembled_bases_in[index]  = NULL;
1280         (*data)->assembled_bases_out[index] = NULL;
1281         num_active_bases++;
1282       }
1283       if (eval_mode != CEED_EVAL_WEIGHT) {
1284         // q_comp = 1 if CEED_EVAL_NONE, CEED_EVAL_WEIGHT caught by QF Assembly
1285         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_modes_out[index]));
1286         CeedCall(CeedRealloc(num_eval_modes_out[index] + q_comp, &eval_mode_offsets_out[index]));
1287         for (CeedInt d = 0; d < q_comp; d++) {
1288           eval_modes_out[index][num_eval_modes_out[index] + d]        = eval_mode;
1289           eval_mode_offsets_out[index][num_eval_modes_out[index] + d] = offset;
1290           offset += num_comp;
1291         }
1292         num_eval_modes_out[index] += q_comp;
1293       }
1294     }
1295   }
1296   (*data)->num_output_components = offset;
1297   (*data)->num_eval_modes_out    = num_eval_modes_out;
1298   (*data)->eval_modes_out        = eval_modes_out;
1299   (*data)->eval_mode_offsets_out = eval_mode_offsets_out;
1300   (*data)->num_active_bases      = num_active_bases;
1301 
1302   return CEED_ERROR_SUCCESS;
1303 }
1304 
1305 /**
1306   @brief Get CeedOperator CeedEvalModes for assembly.
1307 
1308   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1309 
1310   @param[in]  data                  CeedOperatorAssemblyData
1311   @param[out] num_active_bases      Total number of active bases
1312   @param[out] num_eval_modes_in     Pointer to hold array of numbers of input CeedEvalModes, or NULL.
1313                                       `eval_modes_in[0]` holds an array of eval modes for the first active basis.
1314   @param[out] eval_modes_in         Pointer to hold arrays of input CeedEvalModes, or NULL.
1315   @param[out] eval_mode_offsets_in  Pointer to hold arrays of input offsets at each quadrature point.
1316   @param[out] num_eval_modes_out    Pointer to hold array of numbers of output CeedEvalModes, or NULL
1317   @param[out] eval_modes_out        Pointer to hold arrays of output CeedEvalModes, or NULL.
1318   @param[out] eval_mode_offsets_out Pointer to hold arrays of output offsets at each quadrature point
1319   @param[out] num_output_components The number of columns in the assembled CeedQFunction matrix for each quadrature point,
1320                                       including contributions of all active bases
1321 
1322   @return An error code: 0 - success, otherwise - failure
1323 
1324 
1325   @ref Backend
1326 **/
1327 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedInt **num_eval_modes_in,
1328                                          const CeedEvalMode ***eval_modes_in, CeedSize ***eval_mode_offsets_in, CeedInt **num_eval_modes_out,
1329                                          const CeedEvalMode ***eval_modes_out, CeedSize ***eval_mode_offsets_out, CeedSize *num_output_components) {
1330   if (num_active_bases) *num_active_bases = data->num_active_bases;
1331   if (num_eval_modes_in) *num_eval_modes_in = data->num_eval_modes_in;
1332   if (eval_modes_in) *eval_modes_in = (const CeedEvalMode **)data->eval_modes_in;
1333   if (eval_mode_offsets_in) *eval_mode_offsets_in = data->eval_mode_offsets_in;
1334   if (num_eval_modes_out) *num_eval_modes_out = data->num_eval_modes_out;
1335   if (eval_modes_out) *eval_modes_out = (const CeedEvalMode **)data->eval_modes_out;
1336   if (eval_mode_offsets_out) *eval_mode_offsets_out = data->eval_mode_offsets_out;
1337   if (num_output_components) *num_output_components = data->num_output_components;
1338 
1339   return CEED_ERROR_SUCCESS;
1340 }
1341 
1342 /**
1343   @brief Get CeedOperator CeedBasis data for assembly.
1344 
1345   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1346 
1347   @param[in]  data                CeedOperatorAssemblyData
1348   @param[out] num_active_bases    Number of active bases, or NULL
1349   @param[out] active_bases        Pointer to hold active CeedBasis, or NULL
1350   @param[out] assembled_bases_in  Pointer to hold assembled active input B, or NULL
1351   @param[out] assembled_bases_out Pointer to hold assembled active output B, or NULL
1352 
1353   @return An error code: 0 - success, otherwise - failure
1354 
1355   @ref Backend
1356 **/
1357 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedInt *num_active_bases, CeedBasis **active_bases,
1358                                      const CeedScalar ***assembled_bases_in, const CeedScalar ***assembled_bases_out) {
1359   // Assemble B_in, B_out if needed
1360   if (assembled_bases_in && !data->assembled_bases_in[0]) {
1361     CeedInt num_qpts;
1362 
1363     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1364     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1365       CeedInt     num_nodes;
1366       CeedScalar *B_in = NULL, *identity = NULL;
1367       bool        has_eval_none = false;
1368 
1369       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1370       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_in[b], &B_in));
1371 
1372       for (CeedInt i = 0; i < data->num_eval_modes_in[b]; i++) {
1373         has_eval_none = has_eval_none || (data->eval_modes_in[b][i] == CEED_EVAL_NONE);
1374       }
1375       if (has_eval_none) {
1376         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1377         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1378           identity[i * num_nodes + i] = 1.0;
1379         }
1380       }
1381 
1382       for (CeedInt q = 0; q < num_qpts; q++) {
1383         for (CeedInt n = 0; n < num_nodes; n++) {
1384           CeedInt      d_in              = 0, q_comp_in;
1385           CeedEvalMode eval_mode_in_prev = CEED_EVAL_NONE;
1386           for (CeedInt e_in = 0; e_in < data->num_eval_modes_in[b]; e_in++) {
1387             const CeedInt     qq = data->num_eval_modes_in[b] * q;
1388             const CeedScalar *B  = NULL;
1389             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_in[b][e_in], identity, &B);
1390             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_in[b][e_in], &q_comp_in));
1391             if (q_comp_in > 1) {
1392               if (e_in == 0 || data->eval_modes_in[b][e_in] != eval_mode_in_prev) d_in = 0;
1393               else B = &B[(++d_in) * num_qpts * num_nodes];
1394             }
1395             eval_mode_in_prev                 = data->eval_modes_in[b][e_in];
1396             B_in[(qq + e_in) * num_nodes + n] = B[q * num_nodes + n];
1397           }
1398         }
1399       }
1400       if (identity) CeedCall(CeedFree(&identity));
1401       data->assembled_bases_in[b] = B_in;
1402     }
1403   }
1404 
1405   if (assembled_bases_out && !data->assembled_bases_out[0]) {
1406     CeedInt num_qpts;
1407 
1408     CeedCall(CeedBasisGetNumQuadraturePoints(data->active_bases[0], &num_qpts));
1409     for (CeedInt b = 0; b < data->num_active_bases; b++) {
1410       CeedInt     num_nodes;
1411       bool        has_eval_none = false;
1412       CeedScalar *B_out = NULL, *identity = NULL;
1413 
1414       CeedCall(CeedBasisGetNumNodes(data->active_bases[b], &num_nodes));
1415       CeedCall(CeedCalloc(num_qpts * num_nodes * data->num_eval_modes_out[b], &B_out));
1416 
1417       for (CeedInt i = 0; i < data->num_eval_modes_out[b]; i++) {
1418         has_eval_none = has_eval_none || (data->eval_modes_out[b][i] == CEED_EVAL_NONE);
1419       }
1420       if (has_eval_none) {
1421         CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
1422         for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) {
1423           identity[i * num_nodes + i] = 1.0;
1424         }
1425       }
1426 
1427       for (CeedInt q = 0; q < num_qpts; q++) {
1428         for (CeedInt n = 0; n < num_nodes; n++) {
1429           CeedInt      d_out              = 0, q_comp_out;
1430           CeedEvalMode eval_mode_out_prev = CEED_EVAL_NONE;
1431           for (CeedInt e_out = 0; e_out < data->num_eval_modes_out[b]; e_out++) {
1432             const CeedInt     qq = data->num_eval_modes_out[b] * q;
1433             const CeedScalar *B  = NULL;
1434             CeedOperatorGetBasisPointer(data->active_bases[b], data->eval_modes_out[b][e_out], identity, &B);
1435             CeedCall(CeedBasisGetNumQuadratureComponents(data->active_bases[b], data->eval_modes_out[b][e_out], &q_comp_out));
1436             if (q_comp_out > 1) {
1437               if (e_out == 0 || data->eval_modes_out[b][e_out] != eval_mode_out_prev) d_out = 0;
1438               else B = &B[(++d_out) * num_qpts * num_nodes];
1439             }
1440             eval_mode_out_prev                  = data->eval_modes_out[b][e_out];
1441             B_out[(qq + e_out) * num_nodes + n] = B[q * num_nodes + n];
1442           }
1443         }
1444       }
1445       if (identity) CeedCall(CeedFree(&identity));
1446       data->assembled_bases_out[b] = B_out;
1447     }
1448   }
1449 
1450   // Pass out assembled data
1451   if (active_bases) *active_bases = data->active_bases;
1452   if (assembled_bases_in) *assembled_bases_in = (const CeedScalar **)data->assembled_bases_in;
1453   if (assembled_bases_out) *assembled_bases_out = (const CeedScalar **)data->assembled_bases_out;
1454 
1455   return CEED_ERROR_SUCCESS;
1456 }
1457 
1458 /**
1459   @brief Get CeedOperator CeedBasis data for assembly.
1460 
1461   Note: See CeedOperatorAssemblyDataCreate for a full description of the data stored in this object.
1462 
1463   @param[in]  data                  CeedOperatorAssemblyData
1464   @param[out] num_active_elem_rstrs Number of active element restrictions, or NULL
1465   @param[out] active_elem_rstrs     Pointer to hold active CeedElemRestrictions, or NULL
1466 
1467   @return An error code: 0 - success, otherwise - failure
1468 
1469   @ref Backend
1470 **/
1471 int CeedOperatorAssemblyDataGetElemRestrictions(CeedOperatorAssemblyData data, CeedInt *num_active_elem_rstrs,
1472                                                 CeedElemRestriction **active_elem_rstrs) {
1473   if (num_active_elem_rstrs) *num_active_elem_rstrs = data->num_active_bases;
1474   if (active_elem_rstrs) *active_elem_rstrs = data->active_elem_rstrs;
1475 
1476   return CEED_ERROR_SUCCESS;
1477 }
1478 
1479 /**
1480   @brief Destroy CeedOperatorAssemblyData
1481 
1482   @param[in,out] data CeedOperatorAssemblyData to destroy
1483 
1484   @return An error code: 0 - success, otherwise - failure
1485 
1486   @ref Backend
1487 **/
1488 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1489   if (!*data) {
1490     *data = NULL;
1491     return CEED_ERROR_SUCCESS;
1492   }
1493   CeedCall(CeedDestroy(&(*data)->ceed));
1494   for (CeedInt b = 0; b < (*data)->num_active_bases; b++) {
1495     CeedCall(CeedBasisDestroy(&(*data)->active_bases[b]));
1496     CeedCall(CeedElemRestrictionDestroy(&(*data)->active_elem_rstrs[b]));
1497     CeedCall(CeedFree(&(*data)->eval_modes_in[b]));
1498     CeedCall(CeedFree(&(*data)->eval_modes_out[b]));
1499     CeedCall(CeedFree(&(*data)->eval_mode_offsets_in[b]));
1500     CeedCall(CeedFree(&(*data)->eval_mode_offsets_out[b]));
1501     CeedCall(CeedFree(&(*data)->assembled_bases_in[b]));
1502     CeedCall(CeedFree(&(*data)->assembled_bases_out[b]));
1503   }
1504   CeedCall(CeedFree(&(*data)->active_bases));
1505   CeedCall(CeedFree(&(*data)->active_elem_rstrs));
1506   CeedCall(CeedFree(&(*data)->num_eval_modes_in));
1507   CeedCall(CeedFree(&(*data)->num_eval_modes_out));
1508   CeedCall(CeedFree(&(*data)->eval_modes_in));
1509   CeedCall(CeedFree(&(*data)->eval_modes_out));
1510   CeedCall(CeedFree(&(*data)->eval_mode_offsets_in));
1511   CeedCall(CeedFree(&(*data)->eval_mode_offsets_out));
1512   CeedCall(CeedFree(&(*data)->assembled_bases_in));
1513   CeedCall(CeedFree(&(*data)->assembled_bases_out));
1514 
1515   CeedCall(CeedFree(data));
1516   return CEED_ERROR_SUCCESS;
1517 }
1518 
1519 /// @}
1520 
1521 /// ----------------------------------------------------------------------------
1522 /// CeedOperator Public API
1523 /// ----------------------------------------------------------------------------
1524 /// @addtogroup CeedOperatorUser
1525 /// @{
1526 
1527 /**
1528   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1529 
1530   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1531   The vector `assembled` is of shape `[num_elements, num_input_fields, num_output_fields, num_quad_points]` and contains column-major matrices
1532 representing the action of the CeedQFunction for a corresponding quadrature point on an element.
1533 
1534   Inputs and outputs are in the order provided by the user when adding CeedOperator fields.
1535   For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order, would result in an assembled QFunction
1536 that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1537 
1538   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1539 
1540   @param[in]  op        CeedOperator to assemble CeedQFunction
1541   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1542   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1543   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1544 
1545   @return An error code: 0 - success, otherwise - failure
1546 
1547   @ref User
1548 **/
1549 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1550   CeedCall(CeedOperatorCheckReady(op));
1551 
1552   if (op->LinearAssembleQFunction) {
1553     // Backend version
1554     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1555   } else {
1556     // Operator fallback
1557     CeedOperator op_fallback;
1558 
1559     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1560     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1561     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1562   }
1563   return CEED_ERROR_SUCCESS;
1564 }
1565 
1566 /**
1567   @brief Assemble CeedQFunction and store result internally.
1568 
1569   Return copied references of stored data to the caller.
1570   Caller is responsible for ownership and destruction of the copied references.
1571   See also @ref CeedOperatorLinearAssembleQFunction
1572 
1573   Note: If the value of `assembled` or `rstr` passed to this function are non-NULL, then it is assumed that they hold valid pointers.
1574         These objects will be destroyed if `*assembled` or `*rstr` is the only reference to the object.
1575 
1576   @param[in]  op        CeedOperator to assemble CeedQFunction
1577   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1578   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1579   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1580 
1581   @return An error code: 0 - success, otherwise - failure
1582 
1583   @ref User
1584 **/
1585 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1586   int (*LinearAssembleQFunctionUpdate)(CeedOperator, CeedVector, CeedElemRestriction, CeedRequest *) = NULL;
1587   CeedOperator op_assemble                                                                           = NULL;
1588 
1589   CeedCall(CeedOperatorCheckReady(op));
1590 
1591   // Determine if fallback parent or operator has implementation
1592   if (op->op_fallback_parent && op->op_fallback_parent->LinearAssembleQFunctionUpdate) {
1593     // -- Backend version for op fallback parent is faster, if it exists
1594     LinearAssembleQFunctionUpdate = op->op_fallback_parent->LinearAssembleQFunctionUpdate;
1595     op_assemble                   = op->op_fallback_parent;
1596   } else if (op->LinearAssembleQFunctionUpdate) {
1597     // -- Backend version for op
1598     LinearAssembleQFunctionUpdate = op->LinearAssembleQFunctionUpdate;
1599     op_assemble                   = op;
1600   }
1601 
1602   // Assemble QFunction
1603   if (LinearAssembleQFunctionUpdate) {
1604     // Backend or fallback parent version
1605     bool                qf_assembled_is_setup;
1606     CeedVector          assembled_vec  = NULL;
1607     CeedElemRestriction assembled_rstr = NULL;
1608 
1609     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1610     if (qf_assembled_is_setup) {
1611       bool update_needed;
1612 
1613       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1614       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1615       if (update_needed) CeedCall(LinearAssembleQFunctionUpdate(op_assemble, assembled_vec, assembled_rstr, request));
1616     } else {
1617       CeedCall(CeedOperatorLinearAssembleQFunction(op_assemble, &assembled_vec, &assembled_rstr, request));
1618       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1619     }
1620     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1621 
1622     // Copy reference from internally held copy
1623     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1624     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1625     CeedCall(CeedVectorDestroy(&assembled_vec));
1626     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1627   } else {
1628     // Operator fallback
1629     CeedOperator op_fallback;
1630 
1631     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1632     if (op_fallback) CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1633     else return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1634   }
1635 
1636   return CEED_ERROR_SUCCESS;
1637 }
1638 
1639 /**
1640   @brief Assemble the diagonal of a square linear CeedOperator
1641 
1642   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1643 
1644   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1645 
1646   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1647 
1648   @param[in]  op        CeedOperator to assemble CeedQFunction
1649   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1650   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1651 
1652   @return An error code: 0 - success, otherwise - failure
1653 
1654   @ref User
1655 **/
1656 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1657   bool is_composite;
1658   CeedCall(CeedOperatorCheckReady(op));
1659   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1660 
1661   CeedSize input_size = 0, output_size = 0;
1662   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1663   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1664 
1665   // Early exit for empty operator
1666   if (!is_composite) {
1667     CeedInt num_elem = 0;
1668 
1669     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1670     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1671   }
1672 
1673   if (op->LinearAssembleDiagonal) {
1674     // Backend version
1675     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1676     return CEED_ERROR_SUCCESS;
1677   } else if (op->LinearAssembleAddDiagonal) {
1678     // Backend version with zeroing first
1679     CeedCall(CeedVectorSetValue(assembled, 0.0));
1680     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1681     return CEED_ERROR_SUCCESS;
1682   } else {
1683     // Operator fallback
1684     CeedOperator op_fallback;
1685 
1686     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1687     if (op_fallback) {
1688       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1689       return CEED_ERROR_SUCCESS;
1690     }
1691   }
1692   // Default interface implementation
1693   CeedCall(CeedVectorSetValue(assembled, 0.0));
1694   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1695 
1696   return CEED_ERROR_SUCCESS;
1697 }
1698 
1699 /**
1700   @brief Assemble the diagonal of a square linear CeedOperator
1701 
1702   This sums into a CeedVector the diagonal of a linear CeedOperator.
1703 
1704   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1705 
1706   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1707 
1708   @param[in]  op        CeedOperator to assemble CeedQFunction
1709   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1710   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1711 
1712   @return An error code: 0 - success, otherwise - failure
1713 
1714   @ref User
1715 **/
1716 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1717   bool is_composite;
1718   CeedCall(CeedOperatorCheckReady(op));
1719   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1720 
1721   CeedSize input_size = 0, output_size = 0;
1722   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1723   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1724 
1725   // Early exit for empty operator
1726   if (!is_composite) {
1727     CeedInt num_elem = 0;
1728 
1729     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1730     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1731   }
1732 
1733   if (op->LinearAssembleAddDiagonal) {
1734     // Backend version
1735     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1736     return CEED_ERROR_SUCCESS;
1737   } else {
1738     // Operator fallback
1739     CeedOperator op_fallback;
1740 
1741     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1742     if (op_fallback) {
1743       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1744       return CEED_ERROR_SUCCESS;
1745     }
1746   }
1747   // Default interface implementation
1748   if (is_composite) {
1749     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1750   } else {
1751     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1752   }
1753 
1754   return CEED_ERROR_SUCCESS;
1755 }
1756 
1757 /**
1758   @brief Assemble the point block diagonal of a square linear CeedOperator
1759 
1760   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1761 
1762   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1763 
1764   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1765 
1766   @param[in]  op        CeedOperator to assemble CeedQFunction
1767   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1768 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1769 component in].
1770   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1771 
1772   @return An error code: 0 - success, otherwise - failure
1773 
1774   @ref User
1775 **/
1776 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1777   bool is_composite;
1778   CeedCall(CeedOperatorCheckReady(op));
1779   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1780 
1781   CeedSize input_size = 0, output_size = 0;
1782   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1783   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1784 
1785   // Early exit for empty operator
1786   if (!is_composite) {
1787     CeedInt num_elem = 0;
1788 
1789     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1790     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1791   }
1792 
1793   if (op->LinearAssemblePointBlockDiagonal) {
1794     // Backend version
1795     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1796     return CEED_ERROR_SUCCESS;
1797   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1798     // Backend version with zeroing first
1799     CeedCall(CeedVectorSetValue(assembled, 0.0));
1800     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1801     return CEED_ERROR_SUCCESS;
1802   } else {
1803     // Operator fallback
1804     CeedOperator op_fallback;
1805 
1806     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1807     if (op_fallback) {
1808       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1809       return CEED_ERROR_SUCCESS;
1810     }
1811   }
1812   // Default interface implementation
1813   CeedCall(CeedVectorSetValue(assembled, 0.0));
1814   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1815 
1816   return CEED_ERROR_SUCCESS;
1817 }
1818 
1819 /**
1820   @brief Assemble the point block diagonal of a square linear CeedOperator
1821 
1822   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1823 
1824   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1825 
1826   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1827 
1828   @param[in]  op        CeedOperator to assemble CeedQFunction
1829   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1830 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1831 component in].
1832   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1833 
1834   @return An error code: 0 - success, otherwise - failure
1835 
1836   @ref User
1837 **/
1838 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1839   bool is_composite;
1840   CeedCall(CeedOperatorCheckReady(op));
1841   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1842 
1843   CeedSize input_size = 0, output_size = 0;
1844   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1845   CeedCheck(input_size == output_size, op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1846 
1847   // Early exit for empty operator
1848   if (!is_composite) {
1849     CeedInt num_elem = 0;
1850 
1851     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1852     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1853   }
1854 
1855   if (op->LinearAssembleAddPointBlockDiagonal) {
1856     // Backend version
1857     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1858     return CEED_ERROR_SUCCESS;
1859   } else {
1860     // Operator fallback
1861     CeedOperator op_fallback;
1862 
1863     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1864     if (op_fallback) {
1865       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1866       return CEED_ERROR_SUCCESS;
1867     }
1868   }
1869   // Default interface implementation
1870   if (is_composite) {
1871     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1872   } else {
1873     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1874   }
1875 
1876   return CEED_ERROR_SUCCESS;
1877 }
1878 
1879 /**
1880    @brief Fully assemble the nonzero pattern of a linear operator.
1881 
1882    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1883 
1884    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1885 matrix in entry (i, j).
1886   Note that the (i, j) pairs are not unique and may repeat.
1887   This function returns the number of entries and their (i, j) locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1888 
1889    This will generally be slow unless your operator is low-order.
1890 
1891    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1892 
1893    @param[in]  op          CeedOperator to assemble
1894    @param[out] num_entries Number of entries in coordinate nonzero pattern
1895    @param[out] rows        Row number for each entry
1896    @param[out] cols        Column number for each entry
1897 
1898    @ref User
1899 **/
1900 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1901   CeedInt       num_suboperators;
1902   CeedSize      single_entries;
1903   CeedOperator *sub_operators;
1904   bool          is_composite;
1905   CeedCall(CeedOperatorCheckReady(op));
1906   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1907 
1908   if (op->LinearAssembleSymbolic) {
1909     // Backend version
1910     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1911     return CEED_ERROR_SUCCESS;
1912   } else {
1913     // Operator fallback
1914     CeedOperator op_fallback;
1915 
1916     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1917     if (op_fallback) {
1918       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1919       return CEED_ERROR_SUCCESS;
1920     }
1921   }
1922 
1923   // Default interface implementation
1924 
1925   // count entries and allocate rows, cols arrays
1926   *num_entries = 0;
1927   if (is_composite) {
1928     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1929     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1930     for (CeedInt k = 0; k < num_suboperators; ++k) {
1931       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1932       *num_entries += single_entries;
1933     }
1934   } else {
1935     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1936     *num_entries += single_entries;
1937   }
1938   CeedCall(CeedCalloc(*num_entries, rows));
1939   CeedCall(CeedCalloc(*num_entries, cols));
1940 
1941   // assemble nonzero locations
1942   CeedInt offset = 0;
1943   if (is_composite) {
1944     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1945     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1946     for (CeedInt k = 0; k < num_suboperators; ++k) {
1947       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1948       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1949       offset += single_entries;
1950     }
1951   } else {
1952     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1953   }
1954 
1955   return CEED_ERROR_SUCCESS;
1956 }
1957 
1958 /**
1959    @brief Fully assemble the nonzero entries of a linear operator.
1960 
1961    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1962 
1963    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1964 matrix in entry (i, j).
1965   Note that the (i, j) pairs are not unique and may repeat.
1966   This function returns the values of the nonzero entries to be added, their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1967 
1968    This will generally be slow unless your operator is low-order.
1969 
1970    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1971 
1972    @param[in]  op     CeedOperator to assemble
1973    @param[out] values Values to assemble into matrix
1974 
1975    @ref User
1976 **/
1977 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1978   CeedInt       num_suboperators;
1979   CeedSize      single_entries = 0;
1980   CeedOperator *sub_operators;
1981   bool          is_composite;
1982   CeedCall(CeedOperatorCheckReady(op));
1983   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1984 
1985   // Early exit for empty operator
1986   if (!is_composite) {
1987     CeedInt num_elem = 0;
1988 
1989     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1990     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1991   }
1992 
1993   if (op->LinearAssemble) {
1994     // Backend version
1995     CeedCall(op->LinearAssemble(op, values));
1996     return CEED_ERROR_SUCCESS;
1997   } else {
1998     // Operator fallback
1999     CeedOperator op_fallback;
2000 
2001     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2002     if (op_fallback) {
2003       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
2004       return CEED_ERROR_SUCCESS;
2005     }
2006   }
2007 
2008   // Default interface implementation
2009   CeedInt offset = 0;
2010   CeedCall(CeedVectorSetValue(values, 0.0));
2011   if (is_composite) {
2012     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2013     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2014     for (CeedInt k = 0; k < num_suboperators; k++) {
2015       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
2016       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
2017       offset += single_entries;
2018     }
2019   } else {
2020     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
2021   }
2022 
2023   return CEED_ERROR_SUCCESS;
2024 }
2025 
2026 /**
2027   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
2028 
2029   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2030 
2031   @param[in]  op               Composite CeedOperator
2032   @param[in]  num_skip_indices Number of suboperators to skip
2033   @param[in]  skip_indices     Array of indices of suboperators to skip
2034   @param[out] mult             Vector to store multiplicity (of size l_size)
2035 
2036   @return An error code: 0 - success, otherwise - failure
2037 
2038   @ref User
2039 **/
2040 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
2041   CeedCall(CeedOperatorCheckReady(op));
2042 
2043   Ceed                ceed;
2044   CeedInt             num_suboperators;
2045   CeedSize            l_vec_len;
2046   CeedScalar         *mult_array;
2047   CeedVector          ones_l_vec;
2048   CeedElemRestriction elem_rstr, mult_elem_rstr;
2049   CeedOperator       *sub_operators;
2050 
2051   CeedCall(CeedOperatorGetCeed(op, &ceed));
2052 
2053   // Zero mult vector
2054   CeedCall(CeedVectorSetValue(mult, 0.0));
2055 
2056   // Get suboperators
2057   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
2058   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
2059   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
2060 
2061   // Work vector
2062   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
2063   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
2064   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
2065   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
2066 
2067   // Compute multiplicity across suboperators
2068   for (CeedInt i = 0; i < num_suboperators; i++) {
2069     const CeedScalar *sub_mult_array;
2070     CeedVector        sub_mult_l_vec, ones_e_vec;
2071 
2072     // -- Check for suboperator to skip
2073     for (CeedInt j = 0; j < num_skip_indices; j++) {
2074       if (skip_indices[j] == i) continue;
2075     }
2076 
2077     // -- Sub operator multiplicity
2078     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_rstr));
2079     CeedCall(CeedElemRestrictionCreateUnorientedCopy(elem_rstr, &mult_elem_rstr));
2080     CeedCall(CeedElemRestrictionCreateVector(mult_elem_rstr, &sub_mult_l_vec, &ones_e_vec));
2081     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
2082     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
2083     CeedCall(CeedElemRestrictionApply(mult_elem_rstr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
2084     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
2085     // ---- Flag every node present in the current suboperator
2086     for (CeedInt j = 0; j < l_vec_len; j++) {
2087       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
2088     }
2089     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
2090     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
2091     CeedCall(CeedVectorDestroy(&ones_e_vec));
2092     CeedCall(CeedElemRestrictionDestroy(&mult_elem_rstr));
2093   }
2094   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
2095   CeedCall(CeedVectorDestroy(&ones_l_vec));
2096 
2097   return CEED_ERROR_SUCCESS;
2098 }
2099 
2100 /**
2101   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
2102 grid interpolation
2103 
2104   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2105 
2106   @param[in]  op_fine      Fine grid operator
2107   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2108   @param[in]  rstr_coarse  Coarse grid restriction
2109   @param[in]  basis_coarse Coarse grid active vector basis
2110   @param[out] op_coarse    Coarse grid operator
2111   @param[out] op_prolong   Coarse to fine operator, or NULL
2112   @param[out] op_restrict  Fine to coarse operator, or NULL
2113 
2114   @return An error code: 0 - success, otherwise - failure
2115 
2116   @ref User
2117 **/
2118 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2119                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
2120   CeedCall(CeedOperatorCheckReady(op_fine));
2121 
2122   // Build prolongation matrix, if required
2123   CeedBasis basis_c_to_f = NULL;
2124   if (op_prolong || op_restrict) {
2125     CeedBasis basis_fine;
2126     CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2127     CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
2128   }
2129 
2130   // Core code
2131   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2132 
2133   return CEED_ERROR_SUCCESS;
2134 }
2135 
2136 /**
2137   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
2138 
2139   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2140 
2141   @param[in]  op_fine       Fine grid operator
2142   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2143   @param[in]  rstr_coarse   Coarse grid restriction
2144   @param[in]  basis_coarse  Coarse grid active vector basis
2145   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2146   @param[out] op_coarse     Coarse grid operator
2147   @param[out] op_prolong    Coarse to fine operator, or NULL
2148   @param[out] op_restrict   Fine to coarse operator, or NULL
2149 
2150   @return An error code: 0 - success, otherwise - failure
2151 
2152   @ref User
2153 **/
2154 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2155                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2156                                              CeedOperator *op_restrict) {
2157   CeedCall(CeedOperatorCheckReady(op_fine));
2158   Ceed ceed;
2159   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2160 
2161   // Check for compatible quadrature spaces
2162   CeedBasis basis_fine;
2163   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2164   CeedInt Q_f, Q_c;
2165   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2166   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2167   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2168 
2169   // Create coarse to fine basis, if required
2170   CeedBasis basis_c_to_f = NULL;
2171   if (op_prolong || op_restrict) {
2172     // Check if interpolation matrix is provided
2173     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2174               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2175     CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2176     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2177     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2178     CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
2179     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2180     P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
2181     CeedScalar *q_ref, *q_weight, *grad;
2182     CeedCall(CeedCalloc(P_1d_f, &q_ref));
2183     CeedCall(CeedCalloc(P_1d_f, &q_weight));
2184     CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
2185     CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2186     CeedCall(CeedFree(&q_ref));
2187     CeedCall(CeedFree(&q_weight));
2188     CeedCall(CeedFree(&grad));
2189   }
2190 
2191   // Core code
2192   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2193   return CEED_ERROR_SUCCESS;
2194 }
2195 
2196 /**
2197   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2198 
2199   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2200 
2201   @param[in]  op_fine       Fine grid operator
2202   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter, or NULL if not creating prolongation/restriction operators
2203   @param[in]  rstr_coarse   Coarse grid restriction
2204   @param[in]  basis_coarse  Coarse grid active vector basis
2205   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation, or NULL if not creating prolongation/restriction operators
2206   @param[out] op_coarse     Coarse grid operator
2207   @param[out] op_prolong    Coarse to fine operator, or NULL
2208   @param[out] op_restrict   Fine to coarse operator, or NULL
2209 
2210   @return An error code: 0 - success, otherwise - failure
2211 
2212   @ref User
2213 **/
2214 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2215                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2216                                        CeedOperator *op_restrict) {
2217   CeedCall(CeedOperatorCheckReady(op_fine));
2218   Ceed ceed;
2219   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2220 
2221   // Check for compatible quadrature spaces
2222   CeedBasis basis_fine;
2223   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2224   CeedInt Q_f, Q_c;
2225   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2226   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2227   CeedCheck(Q_f == Q_c, ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2228 
2229   // Coarse to fine basis
2230   CeedBasis basis_c_to_f = NULL;
2231   if (op_prolong || op_restrict) {
2232     // Check if interpolation matrix is provided
2233     CeedCheck(interp_c_to_f, ceed, CEED_ERROR_INCOMPATIBLE,
2234               "Prolongation or restriction operator creation requires coarse-to-fine interpolation matrix");
2235     CeedElemTopology topo;
2236     CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2237     CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2238     CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2239     CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2240     CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2241     CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2242     CeedScalar *q_ref, *q_weight, *grad;
2243     CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2244     CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2245     CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2246     CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2247     CeedCall(CeedFree(&q_ref));
2248     CeedCall(CeedFree(&q_weight));
2249     CeedCall(CeedFree(&grad));
2250   }
2251 
2252   // Core code
2253   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2254   return CEED_ERROR_SUCCESS;
2255 }
2256 
2257 /**
2258   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2259 
2260   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2261   This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, \f$M = V^T V, K = V^T S V\f$.
2262   The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form \f$V^T
2263 \hat S V\f$.
2264   The CeedOperator must be linear and non-composite.
2265   The associated CeedQFunction must therefore also be linear.
2266 
2267   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2268 
2269   @param[in]  op      CeedOperator to create element inverses
2270   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2271   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2272 
2273   @return An error code: 0 - success, otherwise - failure
2274 
2275   @ref User
2276 **/
2277 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2278   CeedCall(CeedOperatorCheckReady(op));
2279 
2280   if (op->CreateFDMElementInverse) {
2281     // Backend version
2282     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2283     return CEED_ERROR_SUCCESS;
2284   } else {
2285     // Operator fallback
2286     CeedOperator op_fallback;
2287 
2288     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2289     if (op_fallback) {
2290       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2291       return CEED_ERROR_SUCCESS;
2292     }
2293   }
2294 
2295   // Default interface implementation
2296   Ceed ceed, ceed_parent;
2297   CeedCall(CeedOperatorGetCeed(op, &ceed));
2298   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2299   ceed_parent = ceed_parent ? ceed_parent : ceed;
2300   CeedQFunction qf;
2301   CeedCall(CeedOperatorGetQFunction(op, &qf));
2302 
2303   // Determine active input basis
2304   bool                interp = false, grad = false;
2305   CeedBasis           basis = NULL;
2306   CeedElemRestriction rstr  = NULL;
2307   CeedOperatorField  *op_fields;
2308   CeedQFunctionField *qf_fields;
2309   CeedInt             num_input_fields;
2310   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2311   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2312   for (CeedInt i = 0; i < num_input_fields; i++) {
2313     CeedVector vec;
2314     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2315     if (vec == CEED_VECTOR_ACTIVE) {
2316       CeedEvalMode eval_mode;
2317       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2318       interp = interp || eval_mode == CEED_EVAL_INTERP;
2319       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2320       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2321       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2322     }
2323   }
2324   CeedCheck(basis, ceed, CEED_ERROR_BACKEND, "No active field set");
2325   CeedSize l_size = 1;
2326   CeedInt  P_1d, Q_1d, num_nodes, num_qpts, dim, num_comp = 1, num_elem = 1;
2327   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2328   CeedCall(CeedBasisGetNumNodes(basis, &num_nodes));
2329   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2330   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2331   CeedCall(CeedBasisGetDimension(basis, &dim));
2332   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2333   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2334   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2335 
2336   // Build and diagonalize 1D Mass and Laplacian
2337   bool is_tensor_basis;
2338   CeedCall(CeedBasisIsTensor(basis, &is_tensor_basis));
2339   CeedCheck(is_tensor_basis, ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2340   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2341   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2342   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2343   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2344   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2345   CeedCall(CeedCalloc(P_1d, &lambda));
2346   // -- Build matrices
2347   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2348   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2349   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2350   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2351   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2352 
2353   // -- Diagonalize
2354   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2355   CeedCall(CeedFree(&mass));
2356   CeedCall(CeedFree(&laplace));
2357   for (CeedInt i = 0; i < P_1d; i++) {
2358     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2359   }
2360   CeedCall(CeedFree(&x));
2361 
2362   // Assemble QFunction
2363   CeedVector          assembled = NULL;
2364   CeedElemRestriction rstr_qf   = NULL;
2365   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2366   CeedInt layout[3];
2367   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2368   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2369   CeedScalar max_norm = 0;
2370   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2371 
2372   // Calculate element averages
2373   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2374   CeedScalar       *elem_avg;
2375   const CeedScalar *assembled_array, *q_weight_array;
2376   CeedVector        q_weight;
2377   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2378   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2379   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2380   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2381   CeedCall(CeedCalloc(num_elem, &elem_avg));
2382   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2383   for (CeedInt e = 0; e < num_elem; e++) {
2384     CeedInt count = 0;
2385     for (CeedInt q = 0; q < num_qpts; q++) {
2386       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2387         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2388           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2389           count++;
2390         }
2391       }
2392     }
2393     if (count) {
2394       elem_avg[e] /= count;
2395     } else {
2396       elem_avg[e] = 1.0;
2397     }
2398   }
2399   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2400   CeedCall(CeedVectorDestroy(&assembled));
2401   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2402   CeedCall(CeedVectorDestroy(&q_weight));
2403 
2404   // Build FDM diagonal
2405   CeedVector  q_data;
2406   CeedScalar *q_data_array, *fdm_diagonal;
2407   CeedCall(CeedCalloc(num_comp * num_nodes, &fdm_diagonal));
2408   const CeedScalar fdm_diagonal_bound = num_nodes * CEED_EPSILON;
2409   for (CeedInt c = 0; c < num_comp; c++) {
2410     for (CeedInt n = 0; n < num_nodes; n++) {
2411       if (interp) fdm_diagonal[c * num_nodes + n] = 1.0;
2412       if (grad) {
2413         for (CeedInt d = 0; d < dim; d++) {
2414           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2415           fdm_diagonal[c * num_nodes + n] += lambda[i];
2416         }
2417       }
2418       if (fabs(fdm_diagonal[c * num_nodes + n]) < fdm_diagonal_bound) fdm_diagonal[c * num_nodes + n] = fdm_diagonal_bound;
2419     }
2420   }
2421   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * num_nodes, &q_data));
2422   CeedCall(CeedVectorSetValue(q_data, 0.0));
2423   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2424   for (CeedInt e = 0; e < num_elem; e++) {
2425     for (CeedInt c = 0; c < num_comp; c++) {
2426       for (CeedInt n = 0; n < num_nodes; n++) q_data_array[(e * num_comp + c) * num_nodes + n] = 1. / (elem_avg[e] * fdm_diagonal[c * num_nodes + n]);
2427     }
2428   }
2429   CeedCall(CeedFree(&elem_avg));
2430   CeedCall(CeedFree(&fdm_diagonal));
2431   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2432 
2433   // Setup FDM operator
2434   // -- Basis
2435   CeedBasis   fdm_basis;
2436   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2437   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2438   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2439   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2440   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2441   CeedCall(CeedFree(&fdm_interp));
2442   CeedCall(CeedFree(&grad_dummy));
2443   CeedCall(CeedFree(&q_ref_dummy));
2444   CeedCall(CeedFree(&q_weight_dummy));
2445   CeedCall(CeedFree(&lambda));
2446 
2447   // -- Restriction
2448   CeedElemRestriction rstr_qd_i;
2449   CeedInt             strides[3] = {1, num_nodes, num_nodes * num_comp};
2450   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, num_nodes, num_comp, num_elem * num_comp * num_nodes, strides, &rstr_qd_i));
2451   // -- QFunction
2452   CeedQFunction qf_fdm;
2453   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2454   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2455   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2456   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2457   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2458   // -- QFunction context
2459   CeedInt *num_comp_data;
2460   CeedCall(CeedCalloc(1, &num_comp_data));
2461   num_comp_data[0] = num_comp;
2462   CeedQFunctionContext ctx_fdm;
2463   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2464   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2465   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2466   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2467   // -- Operator
2468   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2469   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2470   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2471   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2472 
2473   // Cleanup
2474   CeedCall(CeedVectorDestroy(&q_data));
2475   CeedCall(CeedBasisDestroy(&fdm_basis));
2476   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2477   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2478 
2479   return CEED_ERROR_SUCCESS;
2480 }
2481 
2482 /// @}
2483