xref: /libCEED/interface/ceed-preconditioning.c (revision 2eb0be0b68ebb7cb25cc038250732c3239325ad2)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <assert.h>
9 #include <ceed-impl.h>
10 #include <ceed/backend.h>
11 #include <ceed/ceed.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   Ceed ceed_fallback;
84 
85   // Check not already created
86   if (op->op_fallback) return CEED_ERROR_SUCCESS;
87 
88   // Fallback Ceed
89   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
90   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
91 
92   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
93   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
94 
95   // Clone Op
96   CeedOperator op_fallback;
97   if (op->is_composite) {
98     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
99     for (CeedInt i = 0; i < op->num_suboperators; i++) {
100       CeedOperator op_sub_fallback;
101 
102       CeedCall(CeedOperatorGetFallback(op->sub_operators[i], &op_sub_fallback));
103       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
104     }
105   } else {
106     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
107     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
108     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
109     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
110     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
111     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
112       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_restr, op->input_fields[i]->basis,
113                                     op->input_fields[i]->vec));
114     }
115     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
116       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_restr, op->output_fields[i]->basis,
117                                     op->output_fields[i]->vec));
118     }
119     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
120     if (op_fallback->num_qpts == 0) {
121       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
122     }
123     // Cleanup
124     CeedCall(CeedQFunctionDestroy(&qf_fallback));
125     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
126     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
127   }
128   CeedCall(CeedOperatorSetName(op_fallback, op->name));
129   CeedCall(CeedOperatorCheckReady(op_fallback));
130   op->op_fallback = op_fallback;
131 
132   return CEED_ERROR_SUCCESS;
133 }
134 
135 /**
136   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
137 
138   @param[in]  op          CeedOperator to retrieve fallback for
139   @param[out] op_fallback Fallback CeedOperator
140 
141   @return An error code: 0 - success, otherwise - failure
142 
143   @ref Developer
144 **/
145 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
146   // Create if needed
147   if (!op->op_fallback) {
148     CeedCall(CeedOperatorCreateFallback(op));
149   }
150   if (op->op_fallback) {
151     bool is_debug;
152 
153     CeedCall(CeedIsDebug(op->ceed, &is_debug));
154     if (is_debug) {
155       Ceed        ceed_fallback;
156       const char *resource, *resource_fallback;
157 
158       CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
159       CeedCall(CeedGetResource(op->ceed, &resource));
160       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
161 
162       CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
163       CeedDebug(op->ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
164                 op->op_fallback);
165     }
166   }
167   *op_fallback = op->op_fallback;
168 
169   return CEED_ERROR_SUCCESS;
170 }
171 
172 /**
173   @brief Select correct basis matrix pointer based on CeedEvalMode
174 
175   @param[in]  eval_mode Current basis evaluation mode
176   @param[in]  identity  Pointer to identity matrix
177   @param[in]  interp    Pointer to interpolation matrix
178   @param[in]  grad      Pointer to gradient matrix
179   @param[out] basis_ptr Basis pointer to set
180 
181   @ref Developer
182 **/
183 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad,
184                                                const CeedScalar **basis_ptr) {
185   switch (eval_mode) {
186     case CEED_EVAL_NONE:
187       *basis_ptr = identity;
188       break;
189     case CEED_EVAL_INTERP:
190       *basis_ptr = interp;
191       break;
192     case CEED_EVAL_GRAD:
193       *basis_ptr = grad;
194       break;
195     case CEED_EVAL_WEIGHT:
196     case CEED_EVAL_DIV:
197     case CEED_EVAL_CURL:
198       break;  // Caught by QF Assembly
199   }
200   assert(*basis_ptr != NULL);
201 }
202 
203 /**
204   @brief Create point block restriction for active operator field
205 
206   @param[in]  rstr            Original CeedElemRestriction for active field
207   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
208 
209   @return An error code: 0 - success, otherwise - failure
210 
211   @ref Developer
212 **/
213 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
214   Ceed ceed;
215   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
216   const CeedInt *offsets;
217   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
218 
219   // Expand offsets
220   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
221   CeedSize l_size;
222   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
223   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
224   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
225   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
226   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
227   CeedInt shift = num_comp;
228   if (comp_stride != 1) shift *= num_comp;
229   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
230   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
231     pointblock_offsets[i] = offsets[i] * shift;
232   }
233 
234   // Create new restriction
235   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
236                                      pointblock_offsets, pointblock_rstr));
237 
238   // Cleanup
239   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
240 
241   return CEED_ERROR_SUCCESS;
242 }
243 
244 /**
245   @brief Core logic for assembling operator diagonal or point block diagonal
246 
247   @param[in]  op            CeedOperator to assemble point block diagonal
248   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
249   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
250   @param[out] assembled     CeedVector to store assembled diagonal
251 
252   @return An error code: 0 - success, otherwise - failure
253 
254   @ref Developer
255 **/
256 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
257   Ceed ceed;
258   CeedCall(CeedOperatorGetCeed(op, &ceed));
259 
260   // Assemble QFunction
261   CeedQFunction qf;
262   CeedCall(CeedOperatorGetQFunction(op, &qf));
263   CeedInt num_input_fields, num_output_fields;
264   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
265   CeedVector          assembled_qf;
266   CeedElemRestriction rstr;
267   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr, request));
268   CeedInt layout[3];
269   CeedCall(CeedElemRestrictionGetELayout(rstr, &layout));
270   CeedCall(CeedElemRestrictionDestroy(&rstr));
271 
272   // Get assembly data
273   CeedOperatorAssemblyData data;
274   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
275   const CeedEvalMode *eval_mode_in, *eval_mode_out;
276   CeedInt             num_eval_mode_in, num_eval_mode_out;
277   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out));
278   CeedBasis basis_in, basis_out;
279   CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL));
280   CeedInt num_comp;
281   CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
282 
283   // Assemble point block diagonal restriction, if needed
284   CeedElemRestriction diag_rstr;
285   CeedCall(CeedOperatorGetActiveElemRestriction(op, &diag_rstr));
286   if (is_pointblock) {
287     CeedElemRestriction point_block_rstr;
288     CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_rstr, &point_block_rstr));
289     diag_rstr = point_block_rstr;
290   }
291 
292   // Create diagonal vector
293   CeedVector elem_diag;
294   CeedCall(CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag));
295 
296   // Assemble element operator diagonals
297   CeedScalar       *elem_diag_array;
298   const CeedScalar *assembled_qf_array;
299   CeedCall(CeedVectorSetValue(elem_diag, 0.0));
300   CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
301   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
302   CeedInt num_elem, num_nodes, num_qpts;
303   CeedCall(CeedElemRestrictionGetNumElements(diag_rstr, &num_elem));
304   CeedCall(CeedBasisGetNumNodes(basis_in, &num_nodes));
305   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
306 
307   // Basis matrices
308   const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
309   CeedScalar       *identity      = NULL;
310   bool              has_eval_none = false;
311   for (CeedInt i = 0; i < num_eval_mode_in; i++) {
312     has_eval_none = has_eval_none || (eval_mode_in[i] == CEED_EVAL_NONE);
313   }
314   for (CeedInt i = 0; i < num_eval_mode_out; i++) {
315     has_eval_none = has_eval_none || (eval_mode_out[i] == CEED_EVAL_NONE);
316   }
317   if (has_eval_none) {
318     CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
319     for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
320   }
321   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
322   CeedCall(CeedBasisGetInterp(basis_out, &interp_out));
323   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
324   CeedCall(CeedBasisGetGrad(basis_out, &grad_out));
325   // Compute the diagonal of B^T D B
326   // Each element
327   for (CeedInt e = 0; e < num_elem; e++) {
328     CeedInt d_out = -1;
329     // Each basis eval mode pair
330     for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
331       const CeedScalar *bt = NULL;
332       if (eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out += 1;
333       CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * num_nodes], &bt);
334       CeedInt d_in = -1;
335       for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
336         const CeedScalar *b = NULL;
337         if (eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in += 1;
338         CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * num_nodes], &b);
339         // Each component
340         for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
341           // Each qpoint/node pair
342           for (CeedInt q = 0; q < num_qpts; q++) {
343             if (is_pointblock) {
344               // Point Block Diagonal
345               for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
346                 const CeedScalar qf_value =
347                     assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_in) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] +
348                                        e * layout[2]];
349                 for (CeedInt n = 0; n < num_nodes; n++) {
350                   elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
351                       bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n];
352                 }
353               }
354             } else {
355               // Diagonal Only
356               const CeedScalar qf_value =
357                   assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_out) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] +
358                                      e * layout[2]];
359               for (CeedInt n = 0; n < num_nodes; n++) {
360                 elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n];
361               }
362             }
363           }
364         }
365       }
366     }
367   }
368   CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
369   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
370 
371   // Assemble local operator diagonal
372   CeedCall(CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
373 
374   // Cleanup
375   if (is_pointblock) {
376     CeedCall(CeedElemRestrictionDestroy(&diag_rstr));
377   }
378   CeedCall(CeedVectorDestroy(&assembled_qf));
379   CeedCall(CeedVectorDestroy(&elem_diag));
380   CeedCall(CeedFree(&identity));
381 
382   return CEED_ERROR_SUCCESS;
383 }
384 
385 /**
386   @brief Core logic for assembling composite operator diagonal
387 
388   @param[in]  op            CeedOperator to assemble point block diagonal
389   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
390   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
391   @param[out] assembled     CeedVector to store assembled diagonal
392 
393   @return An error code: 0 - success, otherwise - failure
394 
395   @ref Developer
396 **/
397 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
398                                                                  CeedVector assembled) {
399   CeedInt       num_sub;
400   CeedOperator *suboperators;
401   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
402   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
403   for (CeedInt i = 0; i < num_sub; i++) {
404     if (is_pointblock) {
405       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
406     } else {
407       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
408     }
409   }
410   return CEED_ERROR_SUCCESS;
411 }
412 
413 /**
414   @brief Build nonzero pattern for non-composite operator
415 
416   Users should generally use CeedOperatorLinearAssembleSymbolic()
417 
418   @param[in]  op     CeedOperator to assemble nonzero pattern
419   @param[in]  offset Offset for number of entries
420   @param[out] rows   Row number for each entry
421   @param[out] cols   Column number for each entry
422 
423   @return An error code: 0 - success, otherwise - failure
424 
425   @ref Developer
426 **/
427 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
428   Ceed ceed = op->ceed;
429   if (op->is_composite) {
430     // LCOV_EXCL_START
431     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
432     // LCOV_EXCL_STOP
433   }
434 
435   CeedSize num_nodes;
436   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
437   CeedElemRestriction rstr_in;
438   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
439   CeedInt num_elem, elem_size, num_comp;
440   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
441   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
442   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
443   CeedInt layout_er[3];
444   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
445 
446   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
447 
448   // Determine elem_dof relation
449   CeedVector index_vec;
450   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
451   CeedScalar *array;
452   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
453   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
454   CeedCall(CeedVectorRestoreArray(index_vec, &array));
455   CeedVector elem_dof;
456   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
457   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
458   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
459   const CeedScalar *elem_dof_a;
460   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
461   CeedCall(CeedVectorDestroy(&index_vec));
462 
463   // Determine i, j locations for element matrices
464   CeedInt count = 0;
465   for (CeedInt e = 0; e < num_elem; e++) {
466     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
467       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
468         for (CeedInt i = 0; i < elem_size; i++) {
469           for (CeedInt j = 0; j < elem_size; j++) {
470             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
471             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
472 
473             const CeedInt row = elem_dof_a[elem_dof_index_row];
474             const CeedInt col = elem_dof_a[elem_dof_index_col];
475 
476             rows[offset + count] = row;
477             cols[offset + count] = col;
478             count++;
479           }
480         }
481       }
482     }
483   }
484   if (count != local_num_entries) {
485     // LCOV_EXCL_START
486     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
487     // LCOV_EXCL_STOP
488   }
489   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
490   CeedCall(CeedVectorDestroy(&elem_dof));
491 
492   return CEED_ERROR_SUCCESS;
493 }
494 
495 /**
496   @brief Assemble nonzero entries for non-composite operator
497 
498   Users should generally use CeedOperatorLinearAssemble()
499 
500   @param[in]  op     CeedOperator to assemble
501   @param[in]  offset Offset for number of entries
502   @param[out] values Values to assemble into matrix
503 
504   @return An error code: 0 - success, otherwise - failure
505 
506   @ref Developer
507 **/
508 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
509   Ceed ceed = op->ceed;
510   if (op->is_composite) {
511     // LCOV_EXCL_START
512     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
513     // LCOV_EXCL_STOP
514   }
515   if (op->num_elem == 0) return CEED_ERROR_SUCCESS;
516 
517   if (op->LinearAssembleSingle) {
518     // Backend version
519     CeedCall(op->LinearAssembleSingle(op, offset, values));
520     return CEED_ERROR_SUCCESS;
521   } else {
522     // Operator fallback
523     CeedOperator op_fallback;
524 
525     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
526     if (op_fallback) {
527       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
528       return CEED_ERROR_SUCCESS;
529     }
530   }
531 
532   // Assemble QFunction
533   CeedQFunction qf;
534   CeedCall(CeedOperatorGetQFunction(op, &qf));
535   CeedVector          assembled_qf;
536   CeedElemRestriction rstr_q;
537   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
538   CeedSize qf_length;
539   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
540 
541   CeedInt            num_input_fields, num_output_fields;
542   CeedOperatorField *input_fields;
543   CeedOperatorField *output_fields;
544   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
545 
546   // Get assembly data
547   CeedOperatorAssemblyData data;
548   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
549   const CeedEvalMode *eval_mode_in, *eval_mode_out;
550   CeedInt             num_eval_mode_in, num_eval_mode_out;
551   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out));
552   CeedBasis basis_in, basis_out;
553   CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL));
554 
555   if (num_eval_mode_in == 0 || num_eval_mode_out == 0) {
556     // LCOV_EXCL_START
557     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
558     // LCOV_EXCL_STOP
559   }
560 
561   CeedElemRestriction active_rstr;
562   CeedInt             num_elem, elem_size, num_qpts, num_comp;
563   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
564   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
565   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
566   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
567   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
568 
569   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
570 
571   // loop over elements and put in data structure
572   const CeedScalar *interp_in, *grad_in;
573   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
574   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
575 
576   const CeedScalar *assembled_qf_array;
577   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
578 
579   CeedInt layout_qf[3];
580   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
581   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
582 
583   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
584   const CeedScalar *B_mat_in, *B_mat_out;
585   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &B_mat_in, NULL, &B_mat_out));
586   CeedScalar  BTD_mat[elem_size * num_qpts * num_eval_mode_in];
587   CeedScalar  elem_mat[elem_size * elem_size];
588   CeedInt     count = 0;
589   CeedScalar *vals;
590   CeedCall(CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals));
591   for (CeedInt e = 0; e < num_elem; e++) {
592     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
593       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
594         // Compute B^T*D
595         for (CeedInt n = 0; n < elem_size; n++) {
596           for (CeedInt q = 0; q < num_qpts; q++) {
597             for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
598               const CeedInt btd_index = n * (num_qpts * num_eval_mode_in) + (num_eval_mode_in * q + e_in);
599               CeedScalar    sum       = 0.0;
600               for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
601                 const CeedInt b_out_index     = (num_eval_mode_out * q + e_out) * elem_size + n;
602                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_mode_out + e_out) * num_comp + comp_out;
603                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
604                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
605               }
606               BTD_mat[btd_index] = sum;
607             }
608           }
609         }
610         // form element matrix itself (for each block component)
611         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_mode_in));
612 
613         // put element matrix in coordinate data structure
614         for (CeedInt i = 0; i < elem_size; i++) {
615           for (CeedInt j = 0; j < elem_size; j++) {
616             vals[offset + count] = elem_mat[i * elem_size + j];
617             count++;
618           }
619         }
620       }
621     }
622   }
623   if (count != local_num_entries) {
624     // LCOV_EXCL_START
625     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
626     // LCOV_EXCL_STOP
627   }
628   CeedCall(CeedVectorRestoreArray(values, &vals));
629 
630   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
631   CeedCall(CeedVectorDestroy(&assembled_qf));
632 
633   return CEED_ERROR_SUCCESS;
634 }
635 
636 /**
637   @brief Count number of entries for assembled CeedOperator
638 
639   @param[in]  op          CeedOperator to assemble
640   @param[out] num_entries Number of entries in assembled representation
641 
642   @return An error code: 0 - success, otherwise - failure
643 
644   @ref Utility
645 **/
646 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
647   CeedElemRestriction rstr;
648   CeedInt             num_elem, elem_size, num_comp;
649 
650   if (op->is_composite) {
651     // LCOV_EXCL_START
652     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
653     // LCOV_EXCL_STOP
654   }
655   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
656   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
657   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
658   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
659   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
660 
661   return CEED_ERROR_SUCCESS;
662 }
663 
664 /**
665   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
666 
667   @param[in]  op_fine      Fine grid operator
668   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter
669   @param[in]  rstr_coarse  Coarse grid restriction
670   @param[in]  basis_coarse Coarse grid active vector basis
671   @param[in]  basis_c_to_f Basis for coarse to fine interpolation
672   @param[out] op_coarse    Coarse grid operator
673   @param[out] op_prolong   Coarse to fine operator
674   @param[out] op_restrict  Fine to coarse operator
675 
676   @return An error code: 0 - success, otherwise - failure
677 
678   @ref Developer
679 **/
680 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
681                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
682   Ceed ceed;
683   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
684 
685   // Check for composite operator
686   bool is_composite;
687   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
688   if (is_composite) {
689     // LCOV_EXCL_START
690     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
691     // LCOV_EXCL_STOP
692   }
693 
694   // Coarse Grid
695   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
696   CeedElemRestriction rstr_fine = NULL;
697   // -- Clone input fields
698   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
699     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
700       rstr_fine = op_fine->input_fields[i]->elem_restr;
701       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
702     } else {
703       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_restr,
704                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
705     }
706   }
707   // -- Clone output fields
708   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
709     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
710       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
711     } else {
712       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_restr,
713                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
714     }
715   }
716   // -- Clone QFunctionAssemblyData
717   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
718 
719   // Multiplicity vector
720   CeedVector mult_vec, mult_e_vec;
721   CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
722   CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
723   CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
724   CeedCall(CeedVectorSetValue(mult_vec, 0.0));
725   CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
726   CeedCall(CeedVectorDestroy(&mult_e_vec));
727   CeedCall(CeedVectorReciprocal(mult_vec));
728 
729   // Restriction
730   CeedInt num_comp;
731   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
732   CeedQFunction qf_restrict;
733   CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
734   CeedInt *num_comp_r_data;
735   CeedCall(CeedCalloc(1, &num_comp_r_data));
736   num_comp_r_data[0] = num_comp;
737   CeedQFunctionContext ctx_r;
738   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
739   CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
740   CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
741   CeedCall(CeedQFunctionContextDestroy(&ctx_r));
742   CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
743   CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
744   CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
745   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
746 
747   CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
748   CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
749   CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
750   CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
751 
752   // Prolongation
753   CeedQFunction qf_prolong;
754   CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
755   CeedInt *num_comp_p_data;
756   CeedCall(CeedCalloc(1, &num_comp_p_data));
757   num_comp_p_data[0] = num_comp;
758   CeedQFunctionContext ctx_p;
759   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
760   CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
761   CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
762   CeedCall(CeedQFunctionContextDestroy(&ctx_p));
763   CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
764   CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
765   CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
766   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
767 
768   CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
769   CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
770   CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
771   CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
772 
773   // Clone name
774   bool   has_name = op_fine->name;
775   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
776   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
777   {
778     char *prolongation_name;
779     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
780     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
781     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
782     CeedCall(CeedFree(&prolongation_name));
783   }
784   {
785     char *restriction_name;
786     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
787     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
788     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
789     CeedCall(CeedFree(&restriction_name));
790   }
791 
792   // Check
793   CeedCall(CeedOperatorCheckReady(*op_coarse));
794   CeedCall(CeedOperatorCheckReady(*op_prolong));
795   CeedCall(CeedOperatorCheckReady(*op_restrict));
796 
797   // Cleanup
798   CeedCall(CeedVectorDestroy(&mult_vec));
799   CeedCall(CeedBasisDestroy(&basis_c_to_f));
800   CeedCall(CeedQFunctionDestroy(&qf_restrict));
801   CeedCall(CeedQFunctionDestroy(&qf_prolong));
802 
803   return CEED_ERROR_SUCCESS;
804 }
805 
806 /**
807   @brief Build 1D mass matrix and Laplacian with perturbation
808 
809   @param[in]  interp_1d   Interpolation matrix in one dimension
810   @param[in]  grad_1d     Gradient matrix in one dimension
811   @param[in]  q_weight_1d Quadrature weights in one dimension
812   @param[in]  P_1d        Number of basis nodes in one dimension
813   @param[in]  Q_1d        Number of quadrature points in one dimension
814   @param[in]  dim         Dimension of basis
815   @param[out] mass        Assembled mass matrix in one dimension
816   @param[out] laplace     Assembled perturbed Laplacian in one dimension
817 
818   @return An error code: 0 - success, otherwise - failure
819 
820   @ref Developer
821 **/
822 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d,
823                                                       CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
824   for (CeedInt i = 0; i < P_1d; i++) {
825     for (CeedInt j = 0; j < P_1d; j++) {
826       CeedScalar sum = 0.0;
827       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
828       mass[i + j * P_1d] = sum;
829     }
830   }
831   // -- Laplacian
832   for (CeedInt i = 0; i < P_1d; i++) {
833     for (CeedInt j = 0; j < P_1d; j++) {
834       CeedScalar sum = 0.0;
835       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
836       laplace[i + j * P_1d] = sum;
837     }
838   }
839   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
840   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
841   return CEED_ERROR_SUCCESS;
842 }
843 CeedPragmaOptimizeOn;
844 
845 /// @}
846 
847 /// ----------------------------------------------------------------------------
848 /// CeedOperator Backend API
849 /// ----------------------------------------------------------------------------
850 /// @addtogroup CeedOperatorBackend
851 /// @{
852 
853 /**
854   @brief Create object holding CeedQFunction assembly data for CeedOperator
855 
856   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
857   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
858 
859   @return An error code: 0 - success, otherwise - failure
860 
861   @ref Backend
862 **/
863 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
864   CeedCall(CeedCalloc(1, data));
865   (*data)->ref_count = 1;
866   (*data)->ceed      = ceed;
867   CeedCall(CeedReference(ceed));
868 
869   return CEED_ERROR_SUCCESS;
870 }
871 
872 /**
873   @brief Increment the reference counter for a CeedQFunctionAssemblyData
874 
875   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
876 
877   @return An error code: 0 - success, otherwise - failure
878 
879   @ref Backend
880 **/
881 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
882   data->ref_count++;
883   return CEED_ERROR_SUCCESS;
884 }
885 
886 /**
887   @brief Set re-use of CeedQFunctionAssemblyData
888 
889   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
890   @param[in]     reuse_data Boolean flag indicating data re-use
891 
892   @return An error code: 0 - success, otherwise - failure
893 
894   @ref Backend
895 **/
896 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
897   data->reuse_data        = reuse_data;
898   data->needs_data_update = true;
899   return CEED_ERROR_SUCCESS;
900 }
901 
902 /**
903   @brief Mark QFunctionAssemblyData as stale
904 
905   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
906   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
907 
908   @return An error code: 0 - success, otherwise - failure
909 
910   @ref Backend
911 **/
912 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
913   data->needs_data_update = needs_data_update;
914   return CEED_ERROR_SUCCESS;
915 }
916 
917 /**
918   @brief Determine if QFunctionAssemblyData needs update
919 
920   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
921   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
922 
923   @return An error code: 0 - success, otherwise - failure
924 
925   @ref Backend
926 **/
927 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
928   *is_update_needed = !data->reuse_data || data->needs_data_update;
929   return CEED_ERROR_SUCCESS;
930 }
931 
932 /**
933   @brief Copy the pointer to a CeedQFunctionAssemblyData.
934            Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
935            Note: If `*data_copy` is non-NULL, then it is assumed that `*data_copy` is a pointer to a CeedQFunctionAssemblyData.
936              This CeedQFunctionAssemblyData will be destroyed if `*data_copy` is the only reference to this CeedQFunctionAssemblyData.
937 
938   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
939   @param[in,out] data_copy Variable to store copied reference
940 
941   @return An error code: 0 - success, otherwise - failure
942 
943   @ref Backend
944 **/
945 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
946   CeedCall(CeedQFunctionAssemblyDataReference(data));
947   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
948   *data_copy = data;
949   return CEED_ERROR_SUCCESS;
950 }
951 
952 /**
953   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
954 
955   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
956   @param[out] is_setup Boolean flag for setup status
957 
958   @return An error code: 0 - success, otherwise - failure
959 
960   @ref Backend
961 **/
962 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
963   *is_setup = data->is_setup;
964   return CEED_ERROR_SUCCESS;
965 }
966 
967 /**
968   @brief Set internal objects for CeedQFunctionAssemblyData
969 
970   @param[in,out] data CeedQFunctionAssemblyData to set objects
971   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
972   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
973 
974   @return An error code: 0 - success, otherwise - failure
975 
976   @ref Backend
977 **/
978 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
979   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
980   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
981 
982   data->is_setup = true;
983   return CEED_ERROR_SUCCESS;
984 }
985 
986 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
987   if (!data->is_setup) {
988     // LCOV_EXCL_START
989     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
990     // LCOV_EXCL_STOP
991   }
992 
993   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
994   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
995 
996   return CEED_ERROR_SUCCESS;
997 }
998 
999 /**
1000   @brief Destroy CeedQFunctionAssemblyData
1001 
1002   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1003 
1004   @return An error code: 0 - success, otherwise - failure
1005 
1006   @ref Backend
1007 **/
1008 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1009   if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;
1010 
1011   CeedCall(CeedDestroy(&(*data)->ceed));
1012   CeedCall(CeedVectorDestroy(&(*data)->vec));
1013   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1014 
1015   CeedCall(CeedFree(data));
1016   return CEED_ERROR_SUCCESS;
1017 }
1018 
1019 /**
1020   @brief Get CeedOperatorAssemblyData
1021 
1022   @param[in]  op   CeedOperator to assemble
1023   @param[out] data CeedQFunctionAssemblyData
1024 
1025   @return An error code: 0 - success, otherwise - failure
1026 
1027   @ref Backend
1028 **/
1029 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1030   if (!op->op_assembled) {
1031     CeedOperatorAssemblyData data;
1032 
1033     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1034     op->op_assembled = data;
1035   }
1036   *data = op->op_assembled;
1037 
1038   return CEED_ERROR_SUCCESS;
1039 }
1040 
1041 /**
1042   @brief Create object holding CeedOperator assembly data
1043 
1044   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1045   @param[in]  op   CeedOperator to be assembled
1046   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1047 
1048   @return An error code: 0 - success, otherwise - failure
1049 
1050   @ref Backend
1051 **/
1052 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1053   CeedCall(CeedCalloc(1, data));
1054   (*data)->ceed = ceed;
1055   CeedCall(CeedReference(ceed));
1056 
1057   // Build OperatorAssembly data
1058   CeedQFunction       qf;
1059   CeedQFunctionField *qf_fields;
1060   CeedOperatorField  *op_fields;
1061   CeedInt             num_input_fields;
1062   CeedCall(CeedOperatorGetQFunction(op, &qf));
1063   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1064   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1065 
1066   // Determine active input basis
1067   CeedInt       num_eval_mode_in = 0, dim = 1;
1068   CeedEvalMode *eval_mode_in = NULL;
1069   CeedBasis     basis_in     = NULL;
1070   for (CeedInt i = 0; i < num_input_fields; i++) {
1071     CeedVector vec;
1072     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1073     if (vec == CEED_VECTOR_ACTIVE) {
1074       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1075       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1076       CeedEvalMode eval_mode;
1077       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1078       switch (eval_mode) {
1079         case CEED_EVAL_NONE:
1080         case CEED_EVAL_INTERP:
1081           CeedCall(CeedRealloc(num_eval_mode_in + 1, &eval_mode_in));
1082           eval_mode_in[num_eval_mode_in] = eval_mode;
1083           num_eval_mode_in += 1;
1084           break;
1085         case CEED_EVAL_GRAD:
1086           CeedCall(CeedRealloc(num_eval_mode_in + dim, &eval_mode_in));
1087           for (CeedInt d = 0; d < dim; d++) {
1088             eval_mode_in[num_eval_mode_in + d] = eval_mode;
1089           }
1090           num_eval_mode_in += dim;
1091           break;
1092         case CEED_EVAL_WEIGHT:
1093         case CEED_EVAL_DIV:
1094         case CEED_EVAL_CURL:
1095           break;  // Caught by QF Assembly
1096       }
1097     }
1098   }
1099   (*data)->num_eval_mode_in = num_eval_mode_in;
1100   (*data)->eval_mode_in     = eval_mode_in;
1101   CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->basis_in));
1102 
1103   // Determine active output basis
1104   CeedInt num_output_fields;
1105   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1106   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1107   CeedInt       num_eval_mode_out = 0;
1108   CeedEvalMode *eval_mode_out     = NULL;
1109   CeedBasis     basis_out         = NULL;
1110   for (CeedInt i = 0; i < num_output_fields; i++) {
1111     CeedVector vec;
1112     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1113     if (vec == CEED_VECTOR_ACTIVE) {
1114       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1115       CeedEvalMode eval_mode;
1116       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1117       switch (eval_mode) {
1118         case CEED_EVAL_NONE:
1119         case CEED_EVAL_INTERP:
1120           CeedCall(CeedRealloc(num_eval_mode_out + 1, &eval_mode_out));
1121           eval_mode_out[num_eval_mode_out] = eval_mode;
1122           num_eval_mode_out += 1;
1123           break;
1124         case CEED_EVAL_GRAD:
1125           CeedCall(CeedRealloc(num_eval_mode_out + dim, &eval_mode_out));
1126           for (CeedInt d = 0; d < dim; d++) {
1127             eval_mode_out[num_eval_mode_out + d] = eval_mode;
1128           }
1129           num_eval_mode_out += dim;
1130           break;
1131         case CEED_EVAL_WEIGHT:
1132         case CEED_EVAL_DIV:
1133         case CEED_EVAL_CURL:
1134           break;  // Caught by QF Assembly
1135       }
1136     }
1137   }
1138   (*data)->num_eval_mode_out = num_eval_mode_out;
1139   (*data)->eval_mode_out     = eval_mode_out;
1140   CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->basis_out));
1141 
1142   return CEED_ERROR_SUCCESS;
1143 }
1144 
1145 /**
1146   @brief Get CeedOperator CeedEvalModes for assembly
1147 
1148   @param[in]  data              CeedOperatorAssemblyData
1149   @param[out] num_eval_mode_in  Pointer to hold number of input CeedEvalModes, or NULL
1150   @param[out] eval_mode_in      Pointer to hold input CeedEvalModes, or NULL
1151   @param[out] num_eval_mode_out Pointer to hold number of output CeedEvalModes, or NULL
1152   @param[out] eval_mode_out     Pointer to hold output CeedEvalModes, or NULL
1153 
1154   @return An error code: 0 - success, otherwise - failure
1155 
1156   @ref Backend
1157 **/
1158 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_eval_mode_in, const CeedEvalMode **eval_mode_in,
1159                                          CeedInt *num_eval_mode_out, const CeedEvalMode **eval_mode_out) {
1160   if (num_eval_mode_in) *num_eval_mode_in = data->num_eval_mode_in;
1161   if (eval_mode_in) *eval_mode_in = data->eval_mode_in;
1162   if (num_eval_mode_out) *num_eval_mode_out = data->num_eval_mode_out;
1163   if (eval_mode_out) *eval_mode_out = data->eval_mode_out;
1164 
1165   return CEED_ERROR_SUCCESS;
1166 }
1167 
1168 /**
1169   @brief Get CeedOperator CeedBasis data for assembly
1170 
1171   @param[in]  data      CeedOperatorAssemblyData
1172   @param[out] basis_in  Pointer to hold active input CeedBasis, or NULL
1173   @param[out] B_in      Pointer to hold assembled active input B, or NULL
1174   @param[out] basis_out Pointer to hold active output CeedBasis, or NULL
1175   @param[out] B_out     Pointer to hold assembled active output B, or NULL
1176 
1177   @return An error code: 0 - success, otherwise - failure
1178 
1179   @ref Backend
1180 **/
1181 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedBasis *basis_in, const CeedScalar **B_in, CeedBasis *basis_out,
1182                                      const CeedScalar **B_out) {
1183   // Assemble B_in, B_out if needed
1184   if (B_in && !data->B_in) {
1185     CeedInt           num_qpts, elem_size;
1186     CeedScalar       *B_in, *identity = NULL;
1187     const CeedScalar *interp_in, *grad_in;
1188     bool              has_eval_none = false;
1189 
1190     CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_in, &num_qpts));
1191     CeedCall(CeedBasisGetNumNodes(data->basis_in, &elem_size));
1192     CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_in, &B_in));
1193 
1194     for (CeedInt i = 0; i < data->num_eval_mode_in; i++) {
1195       has_eval_none = has_eval_none || (data->eval_mode_in[i] == CEED_EVAL_NONE);
1196     }
1197     if (has_eval_none) {
1198       CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1199       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1200         identity[i * elem_size + i] = 1.0;
1201       }
1202     }
1203     CeedCall(CeedBasisGetInterp(data->basis_in, &interp_in));
1204     CeedCall(CeedBasisGetGrad(data->basis_in, &grad_in));
1205 
1206     for (CeedInt q = 0; q < num_qpts; q++) {
1207       for (CeedInt n = 0; n < elem_size; n++) {
1208         CeedInt d_in = -1;
1209         for (CeedInt e_in = 0; e_in < data->num_eval_mode_in; e_in++) {
1210           const CeedInt     qq = data->num_eval_mode_in * q;
1211           const CeedScalar *b  = NULL;
1212 
1213           if (data->eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in++;
1214           CeedOperatorGetBasisPointer(data->eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &b);
1215           B_in[(qq + e_in) * elem_size + n] = b[q * elem_size + n];
1216         }
1217       }
1218     }
1219     data->B_in = B_in;
1220   }
1221 
1222   if (B_out && !data->B_out) {
1223     CeedInt           num_qpts, elem_size;
1224     CeedScalar       *B_out, *identity = NULL;
1225     const CeedScalar *interp_out, *grad_out;
1226     bool              has_eval_none = false;
1227 
1228     CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_out, &num_qpts));
1229     CeedCall(CeedBasisGetNumNodes(data->basis_out, &elem_size));
1230     CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_out, &B_out));
1231 
1232     for (CeedInt i = 0; i < data->num_eval_mode_out; i++) {
1233       has_eval_none = has_eval_none || (data->eval_mode_out[i] == CEED_EVAL_NONE);
1234     }
1235     if (has_eval_none) {
1236       CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1237       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1238         identity[i * elem_size + i] = 1.0;
1239       }
1240     }
1241     CeedCall(CeedBasisGetInterp(data->basis_out, &interp_out));
1242     CeedCall(CeedBasisGetGrad(data->basis_out, &grad_out));
1243 
1244     for (CeedInt q = 0; q < num_qpts; q++) {
1245       for (CeedInt n = 0; n < elem_size; n++) {
1246         CeedInt d_out = -1;
1247         for (CeedInt e_out = 0; e_out < data->num_eval_mode_out; e_out++) {
1248           const CeedInt     qq = data->num_eval_mode_out * q;
1249           const CeedScalar *b  = NULL;
1250 
1251           if (data->eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out++;
1252           CeedOperatorGetBasisPointer(data->eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &b);
1253           B_out[(qq + e_out) * elem_size + n] = b[q * elem_size + n];
1254         }
1255       }
1256     }
1257     data->B_out = B_out;
1258   }
1259 
1260   if (basis_in) *basis_in = data->basis_in;
1261   if (B_in) *B_in = data->B_in;
1262   if (basis_out) *basis_out = data->basis_out;
1263   if (B_out) *B_out = data->B_out;
1264 
1265   return CEED_ERROR_SUCCESS;
1266 }
1267 
1268 /**
1269   @brief Destroy CeedOperatorAssemblyData
1270 
1271   @param[in,out] data CeedOperatorAssemblyData to destroy
1272 
1273   @return An error code: 0 - success, otherwise - failure
1274 
1275   @ref Backend
1276 **/
1277 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1278   if (!*data) return CEED_ERROR_SUCCESS;
1279 
1280   CeedCall(CeedDestroy(&(*data)->ceed));
1281   CeedCall(CeedBasisDestroy(&(*data)->basis_in));
1282   CeedCall(CeedBasisDestroy(&(*data)->basis_out));
1283   CeedCall(CeedFree(&(*data)->eval_mode_in));
1284   CeedCall(CeedFree(&(*data)->eval_mode_out));
1285   CeedCall(CeedFree(&(*data)->B_in));
1286   CeedCall(CeedFree(&(*data)->B_out));
1287 
1288   CeedCall(CeedFree(data));
1289   return CEED_ERROR_SUCCESS;
1290 }
1291 
1292 /// @}
1293 
1294 /// ----------------------------------------------------------------------------
1295 /// CeedOperator Public API
1296 /// ----------------------------------------------------------------------------
1297 /// @addtogroup CeedOperatorUser
1298 /// @{
1299 
1300 /**
1301   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1302 
1303   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1304     The vector 'assembled' is of shape [num_elements, num_input_fields, num_output_fields, num_quad_points] and contains column-major matrices
1305 representing the action of the CeedQFunction for a corresponding quadrature point on an element. Inputs and outputs are in the order provided by the
1306 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order,
1307 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1]
1308 and producing the output [dv_0, dv_1, v].
1309 
1310   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1311 
1312   @param[in]  op        CeedOperator to assemble CeedQFunction
1313   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1314   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1315   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1316 
1317   @return An error code: 0 - success, otherwise - failure
1318 
1319   @ref User
1320 **/
1321 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1322   CeedCall(CeedOperatorCheckReady(op));
1323 
1324   if (op->LinearAssembleQFunction) {
1325     // Backend version
1326     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1327   } else {
1328     // Operator fallback
1329     CeedOperator op_fallback;
1330 
1331     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1332     if (op_fallback) {
1333       CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1334     } else {
1335       // LCOV_EXCL_START
1336       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1337       // LCOV_EXCL_STOP
1338     }
1339   }
1340   return CEED_ERROR_SUCCESS;
1341 }
1342 
1343 /**
1344   @brief Assemble CeedQFunction and store result internally.
1345            Return copied references of stored data to the caller.
1346            Caller is responsible for ownership and destruction of the copied references.
1347            See also @ref CeedOperatorLinearAssembleQFunction
1348 
1349   @param[in]  op        CeedOperator to assemble CeedQFunction
1350   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1351   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1352   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1353 
1354   @return An error code: 0 - success, otherwise - failure
1355 
1356   @ref User
1357 **/
1358 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1359   CeedCall(CeedOperatorCheckReady(op));
1360 
1361   if (op->LinearAssembleQFunctionUpdate) {
1362     // Backend version
1363     bool                qf_assembled_is_setup;
1364     CeedVector          assembled_vec  = NULL;
1365     CeedElemRestriction assembled_rstr = NULL;
1366 
1367     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1368     if (qf_assembled_is_setup) {
1369       bool update_needed;
1370 
1371       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1372       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1373       if (update_needed) {
1374         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1375       }
1376     } else {
1377       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1378       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1379     }
1380     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1381 
1382     // Copy reference from internally held copy
1383     *assembled = NULL;
1384     *rstr      = NULL;
1385     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1386     CeedCall(CeedVectorDestroy(&assembled_vec));
1387     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1388     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1389   } else {
1390     // Operator fallback
1391     CeedOperator op_fallback;
1392 
1393     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1394     if (op_fallback) {
1395       CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1396     } else {
1397       // LCOV_EXCL_START
1398       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1399       // LCOV_EXCL_STOP
1400     }
1401   }
1402 
1403   return CEED_ERROR_SUCCESS;
1404 }
1405 
1406 /**
1407   @brief Assemble the diagonal of a square linear CeedOperator
1408 
1409   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1410 
1411   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1412 
1413   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1414 
1415   @param[in]  op        CeedOperator to assemble CeedQFunction
1416   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1417   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1418 
1419   @return An error code: 0 - success, otherwise - failure
1420 
1421   @ref User
1422 **/
1423 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1424   CeedCall(CeedOperatorCheckReady(op));
1425 
1426   CeedSize input_size = 0, output_size = 0;
1427   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1428   if (input_size != output_size) {
1429     // LCOV_EXCL_START
1430     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1431     // LCOV_EXCL_STOP
1432   }
1433 
1434   if (op->LinearAssembleDiagonal) {
1435     // Backend version
1436     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1437     return CEED_ERROR_SUCCESS;
1438   } else if (op->LinearAssembleAddDiagonal) {
1439     // Backend version with zeroing first
1440     CeedCall(CeedVectorSetValue(assembled, 0.0));
1441     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1442     return CEED_ERROR_SUCCESS;
1443   } else {
1444     // Operator fallback
1445     CeedOperator op_fallback;
1446 
1447     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1448     if (op_fallback) {
1449       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1450       return CEED_ERROR_SUCCESS;
1451     }
1452   }
1453   // Default interface implementation
1454   CeedCall(CeedVectorSetValue(assembled, 0.0));
1455   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1456 
1457   return CEED_ERROR_SUCCESS;
1458 }
1459 
1460 /**
1461   @brief Assemble the diagonal of a square linear CeedOperator
1462 
1463   This sums into a CeedVector the diagonal of a linear CeedOperator.
1464 
1465   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1466 
1467   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1468 
1469   @param[in]  op        CeedOperator to assemble CeedQFunction
1470   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1471   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1472 
1473   @return An error code: 0 - success, otherwise - failure
1474 
1475   @ref User
1476 **/
1477 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1478   CeedCall(CeedOperatorCheckReady(op));
1479 
1480   CeedSize input_size = 0, output_size = 0;
1481   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1482   if (input_size != output_size) {
1483     // LCOV_EXCL_START
1484     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1485     // LCOV_EXCL_STOP
1486   }
1487 
1488   if (op->LinearAssembleAddDiagonal) {
1489     // Backend version
1490     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1491     return CEED_ERROR_SUCCESS;
1492   } else {
1493     // Operator fallback
1494     CeedOperator op_fallback;
1495 
1496     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1497     if (op_fallback) {
1498       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1499       return CEED_ERROR_SUCCESS;
1500     }
1501   }
1502   // Default interface implementation
1503   bool is_composite;
1504   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1505   if (is_composite) {
1506     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1507   } else {
1508     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1509   }
1510 
1511   return CEED_ERROR_SUCCESS;
1512 }
1513 
1514 /**
1515   @brief Assemble the point block diagonal of a square linear CeedOperator
1516 
1517   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1518 
1519   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1520 
1521   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1522 
1523   @param[in]  op        CeedOperator to assemble CeedQFunction
1524   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1525 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1526 component in].
1527   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1528 
1529   @return An error code: 0 - success, otherwise - failure
1530 
1531   @ref User
1532 **/
1533 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1534   CeedCall(CeedOperatorCheckReady(op));
1535 
1536   CeedSize input_size = 0, output_size = 0;
1537   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1538   if (input_size != output_size) {
1539     // LCOV_EXCL_START
1540     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1541     // LCOV_EXCL_STOP
1542   }
1543 
1544   if (op->LinearAssemblePointBlockDiagonal) {
1545     // Backend version
1546     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1547     return CEED_ERROR_SUCCESS;
1548   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1549     // Backend version with zeroing first
1550     CeedCall(CeedVectorSetValue(assembled, 0.0));
1551     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1552     return CEED_ERROR_SUCCESS;
1553   } else {
1554     // Operator fallback
1555     CeedOperator op_fallback;
1556 
1557     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1558     if (op_fallback) {
1559       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1560       return CEED_ERROR_SUCCESS;
1561     }
1562   }
1563   // Default interface implementation
1564   CeedCall(CeedVectorSetValue(assembled, 0.0));
1565   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1566 
1567   return CEED_ERROR_SUCCESS;
1568 }
1569 
1570 /**
1571   @brief Assemble the point block diagonal of a square linear CeedOperator
1572 
1573   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1574 
1575   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1576 
1577   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1578 
1579   @param[in]  op        CeedOperator to assemble CeedQFunction
1580   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1581 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1582 component in].
1583   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1584 
1585   @return An error code: 0 - success, otherwise - failure
1586 
1587   @ref User
1588 **/
1589 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1590   CeedCall(CeedOperatorCheckReady(op));
1591 
1592   CeedSize input_size = 0, output_size = 0;
1593   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1594   if (input_size != output_size) {
1595     // LCOV_EXCL_START
1596     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1597     // LCOV_EXCL_STOP
1598   }
1599 
1600   if (op->LinearAssembleAddPointBlockDiagonal) {
1601     // Backend version
1602     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1603     return CEED_ERROR_SUCCESS;
1604   } else {
1605     // Operator fallback
1606     CeedOperator op_fallback;
1607 
1608     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1609     if (op_fallback) {
1610       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1611       return CEED_ERROR_SUCCESS;
1612     }
1613   }
1614   // Default interface implementation
1615   bool is_composite;
1616   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1617   if (is_composite) {
1618     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1619   } else {
1620     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1621   }
1622 
1623   return CEED_ERROR_SUCCESS;
1624 }
1625 
1626 /**
1627    @brief Fully assemble the nonzero pattern of a linear operator.
1628 
1629    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1630 
1631    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1632 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j)
1633 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1634 
1635    This will generally be slow unless your operator is low-order.
1636 
1637    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1638 
1639    @param[in]  op          CeedOperator to assemble
1640    @param[out] num_entries Number of entries in coordinate nonzero pattern
1641    @param[out] rows        Row number for each entry
1642    @param[out] cols        Column number for each entry
1643 
1644    @ref User
1645 **/
1646 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1647   CeedInt       num_suboperators, single_entries;
1648   CeedOperator *sub_operators;
1649   bool          is_composite;
1650   CeedCall(CeedOperatorCheckReady(op));
1651 
1652   if (op->LinearAssembleSymbolic) {
1653     // Backend version
1654     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1655     return CEED_ERROR_SUCCESS;
1656   } else {
1657     // Operator fallback
1658     CeedOperator op_fallback;
1659 
1660     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1661     if (op_fallback) {
1662       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1663       return CEED_ERROR_SUCCESS;
1664     }
1665   }
1666 
1667   // Default interface implementation
1668 
1669   // count entries and allocate rows, cols arrays
1670   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1671   *num_entries = 0;
1672   if (is_composite) {
1673     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1674     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1675     for (CeedInt k = 0; k < num_suboperators; ++k) {
1676       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1677       *num_entries += single_entries;
1678     }
1679   } else {
1680     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1681     *num_entries += single_entries;
1682   }
1683   CeedCall(CeedCalloc(*num_entries, rows));
1684   CeedCall(CeedCalloc(*num_entries, cols));
1685 
1686   // assemble nonzero locations
1687   CeedInt offset = 0;
1688   if (is_composite) {
1689     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1690     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1691     for (CeedInt k = 0; k < num_suboperators; ++k) {
1692       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1693       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1694       offset += single_entries;
1695     }
1696   } else {
1697     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1698   }
1699 
1700   return CEED_ERROR_SUCCESS;
1701 }
1702 
1703 /**
1704    @brief Fully assemble the nonzero entries of a linear operator.
1705 
1706    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1707 
1708    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1709 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added,
1710 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1711 
1712    This will generally be slow unless your operator is low-order.
1713 
1714    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1715 
1716    @param[in]  op     CeedOperator to assemble
1717    @param[out] values Values to assemble into matrix
1718 
1719    @ref User
1720 **/
1721 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1722   CeedInt       num_suboperators, single_entries = 0;
1723   CeedOperator *sub_operators;
1724   CeedCall(CeedOperatorCheckReady(op));
1725 
1726   if (op->LinearAssemble) {
1727     // Backend version
1728     CeedCall(op->LinearAssemble(op, values));
1729     return CEED_ERROR_SUCCESS;
1730   } else {
1731     // Operator fallback
1732     CeedOperator op_fallback;
1733 
1734     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1735     if (op_fallback) {
1736       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
1737       return CEED_ERROR_SUCCESS;
1738     }
1739   }
1740 
1741   // Default interface implementation
1742   bool is_composite;
1743   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1744 
1745   CeedInt offset = 0;
1746   if (is_composite) {
1747     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1748     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1749     for (CeedInt k = 0; k < num_suboperators; k++) {
1750       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
1751       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1752       offset += single_entries;
1753     }
1754   } else {
1755     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
1756   }
1757 
1758   return CEED_ERROR_SUCCESS;
1759 }
1760 
1761 /**
1762   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
1763 
1764   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1765 
1766   @param[in]  op               Composite CeedOperator
1767   @param[in]  num_skip_indices Number of suboperators to skip
1768   @param[in]  skip_indices     Array of indices of suboperators to skip
1769   @param[out] mult             Vector to store multiplicity (of size l_size)
1770 
1771   @return An error code: 0 - success, otherwise - failure
1772 
1773   @ref User
1774 **/
1775 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
1776   CeedCall(CeedOperatorCheckReady(op));
1777 
1778   Ceed                ceed;
1779   CeedInt             num_sub_ops;
1780   CeedSize            l_vec_len;
1781   CeedScalar         *mult_array;
1782   CeedVector          ones_l_vec;
1783   CeedElemRestriction elem_restr;
1784   CeedOperator       *sub_ops;
1785 
1786   CeedCall(CeedOperatorGetCeed(op, &ceed));
1787 
1788   // Zero mult vector
1789   CeedCall(CeedVectorSetValue(mult, 0.0));
1790 
1791   // Get suboperators
1792   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub_ops));
1793   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_ops));
1794   if (num_sub_ops == 0) return CEED_ERROR_SUCCESS;
1795 
1796   // Work vector
1797   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
1798   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
1799   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
1800   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
1801 
1802   // Compute multiplicity across suboperators
1803   for (CeedInt i = 0; i < num_sub_ops; i++) {
1804     const CeedScalar *sub_mult_array;
1805     CeedVector        sub_mult_l_vec, ones_e_vec;
1806 
1807     // -- Check for suboperator to skip
1808     for (CeedInt j = 0; j < num_skip_indices; j++) {
1809       if (skip_indices[j] == i) continue;
1810     }
1811 
1812     // -- Sub operator multiplicity
1813     CeedCall(CeedOperatorGetActiveElemRestriction(sub_ops[i], &elem_restr));
1814     CeedCall(CeedElemRestrictionCreateVector(elem_restr, &sub_mult_l_vec, &ones_e_vec));
1815     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
1816     CeedCall(CeedElemRestrictionApply(elem_restr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
1817     CeedCall(CeedElemRestrictionApply(elem_restr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
1818     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
1819     // ---- Flag every node present in the current suboperator
1820     for (CeedInt j = 0; j < l_vec_len; j++) {
1821       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
1822     }
1823     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
1824     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
1825     CeedCall(CeedVectorDestroy(&ones_e_vec));
1826   }
1827   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
1828 
1829   return CEED_ERROR_SUCCESS;
1830 }
1831 
1832 /**
1833   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
1834 grid interpolation
1835 
1836   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1837 
1838   @param[in]  op_fine      Fine grid operator
1839   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter
1840   @param[in]  rstr_coarse  Coarse grid restriction
1841   @param[in]  basis_coarse Coarse grid active vector basis
1842   @param[out] op_coarse    Coarse grid operator
1843   @param[out] op_prolong   Coarse to fine operator
1844   @param[out] op_restrict  Fine to coarse operator
1845 
1846   @return An error code: 0 - success, otherwise - failure
1847 
1848   @ref User
1849 **/
1850 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1851                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
1852   CeedCall(CeedOperatorCheckReady(op_fine));
1853 
1854   // Build prolongation matrix
1855   CeedBasis basis_fine, basis_c_to_f;
1856   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1857   CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
1858 
1859   // Core code
1860   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1861 
1862   return CEED_ERROR_SUCCESS;
1863 }
1864 
1865 /**
1866   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
1867 
1868   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1869 
1870   @param[in]  op_fine       Fine grid operator
1871   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
1872   @param[in]  rstr_coarse   Coarse grid restriction
1873   @param[in]  basis_coarse  Coarse grid active vector basis
1874   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation
1875   @param[out] op_coarse     Coarse grid operator
1876   @param[out] op_prolong    Coarse to fine operator
1877   @param[out] op_restrict   Fine to coarse operator
1878 
1879   @return An error code: 0 - success, otherwise - failure
1880 
1881   @ref User
1882 **/
1883 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1884                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
1885                                              CeedOperator *op_restrict) {
1886   CeedCall(CeedOperatorCheckReady(op_fine));
1887   Ceed ceed;
1888   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
1889 
1890   // Check for compatible quadrature spaces
1891   CeedBasis basis_fine;
1892   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1893   CeedInt Q_f, Q_c;
1894   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
1895   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
1896   if (Q_f != Q_c) {
1897     // LCOV_EXCL_START
1898     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
1899     // LCOV_EXCL_STOP
1900   }
1901 
1902   // Coarse to fine basis
1903   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
1904   CeedCall(CeedBasisGetDimension(basis_fine, &dim));
1905   CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
1906   CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
1907   CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
1908   P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
1909   CeedScalar *q_ref, *q_weight, *grad;
1910   CeedCall(CeedCalloc(P_1d_f, &q_ref));
1911   CeedCall(CeedCalloc(P_1d_f, &q_weight));
1912   CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
1913   CeedBasis basis_c_to_f;
1914   CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
1915   CeedCall(CeedFree(&q_ref));
1916   CeedCall(CeedFree(&q_weight));
1917   CeedCall(CeedFree(&grad));
1918 
1919   // Core code
1920   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1921   return CEED_ERROR_SUCCESS;
1922 }
1923 
1924 /**
1925   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
1926 
1927   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1928 
1929   @param[in]  op_fine       Fine grid operator
1930   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
1931   @param[in]  rstr_coarse   Coarse grid restriction
1932   @param[in]  basis_coarse  Coarse grid active vector basis
1933   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation
1934   @param[out] op_coarse     Coarse grid operator
1935   @param[out] op_prolong    Coarse to fine operator
1936   @param[out] op_restrict   Fine to coarse operator
1937 
1938   @return An error code: 0 - success, otherwise - failure
1939 
1940   @ref User
1941 **/
1942 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1943                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
1944                                        CeedOperator *op_restrict) {
1945   CeedCall(CeedOperatorCheckReady(op_fine));
1946   Ceed ceed;
1947   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
1948 
1949   // Check for compatible quadrature spaces
1950   CeedBasis basis_fine;
1951   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1952   CeedInt Q_f, Q_c;
1953   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
1954   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
1955   if (Q_f != Q_c) {
1956     // LCOV_EXCL_START
1957     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
1958     // LCOV_EXCL_STOP
1959   }
1960 
1961   // Coarse to fine basis
1962   CeedElemTopology topo;
1963   CeedCall(CeedBasisGetTopology(basis_fine, &topo));
1964   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
1965   CeedCall(CeedBasisGetDimension(basis_fine, &dim));
1966   CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
1967   CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
1968   CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
1969   CeedScalar *q_ref, *q_weight, *grad;
1970   CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
1971   CeedCall(CeedCalloc(num_nodes_f, &q_weight));
1972   CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
1973   CeedBasis basis_c_to_f;
1974   CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
1975   CeedCall(CeedFree(&q_ref));
1976   CeedCall(CeedFree(&q_weight));
1977   CeedCall(CeedFree(&grad));
1978 
1979   // Core code
1980   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1981   return CEED_ERROR_SUCCESS;
1982 }
1983 
1984 /**
1985   @brief Build a FDM based approximate inverse for each element for a CeedOperator
1986 
1987   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
1988     This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, M = V^T V, K = V^T S V.
1989     The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form V^T
1990 S^hat V. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear.
1991 
1992   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1993 
1994   @param[in]  op      CeedOperator to create element inverses
1995   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
1996   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1997 
1998   @return An error code: 0 - success, otherwise - failure
1999 
2000   @ref User
2001 **/
2002 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2003   CeedCall(CeedOperatorCheckReady(op));
2004 
2005   if (op->CreateFDMElementInverse) {
2006     // Backend version
2007     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2008     return CEED_ERROR_SUCCESS;
2009   } else {
2010     // Operator fallback
2011     CeedOperator op_fallback;
2012 
2013     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2014     if (op_fallback) {
2015       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2016       return CEED_ERROR_SUCCESS;
2017     }
2018   }
2019 
2020   // Default interface implementation
2021   Ceed ceed, ceed_parent;
2022   CeedCall(CeedOperatorGetCeed(op, &ceed));
2023   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2024   ceed_parent = ceed_parent ? ceed_parent : ceed;
2025   CeedQFunction qf;
2026   CeedCall(CeedOperatorGetQFunction(op, &qf));
2027 
2028   // Determine active input basis
2029   bool                interp = false, grad = false;
2030   CeedBasis           basis = NULL;
2031   CeedElemRestriction rstr  = NULL;
2032   CeedOperatorField  *op_fields;
2033   CeedQFunctionField *qf_fields;
2034   CeedInt             num_input_fields;
2035   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2036   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2037   for (CeedInt i = 0; i < num_input_fields; i++) {
2038     CeedVector vec;
2039     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2040     if (vec == CEED_VECTOR_ACTIVE) {
2041       CeedEvalMode eval_mode;
2042       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2043       interp = interp || eval_mode == CEED_EVAL_INTERP;
2044       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2045       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2046       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2047     }
2048   }
2049   if (!basis) {
2050     // LCOV_EXCL_START
2051     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2052     // LCOV_EXCL_STOP
2053   }
2054   CeedSize l_size = 1;
2055   CeedInt  P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
2056   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2057   CeedCall(CeedBasisGetNumNodes(basis, &elem_size));
2058   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2059   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2060   CeedCall(CeedBasisGetDimension(basis, &dim));
2061   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2062   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2063   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2064 
2065   // Build and diagonalize 1D Mass and Laplacian
2066   bool tensor_basis;
2067   CeedCall(CeedBasisIsTensor(basis, &tensor_basis));
2068   if (!tensor_basis) {
2069     // LCOV_EXCL_START
2070     return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2071     // LCOV_EXCL_STOP
2072   }
2073   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2074   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2075   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2076   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2077   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2078   CeedCall(CeedCalloc(P_1d, &lambda));
2079   // -- Build matrices
2080   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2081   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2082   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2083   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2084   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2085 
2086   // -- Diagonalize
2087   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2088   CeedCall(CeedFree(&mass));
2089   CeedCall(CeedFree(&laplace));
2090   for (CeedInt i = 0; i < P_1d; i++) {
2091     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2092   }
2093   CeedCall(CeedFree(&x));
2094 
2095   // Assemble QFunction
2096   CeedVector          assembled;
2097   CeedElemRestriction rstr_qf;
2098   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2099   CeedInt layout[3];
2100   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2101   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2102   CeedScalar max_norm = 0;
2103   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2104 
2105   // Calculate element averages
2106   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2107   CeedScalar       *elem_avg;
2108   const CeedScalar *assembled_array, *q_weight_array;
2109   CeedVector        q_weight;
2110   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2111   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2112   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2113   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2114   CeedCall(CeedCalloc(num_elem, &elem_avg));
2115   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2116   for (CeedInt e = 0; e < num_elem; e++) {
2117     CeedInt count = 0;
2118     for (CeedInt q = 0; q < num_qpts; q++) {
2119       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2120         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2121           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2122           count++;
2123         }
2124       }
2125     }
2126     if (count) {
2127       elem_avg[e] /= count;
2128     } else {
2129       elem_avg[e] = 1.0;
2130     }
2131   }
2132   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2133   CeedCall(CeedVectorDestroy(&assembled));
2134   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2135   CeedCall(CeedVectorDestroy(&q_weight));
2136 
2137   // Build FDM diagonal
2138   CeedVector  q_data;
2139   CeedScalar *q_data_array, *fdm_diagonal;
2140   CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal));
2141   const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON;
2142   for (CeedInt c = 0; c < num_comp; c++) {
2143     for (CeedInt n = 0; n < elem_size; n++) {
2144       if (interp) fdm_diagonal[c * elem_size + n] = 1.0;
2145       if (grad) {
2146         for (CeedInt d = 0; d < dim; d++) {
2147           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2148           fdm_diagonal[c * elem_size + n] += lambda[i];
2149         }
2150       }
2151       if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound;
2152     }
2153   }
2154   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data));
2155   CeedCall(CeedVectorSetValue(q_data, 0.0));
2156   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2157   for (CeedInt e = 0; e < num_elem; e++) {
2158     for (CeedInt c = 0; c < num_comp; c++) {
2159       for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]);
2160     }
2161   }
2162   CeedCall(CeedFree(&elem_avg));
2163   CeedCall(CeedFree(&fdm_diagonal));
2164   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2165 
2166   // Setup FDM operator
2167   // -- Basis
2168   CeedBasis   fdm_basis;
2169   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2170   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2171   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2172   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2173   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2174   CeedCall(CeedFree(&fdm_interp));
2175   CeedCall(CeedFree(&grad_dummy));
2176   CeedCall(CeedFree(&q_ref_dummy));
2177   CeedCall(CeedFree(&q_weight_dummy));
2178   CeedCall(CeedFree(&lambda));
2179 
2180   // -- Restriction
2181   CeedElemRestriction rstr_qd_i;
2182   CeedInt             strides[3] = {1, elem_size, elem_size * num_comp};
2183   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i));
2184   // -- QFunction
2185   CeedQFunction qf_fdm;
2186   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2187   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2188   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2189   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2190   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2191   // -- QFunction context
2192   CeedInt *num_comp_data;
2193   CeedCall(CeedCalloc(1, &num_comp_data));
2194   num_comp_data[0] = num_comp;
2195   CeedQFunctionContext ctx_fdm;
2196   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2197   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2198   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2199   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2200   // -- Operator
2201   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2202   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2203   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2204   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2205 
2206   // Cleanup
2207   CeedCall(CeedVectorDestroy(&q_data));
2208   CeedCall(CeedBasisDestroy(&fdm_basis));
2209   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2210   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2211 
2212   return CEED_ERROR_SUCCESS;
2213 }
2214 
2215 /// @}
2216