xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision addd79febfda0bce97265a79178aabc38559154a)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <assert.h>
9 #include <ceed-impl.h>
10 #include <ceed/backend.h>
11 #include <ceed/ceed.h>
12 #include <math.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced CeedOperator functionality
28 
29   @param[in]  fallback_ceed Ceed on which to create fallback CeedQFunction
30   @param[in]  qf            CeedQFunction to create fallback for
31   @param[out] qf_fallback   fallback CeedQFunction
32 
33   @return An error code: 0 - success, otherwise - failure
34 
35   @ref Developer
36 **/
37 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf, CeedQFunction *qf_fallback) {
38   // Check if NULL qf passed in
39   if (!qf) return CEED_ERROR_SUCCESS;
40 
41   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
42   CeedDebug(qf->ceed, "Creating fallback CeedQFunction\n");
43 
44   char *source_path_with_name = "";
45   if (qf->source_path) {
46     size_t path_len = strlen(qf->source_path), name_len = strlen(qf->kernel_name);
47     CeedCall(CeedCalloc(path_len + name_len + 2, &source_path_with_name));
48     memcpy(source_path_with_name, qf->source_path, path_len);
49     memcpy(&source_path_with_name[path_len], ":", 1);
50     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
51   } else {
52     CeedCall(CeedCalloc(1, &source_path_with_name));
53   }
54 
55   CeedCall(CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length, qf->function, source_path_with_name, qf_fallback));
56   {
57     CeedQFunctionContext ctx;
58 
59     CeedCall(CeedQFunctionGetContext(qf, &ctx));
60     CeedCall(CeedQFunctionSetContext(*qf_fallback, ctx));
61   }
62   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
63     CeedCall(CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name, qf->input_fields[i]->size, qf->input_fields[i]->eval_mode));
64   }
65   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
66     CeedCall(CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name, qf->output_fields[i]->size, qf->output_fields[i]->eval_mode));
67   }
68   CeedCall(CeedFree(&source_path_with_name));
69 
70   return CEED_ERROR_SUCCESS;
71 }
72 
73 /**
74   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced CeedOperator functionality
75 
76   @param[in,out] op CeedOperator to create fallback for
77 
78   @return An error code: 0 - success, otherwise - failure
79 
80   @ref Developer
81 **/
82 static int CeedOperatorCreateFallback(CeedOperator op) {
83   bool is_composite;
84   Ceed ceed_fallback;
85 
86   // Check not already created
87   if (op->op_fallback) return CEED_ERROR_SUCCESS;
88 
89   // Fallback Ceed
90   CeedCall(CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback));
91   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
92 
93   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
94   CeedDebug(op->ceed, "Creating fallback CeedOperator\n");
95 
96   // Clone Op
97   CeedOperator op_fallback;
98   CeedCall(CeedOperatorIsComposite(op, &is_composite));
99   if (is_composite) {
100     CeedInt       num_suboperators;
101     CeedOperator *sub_operators;
102 
103     CeedCall(CeedCompositeOperatorCreate(ceed_fallback, &op_fallback));
104     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
105     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
106     for (CeedInt i = 0; i < num_suboperators; i++) {
107       CeedOperator op_sub_fallback;
108 
109       CeedCall(CeedOperatorGetFallback(sub_operators[i], &op_sub_fallback));
110       CeedCall(CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback));
111     }
112   } else {
113     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
114     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback));
115     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback));
116     CeedCall(CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback));
117     CeedCall(CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback, dqfT_fallback, &op_fallback));
118     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
119       CeedCall(CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name, op->input_fields[i]->elem_restr, op->input_fields[i]->basis,
120                                     op->input_fields[i]->vec));
121     }
122     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
123       CeedCall(CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name, op->output_fields[i]->elem_restr, op->output_fields[i]->basis,
124                                     op->output_fields[i]->vec));
125     }
126     CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled, &op_fallback->qf_assembled));
127     if (op_fallback->num_qpts == 0) {
128       CeedCall(CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts));
129     }
130     // Cleanup
131     CeedCall(CeedQFunctionDestroy(&qf_fallback));
132     CeedCall(CeedQFunctionDestroy(&dqf_fallback));
133     CeedCall(CeedQFunctionDestroy(&dqfT_fallback));
134   }
135   CeedCall(CeedOperatorSetName(op_fallback, op->name));
136   CeedCall(CeedOperatorCheckReady(op_fallback));
137   op->op_fallback = op_fallback;
138 
139   return CEED_ERROR_SUCCESS;
140 }
141 
142 /**
143   @brief Retrieve fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
144 
145   @param[in]  op          CeedOperator to retrieve fallback for
146   @param[out] op_fallback Fallback CeedOperator
147 
148   @return An error code: 0 - success, otherwise - failure
149 
150   @ref Developer
151 **/
152 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
153   // Create if needed
154   if (!op->op_fallback) {
155     CeedCall(CeedOperatorCreateFallback(op));
156   }
157   if (op->op_fallback) {
158     bool is_debug;
159 
160     CeedCall(CeedIsDebug(op->ceed, &is_debug));
161     if (is_debug) {
162       Ceed        ceed, ceed_fallback;
163       const char *resource, *resource_fallback;
164 
165       CeedCall(CeedOperatorGetCeed(op, &ceed));
166       CeedCall(CeedGetOperatorFallbackCeed(ceed, &ceed_fallback));
167       CeedCall(CeedGetResource(ceed, &resource));
168       CeedCall(CeedGetResource(ceed_fallback, &resource_fallback));
169 
170       CeedDebug256(ceed, 1, "---------- CeedOperator Fallback ----------\n");
171       CeedDebug(ceed, "Falling back from %s operator at address %ld to %s operator at address %ld\n", resource, op, resource_fallback,
172                 op->op_fallback);
173     }
174   }
175   *op_fallback = op->op_fallback;
176 
177   return CEED_ERROR_SUCCESS;
178 }
179 
180 /**
181   @brief Select correct basis matrix pointer based on CeedEvalMode
182 
183   @param[in]  eval_mode Current basis evaluation mode
184   @param[in]  identity  Pointer to identity matrix
185   @param[in]  interp    Pointer to interpolation matrix
186   @param[in]  grad      Pointer to gradient matrix
187   @param[out] basis_ptr Basis pointer to set
188 
189   @ref Developer
190 **/
191 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode, const CeedScalar *identity, const CeedScalar *interp, const CeedScalar *grad,
192                                                const CeedScalar **basis_ptr) {
193   switch (eval_mode) {
194     case CEED_EVAL_NONE:
195       *basis_ptr = identity;
196       break;
197     case CEED_EVAL_INTERP:
198       *basis_ptr = interp;
199       break;
200     case CEED_EVAL_GRAD:
201       *basis_ptr = grad;
202       break;
203     case CEED_EVAL_WEIGHT:
204     case CEED_EVAL_DIV:
205     case CEED_EVAL_CURL:
206       break;  // Caught by QF Assembly
207   }
208   assert(*basis_ptr != NULL);
209 }
210 
211 /**
212   @brief Create point block restriction for active operator field
213 
214   @param[in]  rstr            Original CeedElemRestriction for active field
215   @param[out] pointblock_rstr Address of the variable where the newly created CeedElemRestriction will be stored
216 
217   @return An error code: 0 - success, otherwise - failure
218 
219   @ref Developer
220 **/
221 static int CeedOperatorCreateActivePointBlockRestriction(CeedElemRestriction rstr, CeedElemRestriction *pointblock_rstr) {
222   Ceed ceed;
223   CeedCall(CeedElemRestrictionGetCeed(rstr, &ceed));
224   const CeedInt *offsets;
225   CeedCall(CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets));
226 
227   // Expand offsets
228   CeedInt  num_elem, num_comp, elem_size, comp_stride, *pointblock_offsets;
229   CeedSize l_size;
230   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
231   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
232   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
233   CeedCall(CeedElemRestrictionGetCompStride(rstr, &comp_stride));
234   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
235   CeedInt shift = num_comp;
236   if (comp_stride != 1) shift *= num_comp;
237   CeedCall(CeedCalloc(num_elem * elem_size, &pointblock_offsets));
238   for (CeedInt i = 0; i < num_elem * elem_size; i++) {
239     pointblock_offsets[i] = offsets[i] * shift;
240   }
241 
242   // Create new restriction
243   CeedCall(CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp * num_comp, 1, l_size * num_comp, CEED_MEM_HOST, CEED_OWN_POINTER,
244                                      pointblock_offsets, pointblock_rstr));
245 
246   // Cleanup
247   CeedCall(CeedElemRestrictionRestoreOffsets(rstr, &offsets));
248 
249   return CEED_ERROR_SUCCESS;
250 }
251 
252 /**
253   @brief Core logic for assembling operator diagonal or point block diagonal
254 
255   @param[in]  op            CeedOperator to assemble point block diagonal
256   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
257   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
258   @param[out] assembled     CeedVector to store assembled diagonal
259 
260   @return An error code: 0 - success, otherwise - failure
261 
262   @ref Developer
263 **/
264 static inline int CeedSingleOperatorAssembleAddDiagonal_Core(CeedOperator op, CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
265   Ceed ceed;
266   CeedCall(CeedOperatorGetCeed(op, &ceed));
267 
268   // Assemble QFunction
269   CeedQFunction qf;
270   CeedCall(CeedOperatorGetQFunction(op, &qf));
271   CeedInt num_input_fields, num_output_fields;
272   CeedCall(CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields));
273   CeedVector          assembled_qf;
274   CeedElemRestriction rstr;
275   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr, request));
276   CeedInt layout[3];
277   CeedCall(CeedElemRestrictionGetELayout(rstr, &layout));
278   CeedCall(CeedElemRestrictionDestroy(&rstr));
279 
280   // Get assembly data
281   CeedOperatorAssemblyData data;
282   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
283   const CeedEvalMode *eval_mode_in, *eval_mode_out;
284   CeedInt             num_eval_mode_in, num_eval_mode_out;
285   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out));
286   CeedBasis basis_in, basis_out;
287   CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL));
288   CeedInt num_comp;
289   CeedCall(CeedBasisGetNumComponents(basis_in, &num_comp));
290 
291   // Assemble point block diagonal restriction, if needed
292   CeedElemRestriction diag_rstr;
293   CeedCall(CeedOperatorGetActiveElemRestriction(op, &diag_rstr));
294   if (is_pointblock) {
295     CeedElemRestriction point_block_rstr;
296     CeedCall(CeedOperatorCreateActivePointBlockRestriction(diag_rstr, &point_block_rstr));
297     diag_rstr = point_block_rstr;
298   }
299 
300   // Create diagonal vector
301   CeedVector elem_diag;
302   CeedCall(CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag));
303 
304   // Assemble element operator diagonals
305   CeedScalar       *elem_diag_array;
306   const CeedScalar *assembled_qf_array;
307   CeedCall(CeedVectorSetValue(elem_diag, 0.0));
308   CeedCall(CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array));
309   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
310   CeedInt num_elem, num_nodes, num_qpts;
311   CeedCall(CeedElemRestrictionGetNumElements(diag_rstr, &num_elem));
312   CeedCall(CeedBasisGetNumNodes(basis_in, &num_nodes));
313   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
314 
315   // Basis matrices
316   const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
317   CeedScalar       *identity      = NULL;
318   bool              has_eval_none = false;
319   for (CeedInt i = 0; i < num_eval_mode_in; i++) {
320     has_eval_none = has_eval_none || (eval_mode_in[i] == CEED_EVAL_NONE);
321   }
322   for (CeedInt i = 0; i < num_eval_mode_out; i++) {
323     has_eval_none = has_eval_none || (eval_mode_out[i] == CEED_EVAL_NONE);
324   }
325   if (has_eval_none) {
326     CeedCall(CeedCalloc(num_qpts * num_nodes, &identity));
327     for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++) identity[i * num_nodes + i] = 1.0;
328   }
329   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
330   CeedCall(CeedBasisGetInterp(basis_out, &interp_out));
331   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
332   CeedCall(CeedBasisGetGrad(basis_out, &grad_out));
333   // Compute the diagonal of B^T D B
334   // Each element
335   for (CeedInt e = 0; e < num_elem; e++) {
336     CeedInt d_out = -1;
337     // Each basis eval mode pair
338     for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
339       const CeedScalar *bt = NULL;
340       if (eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out += 1;
341       CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * num_nodes], &bt);
342       CeedInt d_in = -1;
343       for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
344         const CeedScalar *b = NULL;
345         if (eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in += 1;
346         CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * num_nodes], &b);
347         // Each component
348         for (CeedInt c_out = 0; c_out < num_comp; c_out++) {
349           // Each qpoint/node pair
350           for (CeedInt q = 0; q < num_qpts; q++) {
351             if (is_pointblock) {
352               // Point Block Diagonal
353               for (CeedInt c_in = 0; c_in < num_comp; c_in++) {
354                 const CeedScalar qf_value =
355                     assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_in) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] +
356                                        e * layout[2]];
357                 for (CeedInt n = 0; n < num_nodes; n++) {
358                   elem_diag_array[((e * num_comp + c_out) * num_comp + c_in) * num_nodes + n] +=
359                       bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n];
360                 }
361               }
362             } else {
363               // Diagonal Only
364               const CeedScalar qf_value =
365                   assembled_qf_array[q * layout[0] + (((e_in * num_comp + c_out) * num_eval_mode_out + e_out) * num_comp + c_out) * layout[1] +
366                                      e * layout[2]];
367               for (CeedInt n = 0; n < num_nodes; n++) {
368                 elem_diag_array[(e * num_comp + c_out) * num_nodes + n] += bt[q * num_nodes + n] * qf_value * b[q * num_nodes + n];
369               }
370             }
371           }
372         }
373       }
374     }
375   }
376   CeedCall(CeedVectorRestoreArray(elem_diag, &elem_diag_array));
377   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
378 
379   // Assemble local operator diagonal
380   CeedCall(CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag, assembled, request));
381 
382   // Cleanup
383   if (is_pointblock) {
384     CeedCall(CeedElemRestrictionDestroy(&diag_rstr));
385   }
386   CeedCall(CeedVectorDestroy(&assembled_qf));
387   CeedCall(CeedVectorDestroy(&elem_diag));
388   CeedCall(CeedFree(&identity));
389 
390   return CEED_ERROR_SUCCESS;
391 }
392 
393 /**
394   @brief Core logic for assembling composite operator diagonal
395 
396   @param[in]  op            CeedOperator to assemble point block diagonal
397   @param[in]  request       Address of CeedRequest for non-blocking completion, else CEED_REQUEST_IMMEDIATE
398   @param[in]  is_pointblock Boolean flag to assemble diagonal or point block diagonal
399   @param[out] assembled     CeedVector to store assembled diagonal
400 
401   @return An error code: 0 - success, otherwise - failure
402 
403   @ref Developer
404 **/
405 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedRequest *request, const bool is_pointblock,
406                                                                  CeedVector assembled) {
407   CeedInt       num_sub;
408   CeedOperator *suboperators;
409   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_sub));
410   CeedCall(CeedCompositeOperatorGetSubList(op, &suboperators));
411   for (CeedInt i = 0; i < num_sub; i++) {
412     if (is_pointblock) {
413       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i], assembled, request));
414     } else {
415       CeedCall(CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled, request));
416     }
417   }
418   return CEED_ERROR_SUCCESS;
419 }
420 
421 /**
422   @brief Build nonzero pattern for non-composite operator
423 
424   Users should generally use CeedOperatorLinearAssembleSymbolic()
425 
426   @param[in]  op     CeedOperator to assemble nonzero pattern
427   @param[in]  offset Offset for number of entries
428   @param[out] rows   Row number for each entry
429   @param[out] cols   Column number for each entry
430 
431   @return An error code: 0 - success, otherwise - failure
432 
433   @ref Developer
434 **/
435 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset, CeedInt *rows, CeedInt *cols) {
436   Ceed ceed;
437   bool is_composite;
438   CeedCall(CeedOperatorGetCeed(op, &ceed));
439   CeedCall(CeedOperatorIsComposite(op, &is_composite));
440 
441   if (is_composite) {
442     // LCOV_EXCL_START
443     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
444     // LCOV_EXCL_STOP
445   }
446 
447   CeedSize num_nodes;
448   CeedCall(CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL));
449   CeedElemRestriction rstr_in;
450   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr_in));
451   CeedInt num_elem, elem_size, num_comp;
452   CeedCall(CeedElemRestrictionGetNumElements(rstr_in, &num_elem));
453   CeedCall(CeedElemRestrictionGetElementSize(rstr_in, &elem_size));
454   CeedCall(CeedElemRestrictionGetNumComponents(rstr_in, &num_comp));
455   CeedInt layout_er[3];
456   CeedCall(CeedElemRestrictionGetELayout(rstr_in, &layout_er));
457 
458   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
459 
460   // Determine elem_dof relation
461   CeedVector index_vec;
462   CeedCall(CeedVectorCreate(ceed, num_nodes, &index_vec));
463   CeedScalar *array;
464   CeedCall(CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array));
465   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
466   CeedCall(CeedVectorRestoreArray(index_vec, &array));
467   CeedVector elem_dof;
468   CeedCall(CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof));
469   CeedCall(CeedVectorSetValue(elem_dof, 0.0));
470   CeedCall(CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec, elem_dof, CEED_REQUEST_IMMEDIATE));
471   const CeedScalar *elem_dof_a;
472   CeedCall(CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a));
473   CeedCall(CeedVectorDestroy(&index_vec));
474 
475   // Determine i, j locations for element matrices
476   CeedInt count = 0;
477   for (CeedInt e = 0; e < num_elem; e++) {
478     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
479       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
480         for (CeedInt i = 0; i < elem_size; i++) {
481           for (CeedInt j = 0; j < elem_size; j++) {
482             const CeedInt elem_dof_index_row = i * layout_er[0] + (comp_out)*layout_er[1] + e * layout_er[2];
483             const CeedInt elem_dof_index_col = j * layout_er[0] + comp_in * layout_er[1] + e * layout_er[2];
484 
485             const CeedInt row = elem_dof_a[elem_dof_index_row];
486             const CeedInt col = elem_dof_a[elem_dof_index_col];
487 
488             rows[offset + count] = row;
489             cols[offset + count] = col;
490             count++;
491           }
492         }
493       }
494     }
495   }
496   if (count != local_num_entries) {
497     // LCOV_EXCL_START
498     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
499     // LCOV_EXCL_STOP
500   }
501   CeedCall(CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a));
502   CeedCall(CeedVectorDestroy(&elem_dof));
503 
504   return CEED_ERROR_SUCCESS;
505 }
506 
507 /**
508   @brief Assemble nonzero entries for non-composite operator
509 
510   Users should generally use CeedOperatorLinearAssemble()
511 
512   @param[in]  op     CeedOperator to assemble
513   @param[in]  offset Offset for number of entries
514   @param[out] values Values to assemble into matrix
515 
516   @return An error code: 0 - success, otherwise - failure
517 
518   @ref Developer
519 **/
520 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset, CeedVector values) {
521   Ceed ceed;
522   bool is_composite;
523   CeedCall(CeedOperatorGetCeed(op, &ceed));
524   CeedCall(CeedOperatorIsComposite(op, &is_composite));
525 
526   if (is_composite) {
527     // LCOV_EXCL_START
528     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
529     // LCOV_EXCL_STOP
530   }
531 
532   // Early exit for empty operator
533   {
534     CeedInt num_elem = 0;
535 
536     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
537     if (num_elem == 0) return CEED_ERROR_SUCCESS;
538   }
539 
540   if (op->LinearAssembleSingle) {
541     // Backend version
542     CeedCall(op->LinearAssembleSingle(op, offset, values));
543     return CEED_ERROR_SUCCESS;
544   } else {
545     // Operator fallback
546     CeedOperator op_fallback;
547 
548     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
549     if (op_fallback) {
550       CeedCall(CeedSingleOperatorAssemble(op_fallback, offset, values));
551       return CEED_ERROR_SUCCESS;
552     }
553   }
554 
555   // Assemble QFunction
556   CeedQFunction qf;
557   CeedCall(CeedOperatorGetQFunction(op, &qf));
558   CeedVector          assembled_qf;
559   CeedElemRestriction rstr_q;
560   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE));
561   CeedSize qf_length;
562   CeedCall(CeedVectorGetLength(assembled_qf, &qf_length));
563 
564   CeedInt            num_input_fields, num_output_fields;
565   CeedOperatorField *input_fields;
566   CeedOperatorField *output_fields;
567   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
568 
569   // Get assembly data
570   CeedOperatorAssemblyData data;
571   CeedCall(CeedOperatorGetOperatorAssemblyData(op, &data));
572   const CeedEvalMode *eval_mode_in, *eval_mode_out;
573   CeedInt             num_eval_mode_in, num_eval_mode_out;
574   CeedCall(CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in, &eval_mode_in, &num_eval_mode_out, &eval_mode_out));
575   CeedBasis basis_in, basis_out;
576   CeedCall(CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out, NULL));
577 
578   if (num_eval_mode_in == 0 || num_eval_mode_out == 0) {
579     // LCOV_EXCL_START
580     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Cannot assemble operator with out inputs/outputs");
581     // LCOV_EXCL_STOP
582   }
583 
584   CeedElemRestriction active_rstr;
585   CeedInt             num_elem, elem_size, num_qpts, num_comp;
586   CeedCall(CeedOperatorGetActiveElemRestriction(op, &active_rstr));
587   CeedCall(CeedElemRestrictionGetNumElements(active_rstr, &num_elem));
588   CeedCall(CeedElemRestrictionGetElementSize(active_rstr, &elem_size));
589   CeedCall(CeedElemRestrictionGetNumComponents(active_rstr, &num_comp));
590   CeedCall(CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts));
591 
592   CeedInt local_num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
593 
594   // loop over elements and put in data structure
595   const CeedScalar *interp_in, *grad_in;
596   CeedCall(CeedBasisGetInterp(basis_in, &interp_in));
597   CeedCall(CeedBasisGetGrad(basis_in, &grad_in));
598 
599   const CeedScalar *assembled_qf_array;
600   CeedCall(CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array));
601 
602   CeedInt layout_qf[3];
603   CeedCall(CeedElemRestrictionGetELayout(rstr_q, &layout_qf));
604   CeedCall(CeedElemRestrictionDestroy(&rstr_q));
605 
606   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
607   const CeedScalar *B_mat_in, *B_mat_out;
608   CeedCall(CeedOperatorAssemblyDataGetBases(data, NULL, &B_mat_in, NULL, &B_mat_out));
609   CeedScalar  BTD_mat[elem_size * num_qpts * num_eval_mode_in];
610   CeedScalar  elem_mat[elem_size * elem_size];
611   CeedInt     count = 0;
612   CeedScalar *vals;
613   CeedCall(CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals));
614   for (CeedInt e = 0; e < num_elem; e++) {
615     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
616       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
617         // Compute B^T*D
618         for (CeedInt n = 0; n < elem_size; n++) {
619           for (CeedInt q = 0; q < num_qpts; q++) {
620             for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
621               const CeedInt btd_index = n * (num_qpts * num_eval_mode_in) + (num_eval_mode_in * q + e_in);
622               CeedScalar    sum       = 0.0;
623               for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
624                 const CeedInt b_out_index     = (num_eval_mode_out * q + e_out) * elem_size + n;
625                 const CeedInt eval_mode_index = ((e_in * num_comp + comp_in) * num_eval_mode_out + e_out) * num_comp + comp_out;
626                 const CeedInt qf_index        = q * layout_qf[0] + eval_mode_index * layout_qf[1] + e * layout_qf[2];
627                 sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
628               }
629               BTD_mat[btd_index] = sum;
630             }
631           }
632         }
633         // form element matrix itself (for each block component)
634         CeedCall(CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size, elem_size, num_qpts * num_eval_mode_in));
635 
636         // put element matrix in coordinate data structure
637         for (CeedInt i = 0; i < elem_size; i++) {
638           for (CeedInt j = 0; j < elem_size; j++) {
639             vals[offset + count] = elem_mat[i * elem_size + j];
640             count++;
641           }
642         }
643       }
644     }
645   }
646   if (count != local_num_entries) {
647     // LCOV_EXCL_START
648     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
649     // LCOV_EXCL_STOP
650   }
651   CeedCall(CeedVectorRestoreArray(values, &vals));
652 
653   CeedCall(CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array));
654   CeedCall(CeedVectorDestroy(&assembled_qf));
655 
656   return CEED_ERROR_SUCCESS;
657 }
658 
659 /**
660   @brief Count number of entries for assembled CeedOperator
661 
662   @param[in]  op          CeedOperator to assemble
663   @param[out] num_entries Number of entries in assembled representation
664 
665   @return An error code: 0 - success, otherwise - failure
666 
667   @ref Utility
668 **/
669 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op, CeedInt *num_entries) {
670   bool                is_composite;
671   CeedElemRestriction rstr;
672   CeedInt             num_elem, elem_size, num_comp;
673 
674   CeedCall(CeedOperatorIsComposite(op, &is_composite));
675   if (is_composite) {
676     // LCOV_EXCL_START
677     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Composite operator not supported");
678     // LCOV_EXCL_STOP
679   }
680   CeedCall(CeedOperatorGetActiveElemRestriction(op, &rstr));
681   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
682   CeedCall(CeedElemRestrictionGetElementSize(rstr, &elem_size));
683   CeedCall(CeedElemRestrictionGetNumComponents(rstr, &num_comp));
684   *num_entries = elem_size * num_comp * elem_size * num_comp * num_elem;
685 
686   return CEED_ERROR_SUCCESS;
687 }
688 
689 /**
690   @brief Common code for creating a multigrid coarse operator and level transfer operators for a CeedOperator
691 
692   @param[in]  op_fine      Fine grid operator
693   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter
694   @param[in]  rstr_coarse  Coarse grid restriction
695   @param[in]  basis_coarse Coarse grid active vector basis
696   @param[in]  basis_c_to_f Basis for coarse to fine interpolation
697   @param[out] op_coarse    Coarse grid operator
698   @param[out] op_prolong   Coarse to fine operator
699   @param[out] op_restrict  Fine to coarse operator
700 
701   @return An error code: 0 - success, otherwise - failure
702 
703   @ref Developer
704 **/
705 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
706                                             CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
707   Ceed ceed;
708   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
709 
710   // Check for composite operator
711   bool is_composite;
712   CeedCall(CeedOperatorIsComposite(op_fine, &is_composite));
713   if (is_composite) {
714     // LCOV_EXCL_START
715     return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Automatic multigrid setup for composite operators not supported");
716     // LCOV_EXCL_STOP
717   }
718 
719   // Coarse Grid
720   CeedCall(CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT, op_coarse));
721   CeedElemRestriction rstr_fine = NULL;
722   // -- Clone input fields
723   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
724     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
725       rstr_fine = op_fine->input_fields[i]->elem_restr;
726       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
727     } else {
728       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name, op_fine->input_fields[i]->elem_restr,
729                                     op_fine->input_fields[i]->basis, op_fine->input_fields[i]->vec));
730     }
731   }
732   // -- Clone output fields
733   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
734     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
735       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE));
736     } else {
737       CeedCall(CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name, op_fine->output_fields[i]->elem_restr,
738                                     op_fine->output_fields[i]->basis, op_fine->output_fields[i]->vec));
739     }
740   }
741   // -- Clone QFunctionAssemblyData
742   CeedCall(CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled, &(*op_coarse)->qf_assembled));
743 
744   // Multiplicity vector
745   CeedVector mult_vec, mult_e_vec;
746   CeedCall(CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec));
747   CeedCall(CeedVectorSetValue(mult_e_vec, 0.0));
748   CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine, mult_e_vec, CEED_REQUEST_IMMEDIATE));
749   CeedCall(CeedVectorSetValue(mult_vec, 0.0));
750   CeedCall(CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec, CEED_REQUEST_IMMEDIATE));
751   CeedCall(CeedVectorDestroy(&mult_e_vec));
752   CeedCall(CeedVectorReciprocal(mult_vec));
753 
754   // Clone name
755   bool   has_name = op_fine->name;
756   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
757   CeedCall(CeedOperatorSetName(*op_coarse, op_fine->name));
758 
759   // Restriction/Prolongation Operators
760   CeedInt num_comp;
761   CeedCall(CeedBasisGetNumComponents(basis_coarse, &num_comp));
762 
763   // Restriction
764   if (op_restrict) {
765     CeedQFunction qf_restrict;
766     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict));
767     CeedInt *num_comp_r_data;
768     CeedCall(CeedCalloc(1, &num_comp_r_data));
769     num_comp_r_data[0] = num_comp;
770     CeedQFunctionContext ctx_r;
771     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_r));
772     CeedCall(CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_r_data), num_comp_r_data));
773     CeedCall(CeedQFunctionSetContext(qf_restrict, ctx_r));
774     CeedCall(CeedQFunctionContextDestroy(&ctx_r));
775     CeedCall(CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE));
776     CeedCall(CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE));
777     CeedCall(CeedQFunctionAddOutput(qf_restrict, "output", num_comp, CEED_EVAL_INTERP));
778     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp));
779 
780     CeedCall(CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_restrict));
781     CeedCall(CeedOperatorSetField(*op_restrict, "input", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
782     CeedCall(CeedOperatorSetField(*op_restrict, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
783     CeedCall(CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
784 
785     // Set name
786     char *restriction_name;
787     CeedCall(CeedCalloc(17 + name_len, &restriction_name));
788     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
789     CeedCall(CeedOperatorSetName(*op_restrict, restriction_name));
790     CeedCall(CeedFree(&restriction_name));
791 
792     // Check
793     CeedCall(CeedOperatorCheckReady(*op_restrict));
794 
795     // Cleanup
796     CeedCall(CeedQFunctionDestroy(&qf_restrict));
797   }
798 
799   // Prolongation
800   if (op_prolong) {
801     CeedQFunction qf_prolong;
802     CeedCall(CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong));
803     CeedInt *num_comp_p_data;
804     CeedCall(CeedCalloc(1, &num_comp_p_data));
805     num_comp_p_data[0] = num_comp;
806     CeedQFunctionContext ctx_p;
807     CeedCall(CeedQFunctionContextCreate(ceed, &ctx_p));
808     CeedCall(CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_p_data), num_comp_p_data));
809     CeedCall(CeedQFunctionSetContext(qf_prolong, ctx_p));
810     CeedCall(CeedQFunctionContextDestroy(&ctx_p));
811     CeedCall(CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP));
812     CeedCall(CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE));
813     CeedCall(CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE));
814     CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp));
815 
816     CeedCall(CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, op_prolong));
817     CeedCall(CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f, CEED_VECTOR_ACTIVE));
818     CeedCall(CeedOperatorSetField(*op_prolong, "scale", rstr_fine, CEED_BASIS_COLLOCATED, mult_vec));
819     CeedCall(CeedOperatorSetField(*op_prolong, "output", rstr_fine, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE));
820 
821     // Set name
822     char *prolongation_name;
823     CeedCall(CeedCalloc(18 + name_len, &prolongation_name));
824     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "", has_name ? op_fine->name : "");
825     CeedCall(CeedOperatorSetName(*op_prolong, prolongation_name));
826     CeedCall(CeedFree(&prolongation_name));
827 
828     // Check
829     CeedCall(CeedOperatorCheckReady(*op_prolong));
830 
831     // Cleanup
832     CeedCall(CeedQFunctionDestroy(&qf_prolong));
833   }
834 
835   // Check
836   CeedCall(CeedOperatorCheckReady(*op_coarse));
837 
838   // Cleanup
839   CeedCall(CeedVectorDestroy(&mult_vec));
840   CeedCall(CeedBasisDestroy(&basis_c_to_f));
841 
842   return CEED_ERROR_SUCCESS;
843 }
844 
845 /**
846   @brief Build 1D mass matrix and Laplacian with perturbation
847 
848   @param[in]  interp_1d   Interpolation matrix in one dimension
849   @param[in]  grad_1d     Gradient matrix in one dimension
850   @param[in]  q_weight_1d Quadrature weights in one dimension
851   @param[in]  P_1d        Number of basis nodes in one dimension
852   @param[in]  Q_1d        Number of quadrature points in one dimension
853   @param[in]  dim         Dimension of basis
854   @param[out] mass        Assembled mass matrix in one dimension
855   @param[out] laplace     Assembled perturbed Laplacian in one dimension
856 
857   @return An error code: 0 - success, otherwise - failure
858 
859   @ref Developer
860 **/
861 CeedPragmaOptimizeOff static int CeedBuildMassLaplace(const CeedScalar *interp_1d, const CeedScalar *grad_1d, const CeedScalar *q_weight_1d,
862                                                       CeedInt P_1d, CeedInt Q_1d, CeedInt dim, CeedScalar *mass, CeedScalar *laplace) {
863   for (CeedInt i = 0; i < P_1d; i++) {
864     for (CeedInt j = 0; j < P_1d; j++) {
865       CeedScalar sum = 0.0;
866       for (CeedInt k = 0; k < Q_1d; k++) sum += interp_1d[k * P_1d + i] * q_weight_1d[k] * interp_1d[k * P_1d + j];
867       mass[i + j * P_1d] = sum;
868     }
869   }
870   // -- Laplacian
871   for (CeedInt i = 0; i < P_1d; i++) {
872     for (CeedInt j = 0; j < P_1d; j++) {
873       CeedScalar sum = 0.0;
874       for (CeedInt k = 0; k < Q_1d; k++) sum += grad_1d[k * P_1d + i] * q_weight_1d[k] * grad_1d[k * P_1d + j];
875       laplace[i + j * P_1d] = sum;
876     }
877   }
878   CeedScalar perturbation = dim > 2 ? 1e-6 : 1e-4;
879   for (CeedInt i = 0; i < P_1d; i++) laplace[i + P_1d * i] += perturbation;
880   return CEED_ERROR_SUCCESS;
881 }
882 CeedPragmaOptimizeOn;
883 
884 /// @}
885 
886 /// ----------------------------------------------------------------------------
887 /// CeedOperator Backend API
888 /// ----------------------------------------------------------------------------
889 /// @addtogroup CeedOperatorBackend
890 /// @{
891 
892 /**
893   @brief Create object holding CeedQFunction assembly data for CeedOperator
894 
895   @param[in]  ceed A Ceed object where the CeedQFunctionAssemblyData will be created
896   @param[out] data Address of the variable where the newly created CeedQFunctionAssemblyData will be stored
897 
898   @return An error code: 0 - success, otherwise - failure
899 
900   @ref Backend
901 **/
902 int CeedQFunctionAssemblyDataCreate(Ceed ceed, CeedQFunctionAssemblyData *data) {
903   CeedCall(CeedCalloc(1, data));
904   (*data)->ref_count = 1;
905   (*data)->ceed      = ceed;
906   CeedCall(CeedReference(ceed));
907 
908   return CEED_ERROR_SUCCESS;
909 }
910 
911 /**
912   @brief Increment the reference counter for a CeedQFunctionAssemblyData
913 
914   @param[in,out] data CeedQFunctionAssemblyData to increment the reference counter
915 
916   @return An error code: 0 - success, otherwise - failure
917 
918   @ref Backend
919 **/
920 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
921   data->ref_count++;
922   return CEED_ERROR_SUCCESS;
923 }
924 
925 /**
926   @brief Set re-use of CeedQFunctionAssemblyData
927 
928   @param[in,out] data       CeedQFunctionAssemblyData to mark for reuse
929   @param[in]     reuse_data Boolean flag indicating data re-use
930 
931   @return An error code: 0 - success, otherwise - failure
932 
933   @ref Backend
934 **/
935 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data, bool reuse_data) {
936   data->reuse_data        = reuse_data;
937   data->needs_data_update = true;
938   return CEED_ERROR_SUCCESS;
939 }
940 
941 /**
942   @brief Mark QFunctionAssemblyData as stale
943 
944   @param[in,out] data              CeedQFunctionAssemblyData to mark as stale
945   @param[in]     needs_data_update Boolean flag indicating if update is needed or completed
946 
947   @return An error code: 0 - success, otherwise - failure
948 
949   @ref Backend
950 **/
951 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data, bool needs_data_update) {
952   data->needs_data_update = needs_data_update;
953   return CEED_ERROR_SUCCESS;
954 }
955 
956 /**
957   @brief Determine if QFunctionAssemblyData needs update
958 
959   @param[in]  data             CeedQFunctionAssemblyData to mark as stale
960   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
961 
962   @return An error code: 0 - success, otherwise - failure
963 
964   @ref Backend
965 **/
966 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data, bool *is_update_needed) {
967   *is_update_needed = !data->reuse_data || data->needs_data_update;
968   return CEED_ERROR_SUCCESS;
969 }
970 
971 /**
972   @brief Copy the pointer to a CeedQFunctionAssemblyData.
973            Both pointers should be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`.
974            Note: If `*data_copy` is non-NULL, then it is assumed that `*data_copy` is a pointer to a CeedQFunctionAssemblyData.
975              This CeedQFunctionAssemblyData will be destroyed if `*data_copy` is the only reference to this CeedQFunctionAssemblyData.
976 
977   @param[in]     data      CeedQFunctionAssemblyData to copy reference to
978   @param[in,out] data_copy Variable to store copied reference
979 
980   @return An error code: 0 - success, otherwise - failure
981 
982   @ref Backend
983 **/
984 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data, CeedQFunctionAssemblyData *data_copy) {
985   CeedCall(CeedQFunctionAssemblyDataReference(data));
986   CeedCall(CeedQFunctionAssemblyDataDestroy(data_copy));
987   *data_copy = data;
988   return CEED_ERROR_SUCCESS;
989 }
990 
991 /**
992   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
993 
994   @param[in]  data     CeedQFunctionAssemblyData to retrieve status
995   @param[out] is_setup Boolean flag for setup status
996 
997   @return An error code: 0 - success, otherwise - failure
998 
999   @ref Backend
1000 **/
1001 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data, bool *is_setup) {
1002   *is_setup = data->is_setup;
1003   return CEED_ERROR_SUCCESS;
1004 }
1005 
1006 /**
1007   @brief Set internal objects for CeedQFunctionAssemblyData
1008 
1009   @param[in,out] data CeedQFunctionAssemblyData to set objects
1010   @param[in]     vec  CeedVector to store assembled CeedQFunction at quadrature points
1011   @param[in]     rstr CeedElemRestriction for CeedVector containing assembled CeedQFunction
1012 
1013   @return An error code: 0 - success, otherwise - failure
1014 
1015   @ref Backend
1016 **/
1017 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data, CeedVector vec, CeedElemRestriction rstr) {
1018   CeedCall(CeedVectorReferenceCopy(vec, &data->vec));
1019   CeedCall(CeedElemRestrictionReferenceCopy(rstr, &data->rstr));
1020 
1021   data->is_setup = true;
1022   return CEED_ERROR_SUCCESS;
1023 }
1024 
1025 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data, CeedVector *vec, CeedElemRestriction *rstr) {
1026   if (!data->is_setup) {
1027     // LCOV_EXCL_START
1028     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE, "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1029     // LCOV_EXCL_STOP
1030   }
1031 
1032   CeedCall(CeedVectorReferenceCopy(data->vec, vec));
1033   CeedCall(CeedElemRestrictionReferenceCopy(data->rstr, rstr));
1034 
1035   return CEED_ERROR_SUCCESS;
1036 }
1037 
1038 /**
1039   @brief Destroy CeedQFunctionAssemblyData
1040 
1041   @param[in,out] data  CeedQFunctionAssemblyData to destroy
1042 
1043   @return An error code: 0 - success, otherwise - failure
1044 
1045   @ref Backend
1046 **/
1047 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1048   if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;
1049 
1050   CeedCall(CeedDestroy(&(*data)->ceed));
1051   CeedCall(CeedVectorDestroy(&(*data)->vec));
1052   CeedCall(CeedElemRestrictionDestroy(&(*data)->rstr));
1053 
1054   CeedCall(CeedFree(data));
1055   return CEED_ERROR_SUCCESS;
1056 }
1057 
1058 /**
1059   @brief Get CeedOperatorAssemblyData
1060 
1061   @param[in]  op   CeedOperator to assemble
1062   @param[out] data CeedQFunctionAssemblyData
1063 
1064   @return An error code: 0 - success, otherwise - failure
1065 
1066   @ref Backend
1067 **/
1068 int CeedOperatorGetOperatorAssemblyData(CeedOperator op, CeedOperatorAssemblyData *data) {
1069   if (!op->op_assembled) {
1070     CeedOperatorAssemblyData data;
1071 
1072     CeedCall(CeedOperatorAssemblyDataCreate(op->ceed, op, &data));
1073     op->op_assembled = data;
1074   }
1075   *data = op->op_assembled;
1076 
1077   return CEED_ERROR_SUCCESS;
1078 }
1079 
1080 /**
1081   @brief Create object holding CeedOperator assembly data
1082 
1083   @param[in]  ceed Ceed object where the CeedOperatorAssemblyData will be created
1084   @param[in]  op   CeedOperator to be assembled
1085   @param[out] data Address of the variable where the newly created CeedOperatorAssemblyData will be stored
1086 
1087   @return An error code: 0 - success, otherwise - failure
1088 
1089   @ref Backend
1090 **/
1091 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op, CeedOperatorAssemblyData *data) {
1092   CeedCall(CeedCalloc(1, data));
1093   (*data)->ceed = ceed;
1094   CeedCall(CeedReference(ceed));
1095 
1096   // Build OperatorAssembly data
1097   CeedQFunction       qf;
1098   CeedQFunctionField *qf_fields;
1099   CeedOperatorField  *op_fields;
1100   CeedInt             num_input_fields;
1101   CeedCall(CeedOperatorGetQFunction(op, &qf));
1102   CeedCall(CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL));
1103   CeedCall(CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL));
1104 
1105   // Determine active input basis
1106   CeedInt       num_eval_mode_in = 0, dim = 1;
1107   CeedEvalMode *eval_mode_in = NULL;
1108   CeedBasis     basis_in     = NULL;
1109   for (CeedInt i = 0; i < num_input_fields; i++) {
1110     CeedVector vec;
1111     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1112     if (vec == CEED_VECTOR_ACTIVE) {
1113       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_in));
1114       CeedCall(CeedBasisGetDimension(basis_in, &dim));
1115       CeedEvalMode eval_mode;
1116       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1117       switch (eval_mode) {
1118         case CEED_EVAL_NONE:
1119         case CEED_EVAL_INTERP:
1120           CeedCall(CeedRealloc(num_eval_mode_in + 1, &eval_mode_in));
1121           eval_mode_in[num_eval_mode_in] = eval_mode;
1122           num_eval_mode_in += 1;
1123           break;
1124         case CEED_EVAL_GRAD:
1125           CeedCall(CeedRealloc(num_eval_mode_in + dim, &eval_mode_in));
1126           for (CeedInt d = 0; d < dim; d++) {
1127             eval_mode_in[num_eval_mode_in + d] = eval_mode;
1128           }
1129           num_eval_mode_in += dim;
1130           break;
1131         case CEED_EVAL_WEIGHT:
1132         case CEED_EVAL_DIV:
1133         case CEED_EVAL_CURL:
1134           break;  // Caught by QF Assembly
1135       }
1136     }
1137   }
1138   (*data)->num_eval_mode_in = num_eval_mode_in;
1139   (*data)->eval_mode_in     = eval_mode_in;
1140   CeedCall(CeedBasisReferenceCopy(basis_in, &(*data)->basis_in));
1141 
1142   // Determine active output basis
1143   CeedInt num_output_fields;
1144   CeedCall(CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields));
1145   CeedCall(CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields));
1146   CeedInt       num_eval_mode_out = 0;
1147   CeedEvalMode *eval_mode_out     = NULL;
1148   CeedBasis     basis_out         = NULL;
1149   for (CeedInt i = 0; i < num_output_fields; i++) {
1150     CeedVector vec;
1151     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
1152     if (vec == CEED_VECTOR_ACTIVE) {
1153       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis_out));
1154       CeedEvalMode eval_mode;
1155       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
1156       switch (eval_mode) {
1157         case CEED_EVAL_NONE:
1158         case CEED_EVAL_INTERP:
1159           CeedCall(CeedRealloc(num_eval_mode_out + 1, &eval_mode_out));
1160           eval_mode_out[num_eval_mode_out] = eval_mode;
1161           num_eval_mode_out += 1;
1162           break;
1163         case CEED_EVAL_GRAD:
1164           CeedCall(CeedRealloc(num_eval_mode_out + dim, &eval_mode_out));
1165           for (CeedInt d = 0; d < dim; d++) {
1166             eval_mode_out[num_eval_mode_out + d] = eval_mode;
1167           }
1168           num_eval_mode_out += dim;
1169           break;
1170         case CEED_EVAL_WEIGHT:
1171         case CEED_EVAL_DIV:
1172         case CEED_EVAL_CURL:
1173           break;  // Caught by QF Assembly
1174       }
1175     }
1176   }
1177   (*data)->num_eval_mode_out = num_eval_mode_out;
1178   (*data)->eval_mode_out     = eval_mode_out;
1179   CeedCall(CeedBasisReferenceCopy(basis_out, &(*data)->basis_out));
1180 
1181   return CEED_ERROR_SUCCESS;
1182 }
1183 
1184 /**
1185   @brief Get CeedOperator CeedEvalModes for assembly
1186 
1187   @param[in]  data              CeedOperatorAssemblyData
1188   @param[out] num_eval_mode_in  Pointer to hold number of input CeedEvalModes, or NULL
1189   @param[out] eval_mode_in      Pointer to hold input CeedEvalModes, or NULL
1190   @param[out] num_eval_mode_out Pointer to hold number of output CeedEvalModes, or NULL
1191   @param[out] eval_mode_out     Pointer to hold output CeedEvalModes, or NULL
1192 
1193   @return An error code: 0 - success, otherwise - failure
1194 
1195   @ref Backend
1196 **/
1197 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data, CeedInt *num_eval_mode_in, const CeedEvalMode **eval_mode_in,
1198                                          CeedInt *num_eval_mode_out, const CeedEvalMode **eval_mode_out) {
1199   if (num_eval_mode_in) *num_eval_mode_in = data->num_eval_mode_in;
1200   if (eval_mode_in) *eval_mode_in = data->eval_mode_in;
1201   if (num_eval_mode_out) *num_eval_mode_out = data->num_eval_mode_out;
1202   if (eval_mode_out) *eval_mode_out = data->eval_mode_out;
1203 
1204   return CEED_ERROR_SUCCESS;
1205 }
1206 
1207 /**
1208   @brief Get CeedOperator CeedBasis data for assembly
1209 
1210   @param[in]  data      CeedOperatorAssemblyData
1211   @param[out] basis_in  Pointer to hold active input CeedBasis, or NULL
1212   @param[out] B_in      Pointer to hold assembled active input B, or NULL
1213   @param[out] basis_out Pointer to hold active output CeedBasis, or NULL
1214   @param[out] B_out     Pointer to hold assembled active output B, or NULL
1215 
1216   @return An error code: 0 - success, otherwise - failure
1217 
1218   @ref Backend
1219 **/
1220 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data, CeedBasis *basis_in, const CeedScalar **B_in, CeedBasis *basis_out,
1221                                      const CeedScalar **B_out) {
1222   // Assemble B_in, B_out if needed
1223   if (B_in && !data->B_in) {
1224     CeedInt           num_qpts, elem_size;
1225     CeedScalar       *B_in, *identity = NULL;
1226     const CeedScalar *interp_in, *grad_in;
1227     bool              has_eval_none = false;
1228 
1229     CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_in, &num_qpts));
1230     CeedCall(CeedBasisGetNumNodes(data->basis_in, &elem_size));
1231     CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_in, &B_in));
1232 
1233     for (CeedInt i = 0; i < data->num_eval_mode_in; i++) {
1234       has_eval_none = has_eval_none || (data->eval_mode_in[i] == CEED_EVAL_NONE);
1235     }
1236     if (has_eval_none) {
1237       CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1238       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1239         identity[i * elem_size + i] = 1.0;
1240       }
1241     }
1242     CeedCall(CeedBasisGetInterp(data->basis_in, &interp_in));
1243     CeedCall(CeedBasisGetGrad(data->basis_in, &grad_in));
1244 
1245     for (CeedInt q = 0; q < num_qpts; q++) {
1246       for (CeedInt n = 0; n < elem_size; n++) {
1247         CeedInt d_in = -1;
1248         for (CeedInt e_in = 0; e_in < data->num_eval_mode_in; e_in++) {
1249           const CeedInt     qq = data->num_eval_mode_in * q;
1250           const CeedScalar *b  = NULL;
1251 
1252           if (data->eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in++;
1253           CeedOperatorGetBasisPointer(data->eval_mode_in[e_in], identity, interp_in, &grad_in[d_in * num_qpts * elem_size], &b);
1254           B_in[(qq + e_in) * elem_size + n] = b[q * elem_size + n];
1255         }
1256       }
1257     }
1258     data->B_in = B_in;
1259   }
1260 
1261   if (B_out && !data->B_out) {
1262     CeedInt           num_qpts, elem_size;
1263     CeedScalar       *B_out, *identity = NULL;
1264     const CeedScalar *interp_out, *grad_out;
1265     bool              has_eval_none = false;
1266 
1267     CeedCall(CeedBasisGetNumQuadraturePoints(data->basis_out, &num_qpts));
1268     CeedCall(CeedBasisGetNumNodes(data->basis_out, &elem_size));
1269     CeedCall(CeedCalloc(num_qpts * elem_size * data->num_eval_mode_out, &B_out));
1270 
1271     for (CeedInt i = 0; i < data->num_eval_mode_out; i++) {
1272       has_eval_none = has_eval_none || (data->eval_mode_out[i] == CEED_EVAL_NONE);
1273     }
1274     if (has_eval_none) {
1275       CeedCall(CeedCalloc(num_qpts * elem_size, &identity));
1276       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1277         identity[i * elem_size + i] = 1.0;
1278       }
1279     }
1280     CeedCall(CeedBasisGetInterp(data->basis_out, &interp_out));
1281     CeedCall(CeedBasisGetGrad(data->basis_out, &grad_out));
1282 
1283     for (CeedInt q = 0; q < num_qpts; q++) {
1284       for (CeedInt n = 0; n < elem_size; n++) {
1285         CeedInt d_out = -1;
1286         for (CeedInt e_out = 0; e_out < data->num_eval_mode_out; e_out++) {
1287           const CeedInt     qq = data->num_eval_mode_out * q;
1288           const CeedScalar *b  = NULL;
1289 
1290           if (data->eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out++;
1291           CeedOperatorGetBasisPointer(data->eval_mode_out[e_out], identity, interp_out, &grad_out[d_out * num_qpts * elem_size], &b);
1292           B_out[(qq + e_out) * elem_size + n] = b[q * elem_size + n];
1293         }
1294       }
1295     }
1296     data->B_out = B_out;
1297   }
1298 
1299   if (basis_in) *basis_in = data->basis_in;
1300   if (B_in) *B_in = data->B_in;
1301   if (basis_out) *basis_out = data->basis_out;
1302   if (B_out) *B_out = data->B_out;
1303 
1304   return CEED_ERROR_SUCCESS;
1305 }
1306 
1307 /**
1308   @brief Destroy CeedOperatorAssemblyData
1309 
1310   @param[in,out] data CeedOperatorAssemblyData to destroy
1311 
1312   @return An error code: 0 - success, otherwise - failure
1313 
1314   @ref Backend
1315 **/
1316 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1317   if (!*data) return CEED_ERROR_SUCCESS;
1318 
1319   CeedCall(CeedDestroy(&(*data)->ceed));
1320   CeedCall(CeedBasisDestroy(&(*data)->basis_in));
1321   CeedCall(CeedBasisDestroy(&(*data)->basis_out));
1322   CeedCall(CeedFree(&(*data)->eval_mode_in));
1323   CeedCall(CeedFree(&(*data)->eval_mode_out));
1324   CeedCall(CeedFree(&(*data)->B_in));
1325   CeedCall(CeedFree(&(*data)->B_out));
1326 
1327   CeedCall(CeedFree(data));
1328   return CEED_ERROR_SUCCESS;
1329 }
1330 
1331 /// @}
1332 
1333 /// ----------------------------------------------------------------------------
1334 /// CeedOperator Public API
1335 /// ----------------------------------------------------------------------------
1336 /// @addtogroup CeedOperatorUser
1337 /// @{
1338 
1339 /**
1340   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1341 
1342   This returns a CeedVector containing a matrix at each quadrature point providing the action of the CeedQFunction associated with the CeedOperator.
1343     The vector 'assembled' is of shape [num_elements, num_input_fields, num_output_fields, num_quad_points] and contains column-major matrices
1344 representing the action of the CeedQFunction for a corresponding quadrature point on an element. Inputs and outputs are in the order provided by the
1345 user when adding CeedOperator fields. For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and 'v', provided in that order,
1346 would result in an assembled QFunction that consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting on the input [u, du_0, du_1]
1347 and producing the output [dv_0, dv_1, v].
1348 
1349   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1350 
1351   @param[in]  op        CeedOperator to assemble CeedQFunction
1352   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1353   @param[out] rstr      CeedElemRestriction for CeedVector containing assembled CeedQFunction
1354   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1355 
1356   @return An error code: 0 - success, otherwise - failure
1357 
1358   @ref User
1359 **/
1360 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1361   CeedCall(CeedOperatorCheckReady(op));
1362 
1363   if (op->LinearAssembleQFunction) {
1364     // Backend version
1365     CeedCall(op->LinearAssembleQFunction(op, assembled, rstr, request));
1366   } else {
1367     // Operator fallback
1368     CeedOperator op_fallback;
1369 
1370     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1371     if (op_fallback) {
1372       CeedCall(CeedOperatorLinearAssembleQFunction(op_fallback, assembled, rstr, request));
1373     } else {
1374       // LCOV_EXCL_START
1375       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunction");
1376       // LCOV_EXCL_STOP
1377     }
1378   }
1379   return CEED_ERROR_SUCCESS;
1380 }
1381 
1382 /**
1383   @brief Assemble CeedQFunction and store result internally.
1384            Return copied references of stored data to the caller.
1385            Caller is responsible for ownership and destruction of the copied references.
1386            See also @ref CeedOperatorLinearAssembleQFunction
1387 
1388   @param[in]  op        CeedOperator to assemble CeedQFunction
1389   @param[out] assembled CeedVector to store assembled CeedQFunction at quadrature points
1390   @param[out] rstr      CeedElemRestriction for CeedVector containing assembledCeedQFunction
1391   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1392 
1393   @return An error code: 0 - success, otherwise - failure
1394 
1395   @ref User
1396 **/
1397 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1398   CeedCall(CeedOperatorCheckReady(op));
1399 
1400   if (op->LinearAssembleQFunctionUpdate) {
1401     // Backend version
1402     bool                qf_assembled_is_setup;
1403     CeedVector          assembled_vec  = NULL;
1404     CeedElemRestriction assembled_rstr = NULL;
1405 
1406     CeedCall(CeedQFunctionAssemblyDataIsSetup(op->qf_assembled, &qf_assembled_is_setup));
1407     if (qf_assembled_is_setup) {
1408       bool update_needed;
1409 
1410       CeedCall(CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec, &assembled_rstr));
1411       CeedCall(CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled, &update_needed));
1412       if (update_needed) {
1413         CeedCall(op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr, request));
1414       }
1415     } else {
1416       CeedCall(op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr, request));
1417       CeedCall(CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec, assembled_rstr));
1418     }
1419     CeedCall(CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false));
1420 
1421     // Copy reference from internally held copy
1422     *assembled = NULL;
1423     *rstr      = NULL;
1424     CeedCall(CeedVectorReferenceCopy(assembled_vec, assembled));
1425     CeedCall(CeedVectorDestroy(&assembled_vec));
1426     CeedCall(CeedElemRestrictionReferenceCopy(assembled_rstr, rstr));
1427     CeedCall(CeedElemRestrictionDestroy(&assembled_rstr));
1428   } else {
1429     // Operator fallback
1430     CeedOperator op_fallback;
1431 
1432     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1433     if (op_fallback) {
1434       CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled, rstr, request));
1435     } else {
1436       // LCOV_EXCL_START
1437       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED, "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1438       // LCOV_EXCL_STOP
1439     }
1440   }
1441 
1442   return CEED_ERROR_SUCCESS;
1443 }
1444 
1445 /**
1446   @brief Assemble the diagonal of a square linear CeedOperator
1447 
1448   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1449 
1450   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1451 
1452   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1453 
1454   @param[in]  op        CeedOperator to assemble CeedQFunction
1455   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1456   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1457 
1458   @return An error code: 0 - success, otherwise - failure
1459 
1460   @ref User
1461 **/
1462 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1463   bool is_composite;
1464   CeedCall(CeedOperatorCheckReady(op));
1465   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1466 
1467   CeedSize input_size = 0, output_size = 0;
1468   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1469   if (input_size != output_size) {
1470     // LCOV_EXCL_START
1471     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1472     // LCOV_EXCL_STOP
1473   }
1474 
1475   // Early exit for empty operator
1476   if (!is_composite) {
1477     CeedInt num_elem = 0;
1478 
1479     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1480     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1481   }
1482 
1483   if (op->LinearAssembleDiagonal) {
1484     // Backend version
1485     CeedCall(op->LinearAssembleDiagonal(op, assembled, request));
1486     return CEED_ERROR_SUCCESS;
1487   } else if (op->LinearAssembleAddDiagonal) {
1488     // Backend version with zeroing first
1489     CeedCall(CeedVectorSetValue(assembled, 0.0));
1490     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1491     return CEED_ERROR_SUCCESS;
1492   } else {
1493     // Operator fallback
1494     CeedOperator op_fallback;
1495 
1496     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1497     if (op_fallback) {
1498       CeedCall(CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request));
1499       return CEED_ERROR_SUCCESS;
1500     }
1501   }
1502   // Default interface implementation
1503   CeedCall(CeedVectorSetValue(assembled, 0.0));
1504   CeedCall(CeedOperatorLinearAssembleAddDiagonal(op, assembled, request));
1505 
1506   return CEED_ERROR_SUCCESS;
1507 }
1508 
1509 /**
1510   @brief Assemble the diagonal of a square linear CeedOperator
1511 
1512   This sums into a CeedVector the diagonal of a linear CeedOperator.
1513 
1514   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1515 
1516   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1517 
1518   @param[in]  op        CeedOperator to assemble CeedQFunction
1519   @param[out] assembled CeedVector to store assembled CeedOperator diagonal
1520   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1521 
1522   @return An error code: 0 - success, otherwise - failure
1523 
1524   @ref User
1525 **/
1526 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1527   bool is_composite;
1528   CeedCall(CeedOperatorCheckReady(op));
1529   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1530 
1531   CeedSize input_size = 0, output_size = 0;
1532   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1533   if (input_size != output_size) {
1534     // LCOV_EXCL_START
1535     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1536     // LCOV_EXCL_STOP
1537   }
1538 
1539   // Early exit for empty operator
1540   if (!is_composite) {
1541     CeedInt num_elem = 0;
1542 
1543     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1544     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1545   }
1546 
1547   if (op->LinearAssembleAddDiagonal) {
1548     // Backend version
1549     CeedCall(op->LinearAssembleAddDiagonal(op, assembled, request));
1550     return CEED_ERROR_SUCCESS;
1551   } else {
1552     // Operator fallback
1553     CeedOperator op_fallback;
1554 
1555     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1556     if (op_fallback) {
1557       CeedCall(CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request));
1558       return CEED_ERROR_SUCCESS;
1559     }
1560   }
1561   // Default interface implementation
1562   if (is_composite) {
1563     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, false, assembled));
1564   } else {
1565     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, false, assembled));
1566   }
1567 
1568   return CEED_ERROR_SUCCESS;
1569 }
1570 
1571 /**
1572   @brief Assemble the point block diagonal of a square linear CeedOperator
1573 
1574   This overwrites a CeedVector with the point block diagonal of a linear CeedOperator.
1575 
1576   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1577 
1578   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1579 
1580   @param[in]  op        CeedOperator to assemble CeedQFunction
1581   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1582 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1583 component in].
1584   @param[in]  request   Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1585 
1586   @return An error code: 0 - success, otherwise - failure
1587 
1588   @ref User
1589 **/
1590 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1591   bool is_composite;
1592   CeedCall(CeedOperatorCheckReady(op));
1593   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1594 
1595   CeedSize input_size = 0, output_size = 0;
1596   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1597   if (input_size != output_size) {
1598     // LCOV_EXCL_START
1599     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1600     // LCOV_EXCL_STOP
1601   }
1602 
1603   // Early exit for empty operator
1604   if (!is_composite) {
1605     CeedInt num_elem = 0;
1606 
1607     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1608     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1609   }
1610 
1611   if (op->LinearAssemblePointBlockDiagonal) {
1612     // Backend version
1613     CeedCall(op->LinearAssemblePointBlockDiagonal(op, assembled, request));
1614     return CEED_ERROR_SUCCESS;
1615   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1616     // Backend version with zeroing first
1617     CeedCall(CeedVectorSetValue(assembled, 0.0));
1618     CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1619     return CEED_ERROR_SUCCESS;
1620   } else {
1621     // Operator fallback
1622     CeedOperator op_fallback;
1623 
1624     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1625     if (op_fallback) {
1626       CeedCall(CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled, request));
1627       return CEED_ERROR_SUCCESS;
1628     }
1629   }
1630   // Default interface implementation
1631   CeedCall(CeedVectorSetValue(assembled, 0.0));
1632   CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request));
1633 
1634   return CEED_ERROR_SUCCESS;
1635 }
1636 
1637 /**
1638   @brief Assemble the point block diagonal of a square linear CeedOperator
1639 
1640   This sums into a CeedVector with the point block diagonal of a linear CeedOperator.
1641 
1642   Note: Currently only non-composite CeedOperators with a single field and composite CeedOperators with single field sub-operators are supported.
1643 
1644   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1645 
1646   @param[in]  op        CeedOperator to assemble CeedQFunction
1647   @param[out] assembled CeedVector to store assembled CeedOperator point block diagonal, provided in row-major form with an @a num_comp * @a num_comp
1648 block at each node. The dimensions of this vector are derived from the active vector for the CeedOperator. The array has shape [nodes, component out,
1649 component in].
1650   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
1651 
1652   @return An error code: 0 - success, otherwise - failure
1653 
1654   @ref User
1655 **/
1656 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, CeedVector assembled, CeedRequest *request) {
1657   bool is_composite;
1658   CeedCall(CeedOperatorCheckReady(op));
1659   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1660 
1661   CeedSize input_size = 0, output_size = 0;
1662   CeedCall(CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size));
1663   if (input_size != output_size) {
1664     // LCOV_EXCL_START
1665     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1666     // LCOV_EXCL_STOP
1667   }
1668 
1669   // Early exit for empty operator
1670   if (!is_composite) {
1671     CeedInt num_elem = 0;
1672 
1673     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1674     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1675   }
1676 
1677   if (op->LinearAssembleAddPointBlockDiagonal) {
1678     // Backend version
1679     CeedCall(op->LinearAssembleAddPointBlockDiagonal(op, assembled, request));
1680     return CEED_ERROR_SUCCESS;
1681   } else {
1682     // Operator fallback
1683     CeedOperator op_fallback;
1684 
1685     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1686     if (op_fallback) {
1687       CeedCall(CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled, request));
1688       return CEED_ERROR_SUCCESS;
1689     }
1690   }
1691   // Default interface implementation
1692   if (is_composite) {
1693     CeedCall(CeedCompositeOperatorLinearAssembleAddDiagonal(op, request, true, assembled));
1694   } else {
1695     CeedCall(CeedSingleOperatorAssembleAddDiagonal_Core(op, request, true, assembled));
1696   }
1697 
1698   return CEED_ERROR_SUCCESS;
1699 }
1700 
1701 /**
1702    @brief Fully assemble the nonzero pattern of a linear operator.
1703 
1704    Expected to be used in conjunction with CeedOperatorLinearAssemble().
1705 
1706    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1707 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the number of entries and their (i, j)
1708 locations, while CeedOperatorLinearAssemble() provides the values in the same ordering.
1709 
1710    This will generally be slow unless your operator is low-order.
1711 
1712    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1713 
1714    @param[in]  op          CeedOperator to assemble
1715    @param[out] num_entries Number of entries in coordinate nonzero pattern
1716    @param[out] rows        Row number for each entry
1717    @param[out] cols        Column number for each entry
1718 
1719    @ref User
1720 **/
1721 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries, CeedInt **rows, CeedInt **cols) {
1722   CeedInt       num_suboperators, single_entries;
1723   CeedOperator *sub_operators;
1724   bool          is_composite;
1725   CeedCall(CeedOperatorCheckReady(op));
1726   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1727 
1728   if (op->LinearAssembleSymbolic) {
1729     // Backend version
1730     CeedCall(op->LinearAssembleSymbolic(op, num_entries, rows, cols));
1731     return CEED_ERROR_SUCCESS;
1732   } else {
1733     // Operator fallback
1734     CeedOperator op_fallback;
1735 
1736     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1737     if (op_fallback) {
1738       CeedCall(CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols));
1739       return CEED_ERROR_SUCCESS;
1740     }
1741   }
1742 
1743   // Default interface implementation
1744 
1745   // count entries and allocate rows, cols arrays
1746   *num_entries = 0;
1747   if (is_composite) {
1748     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1749     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1750     for (CeedInt k = 0; k < num_suboperators; ++k) {
1751       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1752       *num_entries += single_entries;
1753     }
1754   } else {
1755     CeedCall(CeedSingleOperatorAssemblyCountEntries(op, &single_entries));
1756     *num_entries += single_entries;
1757   }
1758   CeedCall(CeedCalloc(*num_entries, rows));
1759   CeedCall(CeedCalloc(*num_entries, cols));
1760 
1761   // assemble nonzero locations
1762   CeedInt offset = 0;
1763   if (is_composite) {
1764     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1765     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1766     for (CeedInt k = 0; k < num_suboperators; ++k) {
1767       CeedCall(CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows, *cols));
1768       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1769       offset += single_entries;
1770     }
1771   } else {
1772     CeedCall(CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols));
1773   }
1774 
1775   return CEED_ERROR_SUCCESS;
1776 }
1777 
1778 /**
1779    @brief Fully assemble the nonzero entries of a linear operator.
1780 
1781    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic().
1782 
1783    The assembly routines use coordinate format, with num_entries tuples of the form (i, j, value) which indicate that value should be added to the
1784 matrix in entry (i, j). Note that the (i, j) pairs are not unique and may repeat. This function returns the values of the nonzero entries to be added,
1785 their (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1786 
1787    This will generally be slow unless your operator is low-order.
1788 
1789    Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1790 
1791    @param[in]  op     CeedOperator to assemble
1792    @param[out] values Values to assemble into matrix
1793 
1794    @ref User
1795 **/
1796 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1797   CeedInt       num_suboperators, single_entries = 0;
1798   CeedOperator *sub_operators;
1799   bool          is_composite;
1800   CeedCall(CeedOperatorCheckReady(op));
1801   CeedCall(CeedOperatorIsComposite(op, &is_composite));
1802 
1803   // Early exit for empty operator
1804   if (!is_composite) {
1805     CeedInt num_elem = 0;
1806 
1807     CeedCall(CeedOperatorGetNumElements(op, &num_elem));
1808     if (num_elem == 0) return CEED_ERROR_SUCCESS;
1809   }
1810 
1811   if (op->LinearAssemble) {
1812     // Backend version
1813     CeedCall(op->LinearAssemble(op, values));
1814     return CEED_ERROR_SUCCESS;
1815   } else {
1816     // Operator fallback
1817     CeedOperator op_fallback;
1818 
1819     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
1820     if (op_fallback) {
1821       CeedCall(CeedOperatorLinearAssemble(op_fallback, values));
1822       return CEED_ERROR_SUCCESS;
1823     }
1824   }
1825 
1826   // Default interface implementation
1827   CeedInt offset = 0;
1828   if (is_composite) {
1829     CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1830     CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1831     for (CeedInt k = 0; k < num_suboperators; k++) {
1832       CeedCall(CeedSingleOperatorAssemble(sub_operators[k], offset, values));
1833       CeedCall(CeedSingleOperatorAssemblyCountEntries(sub_operators[k], &single_entries));
1834       offset += single_entries;
1835     }
1836   } else {
1837     CeedCall(CeedSingleOperatorAssemble(op, offset, values));
1838   }
1839 
1840   return CEED_ERROR_SUCCESS;
1841 }
1842 
1843 /**
1844   @brief Get the multiplicity of nodes across suboperators in a composite CeedOperator
1845 
1846   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
1847 
1848   @param[in]  op               Composite CeedOperator
1849   @param[in]  num_skip_indices Number of suboperators to skip
1850   @param[in]  skip_indices     Array of indices of suboperators to skip
1851   @param[out] mult             Vector to store multiplicity (of size l_size)
1852 
1853   @return An error code: 0 - success, otherwise - failure
1854 
1855   @ref User
1856 **/
1857 int CeedCompositeOperatorGetMultiplicity(CeedOperator op, CeedInt num_skip_indices, CeedInt *skip_indices, CeedVector mult) {
1858   CeedCall(CeedOperatorCheckReady(op));
1859 
1860   Ceed                ceed;
1861   CeedInt             num_suboperators;
1862   CeedSize            l_vec_len;
1863   CeedScalar         *mult_array;
1864   CeedVector          ones_l_vec;
1865   CeedElemRestriction elem_restr;
1866   CeedOperator       *sub_operators;
1867 
1868   CeedCall(CeedOperatorGetCeed(op, &ceed));
1869 
1870   // Zero mult vector
1871   CeedCall(CeedVectorSetValue(mult, 0.0));
1872 
1873   // Get suboperators
1874   CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
1875   CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
1876   if (num_suboperators == 0) return CEED_ERROR_SUCCESS;
1877 
1878   // Work vector
1879   CeedCall(CeedVectorGetLength(mult, &l_vec_len));
1880   CeedCall(CeedVectorCreate(ceed, l_vec_len, &ones_l_vec));
1881   CeedCall(CeedVectorSetValue(ones_l_vec, 1.0));
1882   CeedCall(CeedVectorGetArray(mult, CEED_MEM_HOST, &mult_array));
1883 
1884   // Compute multiplicity across suboperators
1885   for (CeedInt i = 0; i < num_suboperators; i++) {
1886     const CeedScalar *sub_mult_array;
1887     CeedVector        sub_mult_l_vec, ones_e_vec;
1888 
1889     // -- Check for suboperator to skip
1890     for (CeedInt j = 0; j < num_skip_indices; j++) {
1891       if (skip_indices[j] == i) continue;
1892     }
1893 
1894     // -- Sub operator multiplicity
1895     CeedCall(CeedOperatorGetActiveElemRestriction(sub_operators[i], &elem_restr));
1896     CeedCall(CeedElemRestrictionCreateVector(elem_restr, &sub_mult_l_vec, &ones_e_vec));
1897     CeedCall(CeedVectorSetValue(sub_mult_l_vec, 0.0));
1898     CeedCall(CeedElemRestrictionApply(elem_restr, CEED_NOTRANSPOSE, ones_l_vec, ones_e_vec, CEED_REQUEST_IMMEDIATE));
1899     CeedCall(CeedElemRestrictionApply(elem_restr, CEED_TRANSPOSE, ones_e_vec, sub_mult_l_vec, CEED_REQUEST_IMMEDIATE));
1900     CeedCall(CeedVectorGetArrayRead(sub_mult_l_vec, CEED_MEM_HOST, &sub_mult_array));
1901     // ---- Flag every node present in the current suboperator
1902     for (CeedInt j = 0; j < l_vec_len; j++) {
1903       if (sub_mult_array[j] > 0.0) mult_array[j] += 1.0;
1904     }
1905     CeedCall(CeedVectorRestoreArrayRead(sub_mult_l_vec, &sub_mult_array));
1906     CeedCall(CeedVectorDestroy(&sub_mult_l_vec));
1907     CeedCall(CeedVectorDestroy(&ones_e_vec));
1908   }
1909   CeedCall(CeedVectorRestoreArray(mult, &mult_array));
1910   CeedCall(CeedVectorDestroy(&ones_l_vec));
1911 
1912   return CEED_ERROR_SUCCESS;
1913 }
1914 
1915 /**
1916   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator, creating the prolongation basis from the fine and coarse
1917 grid interpolation
1918 
1919   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1920 
1921   @param[in]  op_fine      Fine grid operator
1922   @param[in]  p_mult_fine  L-vector multiplicity in parallel gather/scatter
1923   @param[in]  rstr_coarse  Coarse grid restriction
1924   @param[in]  basis_coarse Coarse grid active vector basis
1925   @param[out] op_coarse    Coarse grid operator
1926   @param[out] op_prolong   Coarse to fine operator
1927   @param[out] op_restrict  Fine to coarse operator
1928 
1929   @return An error code: 0 - success, otherwise - failure
1930 
1931   @ref User
1932 **/
1933 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1934                                      CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict) {
1935   CeedCall(CeedOperatorCheckReady(op_fine));
1936 
1937   // Build prolongation matrix
1938   CeedBasis basis_fine, basis_c_to_f;
1939   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1940   CeedCall(CeedBasisCreateProjection(basis_coarse, basis_fine, &basis_c_to_f));
1941 
1942   // Core code
1943   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
1944 
1945   return CEED_ERROR_SUCCESS;
1946 }
1947 
1948 /**
1949   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a tensor basis for the active basis
1950 
1951   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
1952 
1953   @param[in]  op_fine       Fine grid operator
1954   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
1955   @param[in]  rstr_coarse   Coarse grid restriction
1956   @param[in]  basis_coarse  Coarse grid active vector basis
1957   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation
1958   @param[out] op_coarse     Coarse grid operator
1959   @param[out] op_prolong    Coarse to fine operator
1960   @param[out] op_restrict   Fine to coarse operator
1961 
1962   @return An error code: 0 - success, otherwise - failure
1963 
1964   @ref User
1965 **/
1966 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1967                                              const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
1968                                              CeedOperator *op_restrict) {
1969   CeedCall(CeedOperatorCheckReady(op_fine));
1970   Ceed ceed;
1971   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
1972 
1973   // Check for compatible quadrature spaces
1974   CeedBasis basis_fine;
1975   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
1976   CeedInt Q_f, Q_c;
1977   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
1978   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
1979   if (Q_f != Q_c) {
1980     // LCOV_EXCL_START
1981     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
1982     // LCOV_EXCL_STOP
1983   }
1984 
1985   // Coarse to fine basis
1986   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
1987   CeedCall(CeedBasisGetDimension(basis_fine, &dim));
1988   CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
1989   CeedCall(CeedBasisGetNumNodes1D(basis_fine, &P_1d_f));
1990   CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
1991   P_1d_c = dim == 1 ? num_nodes_c : dim == 2 ? sqrt(num_nodes_c) : cbrt(num_nodes_c);
1992   CeedScalar *q_ref, *q_weight, *grad;
1993   CeedCall(CeedCalloc(P_1d_f, &q_ref));
1994   CeedCall(CeedCalloc(P_1d_f, &q_weight));
1995   CeedCall(CeedCalloc(P_1d_f * P_1d_c * dim, &grad));
1996   CeedBasis basis_c_to_f;
1997   CeedCall(CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
1998   CeedCall(CeedFree(&q_ref));
1999   CeedCall(CeedFree(&q_weight));
2000   CeedCall(CeedFree(&grad));
2001 
2002   // Core code
2003   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2004   return CEED_ERROR_SUCCESS;
2005 }
2006 
2007 /**
2008   @brief Create a multigrid coarse operator and level transfer operators for a CeedOperator with a non-tensor basis for the active vector
2009 
2010   Note: Calling this function asserts that setup is complete and sets all four CeedOperators as immutable.
2011 
2012   @param[in]  op_fine       Fine grid operator
2013   @param[in]  p_mult_fine   L-vector multiplicity in parallel gather/scatter
2014   @param[in]  rstr_coarse   Coarse grid restriction
2015   @param[in]  basis_coarse  Coarse grid active vector basis
2016   @param[in]  interp_c_to_f Matrix for coarse to fine interpolation
2017   @param[out] op_coarse     Coarse grid operator
2018   @param[out] op_prolong    Coarse to fine operator
2019   @param[out] op_restrict   Fine to coarse operator
2020 
2021   @return An error code: 0 - success, otherwise - failure
2022 
2023   @ref User
2024 **/
2025 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2026                                        const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
2027                                        CeedOperator *op_restrict) {
2028   CeedCall(CeedOperatorCheckReady(op_fine));
2029   Ceed ceed;
2030   CeedCall(CeedOperatorGetCeed(op_fine, &ceed));
2031 
2032   // Check for compatible quadrature spaces
2033   CeedBasis basis_fine;
2034   CeedCall(CeedOperatorGetActiveBasis(op_fine, &basis_fine));
2035   CeedInt Q_f, Q_c;
2036   CeedCall(CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f));
2037   CeedCall(CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c));
2038   if (Q_f != Q_c) {
2039     // LCOV_EXCL_START
2040     return CeedError(ceed, CEED_ERROR_DIMENSION, "Bases must have compatible quadrature spaces");
2041     // LCOV_EXCL_STOP
2042   }
2043 
2044   // Coarse to fine basis
2045   CeedElemTopology topo;
2046   CeedCall(CeedBasisGetTopology(basis_fine, &topo));
2047   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2048   CeedCall(CeedBasisGetDimension(basis_fine, &dim));
2049   CeedCall(CeedBasisGetNumComponents(basis_fine, &num_comp));
2050   CeedCall(CeedBasisGetNumNodes(basis_fine, &num_nodes_f));
2051   CeedCall(CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c));
2052   CeedScalar *q_ref, *q_weight, *grad;
2053   CeedCall(CeedCalloc(num_nodes_f * dim, &q_ref));
2054   CeedCall(CeedCalloc(num_nodes_f, &q_weight));
2055   CeedCall(CeedCalloc(num_nodes_f * num_nodes_c * dim, &grad));
2056   CeedBasis basis_c_to_f;
2057   CeedCall(CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f, interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f));
2058   CeedCall(CeedFree(&q_ref));
2059   CeedCall(CeedFree(&q_weight));
2060   CeedCall(CeedFree(&grad));
2061 
2062   // Core code
2063   CeedCall(CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse, basis_coarse, basis_c_to_f, op_coarse, op_prolong, op_restrict));
2064   return CEED_ERROR_SUCCESS;
2065 }
2066 
2067 /**
2068   @brief Build a FDM based approximate inverse for each element for a CeedOperator
2069 
2070   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization Method based approximate inverse.
2071     This function obtains the simultaneous diagonalization for the 1D mass and Laplacian operators, M = V^T V, K = V^T S V.
2072     The assembled QFunction is used to modify the eigenvalues from simultaneous diagonalization and obtain an approximate inverse of the form V^T
2073 S^hat V. The CeedOperator must be linear and non-composite. The associated CeedQFunction must therefore also be linear.
2074 
2075   Note: Calling this function asserts that setup is complete and sets the CeedOperator as immutable.
2076 
2077   @param[in]  op      CeedOperator to create element inverses
2078   @param[out] fdm_inv CeedOperator to apply the action of a FDM based inverse for each element
2079   @param[in]  request Address of CeedRequest for non-blocking completion, else @ref CEED_REQUEST_IMMEDIATE
2080 
2081   @return An error code: 0 - success, otherwise - failure
2082 
2083   @ref User
2084 **/
2085 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv, CeedRequest *request) {
2086   CeedCall(CeedOperatorCheckReady(op));
2087 
2088   if (op->CreateFDMElementInverse) {
2089     // Backend version
2090     CeedCall(op->CreateFDMElementInverse(op, fdm_inv, request));
2091     return CEED_ERROR_SUCCESS;
2092   } else {
2093     // Operator fallback
2094     CeedOperator op_fallback;
2095 
2096     CeedCall(CeedOperatorGetFallback(op, &op_fallback));
2097     if (op_fallback) {
2098       CeedCall(CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request));
2099       return CEED_ERROR_SUCCESS;
2100     }
2101   }
2102 
2103   // Default interface implementation
2104   Ceed ceed, ceed_parent;
2105   CeedCall(CeedOperatorGetCeed(op, &ceed));
2106   CeedCall(CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent));
2107   ceed_parent = ceed_parent ? ceed_parent : ceed;
2108   CeedQFunction qf;
2109   CeedCall(CeedOperatorGetQFunction(op, &qf));
2110 
2111   // Determine active input basis
2112   bool                interp = false, grad = false;
2113   CeedBasis           basis = NULL;
2114   CeedElemRestriction rstr  = NULL;
2115   CeedOperatorField  *op_fields;
2116   CeedQFunctionField *qf_fields;
2117   CeedInt             num_input_fields;
2118   CeedCall(CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL));
2119   CeedCall(CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL));
2120   for (CeedInt i = 0; i < num_input_fields; i++) {
2121     CeedVector vec;
2122     CeedCall(CeedOperatorFieldGetVector(op_fields[i], &vec));
2123     if (vec == CEED_VECTOR_ACTIVE) {
2124       CeedEvalMode eval_mode;
2125       CeedCall(CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode));
2126       interp = interp || eval_mode == CEED_EVAL_INTERP;
2127       grad   = grad || eval_mode == CEED_EVAL_GRAD;
2128       CeedCall(CeedOperatorFieldGetBasis(op_fields[i], &basis));
2129       CeedCall(CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr));
2130     }
2131   }
2132   if (!basis) {
2133     // LCOV_EXCL_START
2134     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2135     // LCOV_EXCL_STOP
2136   }
2137   CeedSize l_size = 1;
2138   CeedInt  P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
2139   CeedCall(CeedBasisGetNumNodes1D(basis, &P_1d));
2140   CeedCall(CeedBasisGetNumNodes(basis, &elem_size));
2141   CeedCall(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
2142   CeedCall(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
2143   CeedCall(CeedBasisGetDimension(basis, &dim));
2144   CeedCall(CeedBasisGetNumComponents(basis, &num_comp));
2145   CeedCall(CeedElemRestrictionGetNumElements(rstr, &num_elem));
2146   CeedCall(CeedElemRestrictionGetLVectorSize(rstr, &l_size));
2147 
2148   // Build and diagonalize 1D Mass and Laplacian
2149   bool tensor_basis;
2150   CeedCall(CeedBasisIsTensor(basis, &tensor_basis));
2151   if (!tensor_basis) {
2152     // LCOV_EXCL_START
2153     return CeedError(ceed, CEED_ERROR_BACKEND, "FDMElementInverse only supported for tensor bases");
2154     // LCOV_EXCL_STOP
2155   }
2156   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2157   CeedCall(CeedCalloc(P_1d * P_1d, &mass));
2158   CeedCall(CeedCalloc(P_1d * P_1d, &laplace));
2159   CeedCall(CeedCalloc(P_1d * P_1d, &x));
2160   CeedCall(CeedCalloc(P_1d * P_1d, &fdm_interp));
2161   CeedCall(CeedCalloc(P_1d, &lambda));
2162   // -- Build matrices
2163   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2164   CeedCall(CeedBasisGetInterp1D(basis, &interp_1d));
2165   CeedCall(CeedBasisGetGrad1D(basis, &grad_1d));
2166   CeedCall(CeedBasisGetQWeights(basis, &q_weight_1d));
2167   CeedCall(CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim, mass, laplace));
2168 
2169   // -- Diagonalize
2170   CeedCall(CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d));
2171   CeedCall(CeedFree(&mass));
2172   CeedCall(CeedFree(&laplace));
2173   for (CeedInt i = 0; i < P_1d; i++) {
2174     for (CeedInt j = 0; j < P_1d; j++) fdm_interp[i + j * P_1d] = x[j + i * P_1d];
2175   }
2176   CeedCall(CeedFree(&x));
2177 
2178   // Assemble QFunction
2179   CeedVector          assembled;
2180   CeedElemRestriction rstr_qf;
2181   CeedCall(CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled, &rstr_qf, request));
2182   CeedInt layout[3];
2183   CeedCall(CeedElemRestrictionGetELayout(rstr_qf, &layout));
2184   CeedCall(CeedElemRestrictionDestroy(&rstr_qf));
2185   CeedScalar max_norm = 0;
2186   CeedCall(CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm));
2187 
2188   // Calculate element averages
2189   CeedInt           num_modes = (interp ? 1 : 0) + (grad ? dim : 0);
2190   CeedScalar       *elem_avg;
2191   const CeedScalar *assembled_array, *q_weight_array;
2192   CeedVector        q_weight;
2193   CeedCall(CeedVectorCreate(ceed_parent, num_qpts, &q_weight));
2194   CeedCall(CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_weight));
2195   CeedCall(CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array));
2196   CeedCall(CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array));
2197   CeedCall(CeedCalloc(num_elem, &elem_avg));
2198   const CeedScalar qf_value_bound = max_norm * 100 * CEED_EPSILON;
2199   for (CeedInt e = 0; e < num_elem; e++) {
2200     CeedInt count = 0;
2201     for (CeedInt q = 0; q < num_qpts; q++) {
2202       for (CeedInt i = 0; i < num_comp * num_comp * num_modes * num_modes; i++) {
2203         if (fabs(assembled_array[q * layout[0] + i * layout[1] + e * layout[2]]) > qf_value_bound) {
2204           elem_avg[e] += assembled_array[q * layout[0] + i * layout[1] + e * layout[2]] / q_weight_array[q];
2205           count++;
2206         }
2207       }
2208     }
2209     if (count) {
2210       elem_avg[e] /= count;
2211     } else {
2212       elem_avg[e] = 1.0;
2213     }
2214   }
2215   CeedCall(CeedVectorRestoreArrayRead(assembled, &assembled_array));
2216   CeedCall(CeedVectorDestroy(&assembled));
2217   CeedCall(CeedVectorRestoreArrayRead(q_weight, &q_weight_array));
2218   CeedCall(CeedVectorDestroy(&q_weight));
2219 
2220   // Build FDM diagonal
2221   CeedVector  q_data;
2222   CeedScalar *q_data_array, *fdm_diagonal;
2223   CeedCall(CeedCalloc(num_comp * elem_size, &fdm_diagonal));
2224   const CeedScalar fdm_diagonal_bound = elem_size * CEED_EPSILON;
2225   for (CeedInt c = 0; c < num_comp; c++) {
2226     for (CeedInt n = 0; n < elem_size; n++) {
2227       if (interp) fdm_diagonal[c * elem_size + n] = 1.0;
2228       if (grad) {
2229         for (CeedInt d = 0; d < dim; d++) {
2230           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2231           fdm_diagonal[c * elem_size + n] += lambda[i];
2232         }
2233       }
2234       if (fabs(fdm_diagonal[c * elem_size + n]) < fdm_diagonal_bound) fdm_diagonal[c * elem_size + n] = fdm_diagonal_bound;
2235     }
2236   }
2237   CeedCall(CeedVectorCreate(ceed_parent, num_elem * num_comp * elem_size, &q_data));
2238   CeedCall(CeedVectorSetValue(q_data, 0.0));
2239   CeedCall(CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array));
2240   for (CeedInt e = 0; e < num_elem; e++) {
2241     for (CeedInt c = 0; c < num_comp; c++) {
2242       for (CeedInt n = 0; n < elem_size; n++) q_data_array[(e * num_comp + c) * elem_size + n] = 1. / (elem_avg[e] * fdm_diagonal[c * elem_size + n]);
2243     }
2244   }
2245   CeedCall(CeedFree(&elem_avg));
2246   CeedCall(CeedFree(&fdm_diagonal));
2247   CeedCall(CeedVectorRestoreArray(q_data, &q_data_array));
2248 
2249   // Setup FDM operator
2250   // -- Basis
2251   CeedBasis   fdm_basis;
2252   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2253   CeedCall(CeedCalloc(P_1d * P_1d, &grad_dummy));
2254   CeedCall(CeedCalloc(P_1d, &q_ref_dummy));
2255   CeedCall(CeedCalloc(P_1d, &q_weight_dummy));
2256   CeedCall(CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d, fdm_interp, grad_dummy, q_ref_dummy, q_weight_dummy, &fdm_basis));
2257   CeedCall(CeedFree(&fdm_interp));
2258   CeedCall(CeedFree(&grad_dummy));
2259   CeedCall(CeedFree(&q_ref_dummy));
2260   CeedCall(CeedFree(&q_weight_dummy));
2261   CeedCall(CeedFree(&lambda));
2262 
2263   // -- Restriction
2264   CeedElemRestriction rstr_qd_i;
2265   CeedInt             strides[3] = {1, elem_size, elem_size * num_comp};
2266   CeedCall(CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size, num_comp, num_elem * num_comp * elem_size, strides, &rstr_qd_i));
2267   // -- QFunction
2268   CeedQFunction qf_fdm;
2269   CeedCall(CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm));
2270   CeedCall(CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP));
2271   CeedCall(CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE));
2272   CeedCall(CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP));
2273   CeedCall(CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp));
2274   // -- QFunction context
2275   CeedInt *num_comp_data;
2276   CeedCall(CeedCalloc(1, &num_comp_data));
2277   num_comp_data[0] = num_comp;
2278   CeedQFunctionContext ctx_fdm;
2279   CeedCall(CeedQFunctionContextCreate(ceed, &ctx_fdm));
2280   CeedCall(CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER, sizeof(*num_comp_data), num_comp_data));
2281   CeedCall(CeedQFunctionSetContext(qf_fdm, ctx_fdm));
2282   CeedCall(CeedQFunctionContextDestroy(&ctx_fdm));
2283   // -- Operator
2284   CeedCall(CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv));
2285   CeedCall(CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2286   CeedCall(CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED, q_data));
2287   CeedCall(CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE));
2288 
2289   // Cleanup
2290   CeedCall(CeedVectorDestroy(&q_data));
2291   CeedCall(CeedBasisDestroy(&fdm_basis));
2292   CeedCall(CeedElemRestrictionDestroy(&rstr_qd_i));
2293   CeedCall(CeedQFunctionDestroy(&qf_fdm));
2294 
2295   return CEED_ERROR_SUCCESS;
2296 }
2297 
2298 /// @}
2299