xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision 067fd99f3dc9013a4cd6c57af3402ece8c168174)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed/ceed.h>
9 #include <ceed/backend.h>
10 #include <ceed-impl.h>
11 #include <math.h>
12 #include <assert.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 /// @file
18 /// Implementation of CeedOperator preconditioning interfaces
19 
20 /// ----------------------------------------------------------------------------
21 /// CeedOperator Library Internal Preconditioning Functions
22 /// ----------------------------------------------------------------------------
23 /// @addtogroup CeedOperatorDeveloper
24 /// @{
25 
26 /**
27   @brief Duplicate a CeedQFunction with a reference Ceed to fallback for advanced
28          CeedOperator functionality
29 
30   @param[in] fallback_ceed Ceed on which to create fallback CeedQFunction
31   @param[in] qf            CeedQFunction to create fallback for
32   @param[out] qf_fallback  fallback CeedQFunction
33 
34   @return An error code: 0 - success, otherwise - failure
35 
36   @ref Developer
37 **/
38 static int CeedQFunctionCreateFallback(Ceed fallback_ceed, CeedQFunction qf,
39                                        CeedQFunction *qf_fallback) {
40   int ierr;
41 
42   // Check if NULL qf passed in
43   if (!qf) return CEED_ERROR_SUCCESS;
44 
45   CeedDebug256(qf->ceed, 1, "---------- CeedOperator Fallback ----------\n");
46   CeedDebug256(qf->ceed, 255, "Creating fallback CeedQFunction\n");
47 
48   char *source_path_with_name = "";
49   if (qf->source_path) {
50     size_t path_len = strlen(qf->source_path),
51            name_len = strlen(qf->kernel_name);
52     ierr = CeedCalloc(path_len + name_len + 2, &source_path_with_name);
53     CeedChk(ierr);
54     memcpy(source_path_with_name, qf->source_path, path_len);
55     memcpy(&source_path_with_name[path_len], ":", 1);
56     memcpy(&source_path_with_name[path_len + 1], qf->kernel_name, name_len);
57   } else {
58     ierr = CeedCalloc(1, &source_path_with_name); CeedChk(ierr);
59   }
60 
61   ierr = CeedQFunctionCreateInterior(fallback_ceed, qf->vec_length,
62                                      qf->function, source_path_with_name,
63                                      qf_fallback); CeedChk(ierr);
64   {
65     CeedQFunctionContext ctx;
66 
67     ierr = CeedQFunctionGetContext(qf, &ctx); CeedChk(ierr);
68     ierr = CeedQFunctionSetContext(*qf_fallback, ctx); CeedChk(ierr);
69   }
70   for (CeedInt i = 0; i < qf->num_input_fields; i++) {
71     ierr = CeedQFunctionAddInput(*qf_fallback, qf->input_fields[i]->field_name,
72                                  qf->input_fields[i]->size,
73                                  qf->input_fields[i]->eval_mode); CeedChk(ierr);
74   }
75   for (CeedInt i = 0; i < qf->num_output_fields; i++) {
76     ierr = CeedQFunctionAddOutput(*qf_fallback, qf->output_fields[i]->field_name,
77                                   qf->output_fields[i]->size,
78                                   qf->output_fields[i]->eval_mode); CeedChk(ierr);
79   }
80   ierr = CeedFree(&source_path_with_name); CeedChk(ierr);
81 
82   return CEED_ERROR_SUCCESS;
83 }
84 
85 /**
86   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced
87          CeedOperator functionality
88 
89   @param op  CeedOperator to create fallback for
90 
91   @return An error code: 0 - success, otherwise - failure
92 
93   @ref Developer
94 **/
95 static int CeedOperatorCreateFallback(CeedOperator op) {
96   int ierr;
97   Ceed ceed_fallback;
98 
99   // Check not already created
100   if (op->op_fallback) return CEED_ERROR_SUCCESS;
101 
102   // Fallback Ceed
103   ierr = CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback); CeedChk(ierr);
104   if (!ceed_fallback) return CEED_ERROR_SUCCESS;
105 
106   CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
107   CeedDebug256(op->ceed, 255, "Creating fallback CeedOperator\n");
108 
109   // Clone Op
110   CeedOperator op_fallback;
111   if (op->is_composite) {
112     ierr = CeedCompositeOperatorCreate(ceed_fallback, &op_fallback);
113     CeedChk(ierr);
114     for (CeedInt i = 0; i < op->num_suboperators; i++) {
115       CeedOperator op_sub_fallback;
116 
117       ierr = CeedOperatorGetFallback(op->sub_operators[i], &op_sub_fallback);
118       CeedChk(ierr);
119       ierr = CeedCompositeOperatorAddSub(op_fallback, op_sub_fallback); CeedChk(ierr);
120     }
121   } else {
122     CeedQFunction qf_fallback = NULL, dqf_fallback = NULL, dqfT_fallback = NULL;
123     ierr = CeedQFunctionCreateFallback(ceed_fallback, op->qf, &qf_fallback);
124     CeedChk(ierr);
125     ierr = CeedQFunctionCreateFallback(ceed_fallback, op->dqf, &dqf_fallback);
126     CeedChk(ierr);
127     ierr = CeedQFunctionCreateFallback(ceed_fallback, op->dqfT, &dqfT_fallback);
128     CeedChk(ierr);
129     ierr = CeedOperatorCreate(ceed_fallback, qf_fallback, dqf_fallback,
130                               dqfT_fallback, &op_fallback); CeedChk(ierr);
131     for (CeedInt i = 0; i < op->qf->num_input_fields; i++) {
132       ierr = CeedOperatorSetField(op_fallback, op->input_fields[i]->field_name,
133                                   op->input_fields[i]->elem_restr,
134                                   op->input_fields[i]->basis,
135                                   op->input_fields[i]->vec); CeedChk(ierr);
136     }
137     for (CeedInt i = 0; i < op->qf->num_output_fields; i++) {
138       ierr = CeedOperatorSetField(op_fallback, op->output_fields[i]->field_name,
139                                   op->output_fields[i]->elem_restr,
140                                   op->output_fields[i]->basis,
141                                   op->output_fields[i]->vec); CeedChk(ierr);
142     }
143     ierr = CeedQFunctionAssemblyDataReferenceCopy(op->qf_assembled,
144            &op_fallback->qf_assembled); CeedChk(ierr);
145     if (op_fallback->num_qpts == 0) {
146       ierr = CeedOperatorSetNumQuadraturePoints(op_fallback, op->num_qpts);
147       CeedChk(ierr);
148     }
149     // Cleanup
150     ierr = CeedQFunctionDestroy(&qf_fallback); CeedChk(ierr);
151     ierr = CeedQFunctionDestroy(&dqf_fallback); CeedChk(ierr);
152     ierr = CeedQFunctionDestroy(&dqfT_fallback); CeedChk(ierr);
153   }
154   ierr = CeedOperatorSetName(op_fallback, op->name); CeedChk(ierr);
155   ierr = CeedOperatorCheckReady(op_fallback); CeedChk(ierr);
156   op->op_fallback = op_fallback;
157 
158   return CEED_ERROR_SUCCESS;
159 }
160 
161 /**
162   @brief Retreive fallback CeedOperator with a reference Ceed for advanced CeedOperator functionality
163 
164   @param[in] op            CeedOperator to retrieve fallback for
165   @param[out] op_fallback  Fallback CeedOperator
166 
167   @return An error code: 0 - success, otherwise - failure
168 
169   @ref Developer
170 **/
171 int CeedOperatorGetFallback(CeedOperator op, CeedOperator *op_fallback) {
172   int ierr;
173 
174   // Create if needed
175   if (!op->op_fallback) {
176     ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
177   }
178   if (op->op_fallback) {
179     bool is_debug;
180 
181     ierr = CeedIsDebug(op->ceed, &is_debug); CeedChk(ierr);
182     if (is_debug) {
183       Ceed ceed_fallback;
184       const char *resource, *resource_fallback;
185 
186       ierr = CeedGetOperatorFallbackCeed(op->ceed, &ceed_fallback); CeedChk(ierr);
187       ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
188       ierr = CeedGetResource(ceed_fallback, &resource_fallback); CeedChk(ierr);
189 
190       CeedDebug256(op->ceed, 1, "---------- CeedOperator Fallback ----------\n");
191       CeedDebug256(op->ceed, 255,
192                    "Falling back from %s operator at address %ld to %s operator at address %ld\n",
193                    resource, op, resource_fallback, op->op_fallback);
194     }
195   }
196   *op_fallback = op->op_fallback;
197 
198   return CEED_ERROR_SUCCESS;
199 }
200 
201 /**
202   @brief Select correct basis matrix pointer based on CeedEvalMode
203 
204   @param[in] eval_mode   Current basis evaluation mode
205   @param[in] identity    Pointer to identity matrix
206   @param[in] interp      Pointer to interpolation matrix
207   @param[in] grad        Pointer to gradient matrix
208   @param[out] basis_ptr  Basis pointer to set
209 
210   @ref Developer
211 **/
212 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode,
213     const CeedScalar *identity, const CeedScalar *interp,
214     const CeedScalar *grad, const CeedScalar **basis_ptr) {
215   switch (eval_mode) {
216   case CEED_EVAL_NONE:
217     *basis_ptr = identity;
218     break;
219   case CEED_EVAL_INTERP:
220     *basis_ptr = interp;
221     break;
222   case CEED_EVAL_GRAD:
223     *basis_ptr = grad;
224     break;
225   case CEED_EVAL_WEIGHT:
226   case CEED_EVAL_DIV:
227   case CEED_EVAL_CURL:
228     break; // Caught by QF Assembly
229   }
230   assert(*basis_ptr != NULL);
231 }
232 
233 /**
234   @brief Create point block restriction for active operator field
235 
236   @param[in] rstr              Original CeedElemRestriction for active field
237   @param[out] pointblock_rstr  Address of the variable where the newly created
238                                  CeedElemRestriction will be stored
239 
240   @return An error code: 0 - success, otherwise - failure
241 
242   @ref Developer
243 **/
244 static int CeedOperatorCreateActivePointBlockRestriction(
245   CeedElemRestriction rstr,
246   CeedElemRestriction *pointblock_rstr) {
247   int ierr;
248   Ceed ceed;
249   ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr);
250   const CeedInt *offsets;
251   ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets);
252   CeedChk(ierr);
253 
254   // Expand offsets
255   CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1,
256                                                       *pointblock_offsets;
257   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
258   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
259   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
260   ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr);
261   CeedInt shift = num_comp;
262   if (comp_stride != 1)
263     shift *= num_comp;
264   ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets);
265   CeedChk(ierr);
266   for (CeedInt i = 0; i < num_elem*elem_size; i++) {
267     pointblock_offsets[i] = offsets[i]*shift;
268     if (pointblock_offsets[i] > max)
269       max = pointblock_offsets[i];
270   }
271 
272   // Create new restriction
273   ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp,
274                                    1, max + num_comp*num_comp, CEED_MEM_HOST,
275                                    CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr);
276   CeedChk(ierr);
277 
278   // Cleanup
279   ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr);
280 
281   return CEED_ERROR_SUCCESS;
282 }
283 
284 /**
285   @brief Core logic for assembling operator diagonal or point block diagonal
286 
287   @param[in] op             CeedOperator to assemble point block diagonal
288   @param[in] request        Address of CeedRequest for non-blocking completion, else
289                               CEED_REQUEST_IMMEDIATE
290   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
291   @param[out] assembled     CeedVector to store assembled diagonal
292 
293   @return An error code: 0 - success, otherwise - failure
294 
295   @ref Developer
296 **/
297 static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op,
298     CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
299   int ierr;
300   Ceed ceed;
301   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
302 
303   // Assemble QFunction
304   CeedQFunction qf;
305   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
306   CeedInt num_input_fields, num_output_fields;
307   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
308   CeedChk(ierr);
309   CeedVector assembled_qf;
310   CeedElemRestriction rstr;
311   ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op,  &assembled_qf,
312          &rstr, request); CeedChk(ierr);
313   CeedInt layout[3];
314   ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr);
315   ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
316   CeedScalar max_norm = 0;
317   ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr);
318 
319   // Get assembly data
320   CeedOperatorAssemblyData data;
321   ierr = CeedOperatorGetOperatorAssemblyData(op, &data); CeedChk(ierr);
322   const CeedEvalMode *eval_mode_in, *eval_mode_out;
323   CeedInt num_eval_mode_in, num_eval_mode_out;
324   ierr = CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in,
325          &eval_mode_in, &num_eval_mode_out, &eval_mode_out); CeedChk(ierr);
326   CeedBasis basis_in, basis_out;
327   ierr = CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out,
328                                           NULL); CeedChk(ierr);
329   CeedInt num_comp;
330   ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr);
331 
332   // Assemble point block diagonal restriction, if needed
333   CeedElemRestriction diag_rstr;
334   ierr = CeedOperatorGetActiveElemRestriction(op, &diag_rstr); CeedChk(ierr);
335   if (is_pointblock) {
336     CeedElemRestriction point_block_rstr;
337     ierr = CeedOperatorCreateActivePointBlockRestriction(diag_rstr,
338            &point_block_rstr);
339     CeedChk(ierr);
340     diag_rstr = point_block_rstr;
341   }
342 
343   // Create diagonal vector
344   CeedVector elem_diag;
345   ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
346   CeedChk(ierr);
347 
348   // Assemble element operator diagonals
349   CeedScalar *elem_diag_array;
350   const CeedScalar *assembled_qf_array;
351   ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr);
352   ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
353   CeedChk(ierr);
354   ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
355   CeedChk(ierr);
356   CeedInt num_elem, num_nodes, num_qpts;
357   ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr);
358   ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr);
359   ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
360 
361   // Basis matrices
362   const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
363   CeedScalar *identity = NULL;
364   bool has_eval_none = false;
365   for (CeedInt i = 0; i < num_eval_mode_in; i++) {
366     has_eval_none = has_eval_none || (eval_mode_in[i] == CEED_EVAL_NONE);
367   }
368   for (CeedInt i = 0; i<num_eval_mode_out; i++) {
369     has_eval_none = has_eval_none || (eval_mode_out[i] == CEED_EVAL_NONE);
370   }
371   if (has_eval_none) {
372     ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr);
373     for (CeedInt i = 0; i < (num_nodes < num_qpts ? num_nodes : num_qpts); i++)
374       identity[i*num_nodes+i] = 1.0;
375   }
376   ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
377   ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr);
378   ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
379   ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr);
380   // Compute the diagonal of B^T D B
381   // Each element
382   const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
383   for (CeedInt e=0; e<num_elem; e++) {
384     CeedInt d_out = -1;
385     // Each basis eval mode pair
386     for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
387       const CeedScalar *bt = NULL;
388       if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
389         d_out += 1;
390       CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out,
391                                   &grad_out[d_out*num_qpts*num_nodes], &bt);
392       CeedInt d_in = -1;
393       for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
394         const CeedScalar *b = NULL;
395         if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
396           d_in += 1;
397         CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in,
398                                     &grad_in[d_in*num_qpts*num_nodes], &b);
399         // Each component
400         for (CeedInt c_out=0; c_out<num_comp; c_out++)
401           // Each qpoint/node pair
402           for (CeedInt q=0; q<num_qpts; q++)
403             if (is_pointblock) {
404               // Point Block Diagonal
405               for (CeedInt c_in=0; c_in<num_comp; c_in++) {
406                 const CeedScalar qf_value =
407                   assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)*
408                                                      num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
409                 if (fabs(qf_value) > qf_value_bound)
410                   for (CeedInt n=0; n<num_nodes; n++)
411                     elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
412                       bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
413               }
414             } else {
415               // Diagonal Only
416               const CeedScalar qf_value =
417                 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)*
418                                                    num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
419               if (fabs(qf_value) > qf_value_bound)
420                 for (CeedInt n=0; n<num_nodes; n++)
421                   elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
422                     bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
423             }
424       }
425     }
426   }
427   ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr);
428   ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
429   CeedChk(ierr);
430 
431   // Assemble local operator diagonal
432   ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag,
433                                   assembled, request); CeedChk(ierr);
434 
435   // Cleanup
436   if (is_pointblock) {
437     ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr);
438   }
439   ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
440   ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr);
441   ierr = CeedFree(&identity); CeedChk(ierr);
442 
443   return CEED_ERROR_SUCCESS;
444 }
445 
446 /**
447   @brief Core logic for assembling composite operator diagonal
448 
449   @param[in] op             CeedOperator to assemble point block diagonal
450   @param[in] request        Address of CeedRequest for non-blocking completion, else
451                             CEED_REQUEST_IMMEDIATE
452   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
453   @param[out] assembled     CeedVector to store assembled diagonal
454 
455   @return An error code: 0 - success, otherwise - failure
456 
457   @ref Developer
458 **/
459 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(
460   CeedOperator op, CeedRequest *request, const bool is_pointblock,
461   CeedVector assembled) {
462   int ierr;
463   CeedInt num_sub;
464   CeedOperator *suboperators;
465   ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr);
466   ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr);
467   for (CeedInt i = 0; i < num_sub; i++) {
468     if (is_pointblock) {
469       ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(suboperators[i],
470              assembled, request); CeedChk(ierr);
471     } else {
472       ierr = CeedOperatorLinearAssembleAddDiagonal(suboperators[i], assembled,
473              request); CeedChk(ierr);
474     }
475   }
476   return CEED_ERROR_SUCCESS;
477 }
478 
479 /**
480   @brief Build nonzero pattern for non-composite operator
481 
482   Users should generally use CeedOperatorLinearAssembleSymbolic()
483 
484   @param[in] op      CeedOperator to assemble nonzero pattern
485   @param[in] offset  Offset for number of entries
486   @param[out] rows   Row number for each entry
487   @param[out] cols   Column number for each entry
488 
489   @return An error code: 0 - success, otherwise - failure
490 
491   @ref Developer
492 **/
493 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset,
494     CeedInt *rows, CeedInt *cols) {
495   int ierr;
496   Ceed ceed = op->ceed;
497   if (op->is_composite)
498     // LCOV_EXCL_START
499     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
500                      "Composite operator not supported");
501   // LCOV_EXCL_STOP
502 
503   CeedSize num_nodes;
504   ierr = CeedOperatorGetActiveVectorLengths(op, &num_nodes, NULL); CeedChk(ierr);
505   CeedElemRestriction rstr_in;
506   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr);
507   CeedInt num_elem, elem_size, num_comp;
508   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
509   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
510   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
511   CeedInt layout_er[3];
512   ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr);
513 
514   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
515 
516   // Determine elem_dof relation
517   CeedVector index_vec;
518   ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr);
519   CeedScalar *array;
520   ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr);
521   for (CeedInt i = 0; i < num_nodes; i++) array[i] = i;
522   ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr);
523   CeedVector elem_dof;
524   ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof);
525   CeedChk(ierr);
526   ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr);
527   CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec,
528                            elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
529   const CeedScalar *elem_dof_a;
530   ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a);
531   CeedChk(ierr);
532   ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr);
533 
534   // Determine i, j locations for element matrices
535   CeedInt count = 0;
536   for (CeedInt e = 0; e < num_elem; e++) {
537     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
538       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
539         for (CeedInt i = 0; i < elem_size; i++) {
540           for (CeedInt j = 0; j < elem_size; j++) {
541             const CeedInt elem_dof_index_row = i*layout_er[0] +
542                                                (comp_out)*layout_er[1] + e*layout_er[2];
543             const CeedInt elem_dof_index_col = j*layout_er[0] +
544                                                comp_in*layout_er[1] + e*layout_er[2];
545 
546             const CeedInt row = elem_dof_a[elem_dof_index_row];
547             const CeedInt col = elem_dof_a[elem_dof_index_col];
548 
549             rows[offset + count] = row;
550             cols[offset + count] = col;
551             count++;
552           }
553         }
554       }
555     }
556   }
557   if (count != local_num_entries)
558     // LCOV_EXCL_START
559     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
560   // LCOV_EXCL_STOP
561   ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr);
562   ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr);
563 
564   return CEED_ERROR_SUCCESS;
565 }
566 
567 /**
568   @brief Assemble nonzero entries for non-composite operator
569 
570   Users should generally use CeedOperatorLinearAssemble()
571 
572   @param[in] op       CeedOperator to assemble
573   @param[in] offset   Offest for number of entries
574   @param[out] values  Values to assemble into matrix
575 
576   @return An error code: 0 - success, otherwise - failure
577 
578   @ref Developer
579 **/
580 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
581                                       CeedVector values) {
582   int ierr;
583   Ceed ceed = op->ceed;
584   if (op->is_composite)
585     // LCOV_EXCL_START
586     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
587                      "Composite operator not supported");
588   // LCOV_EXCL_STOP
589   if (op->num_elem == 0) return CEED_ERROR_SUCCESS;
590 
591   if (op->LinearAssembleSingle) {
592     // Backend version
593     ierr = op->LinearAssembleSingle(op, offset, values); CeedChk(ierr);
594     return CEED_ERROR_SUCCESS;
595   } else {
596     // Operator fallback
597     CeedOperator op_fallback;
598 
599     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
600     if (op_fallback) {
601       ierr = CeedSingleOperatorAssemble(op_fallback, offset, values);
602       CeedChk(ierr);
603       return CEED_ERROR_SUCCESS;
604     }
605   }
606 
607   // Assemble QFunction
608   CeedQFunction qf;
609   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
610   CeedVector assembled_qf;
611   CeedElemRestriction rstr_q;
612   ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(
613            op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
614   CeedScalar max_norm = 0;
615   ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr);
616   CeedSize qf_length;
617   ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr);
618 
619   CeedInt num_input_fields, num_output_fields;
620   CeedOperatorField *input_fields;
621   CeedOperatorField *output_fields;
622   ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields,
623                                &num_output_fields, &output_fields); CeedChk(ierr);
624 
625   // Get assembly data
626   CeedOperatorAssemblyData data;
627   ierr = CeedOperatorGetOperatorAssemblyData(op, &data); CeedChk(ierr);
628   const CeedEvalMode *eval_mode_in, *eval_mode_out;
629   CeedInt num_eval_mode_in, num_eval_mode_out;
630   ierr = CeedOperatorAssemblyDataGetEvalModes(data, &num_eval_mode_in,
631          &eval_mode_in, &num_eval_mode_out, &eval_mode_out); CeedChk(ierr);
632   CeedBasis basis_in, basis_out;
633   ierr = CeedOperatorAssemblyDataGetBases(data, &basis_in, NULL, &basis_out,
634                                           NULL); CeedChk(ierr);
635 
636   if (num_eval_mode_in == 0 || num_eval_mode_out == 0)
637     // LCOV_EXCL_START
638     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
639                      "Cannot assemble operator with out inputs/outputs");
640   // LCOV_EXCL_STOP
641 
642   CeedElemRestriction active_rstr;
643   CeedInt num_elem, elem_size, num_qpts, num_comp;
644   ierr = CeedOperatorGetActiveElemRestriction(op, &active_rstr); CeedChk(ierr);
645   ierr = CeedElemRestrictionGetNumElements(active_rstr, &num_elem); CeedChk(ierr);
646   ierr = CeedElemRestrictionGetElementSize(active_rstr, &elem_size);
647   CeedChk(ierr);
648   ierr = CeedElemRestrictionGetNumComponents(active_rstr, &num_comp);
649   CeedChk(ierr);
650   ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
651 
652   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
653 
654   // loop over elements and put in data structure
655   const CeedScalar *interp_in, *grad_in;
656   ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
657   ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
658 
659   const CeedScalar *assembled_qf_array;
660   ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
661   CeedChk(ierr);
662 
663   CeedInt layout_qf[3];
664   ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr);
665   ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr);
666 
667   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
668   const CeedScalar *B_mat_in, *B_mat_out;
669   ierr = CeedOperatorAssemblyDataGetBases(data, NULL, &B_mat_in, NULL,
670                                           &B_mat_out); CeedChk(ierr);
671   CeedScalar BTD_mat[elem_size * num_qpts*num_eval_mode_in];
672   CeedScalar elem_mat[elem_size * elem_size];
673   CeedInt count = 0;
674   CeedScalar *vals;
675   ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr);
676   const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
677   for (CeedInt e = 0; e < num_elem; e++) {
678     for (CeedInt comp_in = 0; comp_in < num_comp; comp_in++) {
679       for (CeedInt comp_out = 0; comp_out < num_comp; comp_out++) {
680         // Compute B^T*D
681         for (CeedInt n = 0; n < elem_size; n++) {
682           for (CeedInt q = 0; q < num_qpts; q++) {
683             for (CeedInt e_in = 0; e_in < num_eval_mode_in; e_in++) {
684               const CeedInt btd_index = n*(num_qpts*num_eval_mode_in) +
685                                         (num_eval_mode_in*q + e_in);
686               CeedScalar sum = 0.0;
687               for (CeedInt e_out = 0; e_out < num_eval_mode_out; e_out++) {
688                 const CeedInt b_out_index = (num_eval_mode_out*q + e_out)*elem_size + n;
689                 const CeedInt eval_mode_index = ((e_in*num_comp+comp_in)*num_eval_mode_out
690                                                  +e_out)*num_comp + comp_out;
691                 const CeedInt qf_index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
692                                          e*layout_qf[2];
693                 if (fabs(assembled_qf_array[qf_index]) > qf_value_bound) {
694                   sum += B_mat_out[b_out_index] * assembled_qf_array[qf_index];
695                 }
696               }
697               BTD_mat[btd_index] = sum;
698             }
699           }
700         }
701         // form element matrix itself (for each block component)
702         ierr = CeedMatrixMatrixMultiply(ceed, BTD_mat, B_mat_in, elem_mat, elem_size,
703                                         elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);
704 
705         // put element matrix in coordinate data structure
706         for (CeedInt i = 0; i < elem_size; i++) {
707           for (CeedInt j = 0; j < elem_size; j++) {
708             vals[offset + count] = elem_mat[i*elem_size + j];
709             count++;
710           }
711         }
712       }
713     }
714   }
715   if (count != local_num_entries)
716     // LCOV_EXCL_START
717     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
718   // LCOV_EXCL_STOP
719   ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr);
720 
721   ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
722   CeedChk(ierr);
723   ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
724 
725   return CEED_ERROR_SUCCESS;
726 }
727 
728 /**
729   @brief Count number of entries for assembled CeedOperator
730 
731   @param[in] op            CeedOperator to assemble
732   @param[out] num_entries  Number of entries in assembled representation
733 
734   @return An error code: 0 - success, otherwise - failure
735 
736   @ref Utility
737 **/
738 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op,
739     CeedInt *num_entries) {
740   int ierr;
741   CeedElemRestriction rstr;
742   CeedInt num_elem, elem_size, num_comp;
743 
744   if (op->is_composite)
745     // LCOV_EXCL_START
746     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
747                      "Composite operator not supported");
748   // LCOV_EXCL_STOP
749   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr);
750   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
751   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
752   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
753   *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
754 
755   return CEED_ERROR_SUCCESS;
756 }
757 
758 /**
759   @brief Common code for creating a multigrid coarse operator and level
760            transfer operators for a CeedOperator
761 
762   @param[in] op_fine       Fine grid operator
763   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
764   @param[in] rstr_coarse   Coarse grid restriction
765   @param[in] basis_coarse  Coarse grid active vector basis
766   @param[in] basis_c_to_f  Basis for coarse to fine interpolation
767   @param[out] op_coarse    Coarse grid operator
768   @param[out] op_prolong   Coarse to fine operator
769   @param[out] op_restrict  Fine to coarse operator
770 
771   @return An error code: 0 - success, otherwise - failure
772 
773   @ref Developer
774 **/
775 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
776     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
777     CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
778     CeedOperator *op_restrict) {
779   int ierr;
780   Ceed ceed;
781   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
782 
783   // Check for composite operator
784   bool is_composite;
785   ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
786   if (is_composite)
787     // LCOV_EXCL_START
788     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
789                      "Automatic multigrid setup for composite operators not supported");
790   // LCOV_EXCL_STOP
791 
792   // Coarse Grid
793   ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
794                             op_coarse); CeedChk(ierr);
795   CeedElemRestriction rstr_fine = NULL;
796   // -- Clone input fields
797   for (CeedInt i = 0; i < op_fine->qf->num_input_fields; i++) {
798     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
799       rstr_fine = op_fine->input_fields[i]->elem_restr;
800       ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
801                                   rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
802       CeedChk(ierr);
803     } else {
804       ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
805                                   op_fine->input_fields[i]->elem_restr,
806                                   op_fine->input_fields[i]->basis,
807                                   op_fine->input_fields[i]->vec); CeedChk(ierr);
808     }
809   }
810   // -- Clone output fields
811   for (CeedInt i = 0; i < op_fine->qf->num_output_fields; i++) {
812     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
813       ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
814                                   rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
815       CeedChk(ierr);
816     } else {
817       ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
818                                   op_fine->output_fields[i]->elem_restr,
819                                   op_fine->output_fields[i]->basis,
820                                   op_fine->output_fields[i]->vec); CeedChk(ierr);
821     }
822   }
823   // -- Clone QFunctionAssemblyData
824   ierr = CeedQFunctionAssemblyDataReferenceCopy(op_fine->qf_assembled,
825          &(*op_coarse)->qf_assembled); CeedChk(ierr);
826 
827   // Multiplicity vector
828   CeedVector mult_vec, mult_e_vec;
829   ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
830   CeedChk(ierr);
831   ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
832   ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
833                                   mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
834   ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
835   ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
836                                   CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
837   ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
838   ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);
839 
840   // Restriction
841   CeedInt num_comp;
842   ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
843   CeedQFunction qf_restrict;
844   ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
845   CeedChk(ierr);
846   CeedInt *num_comp_r_data;
847   ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
848   num_comp_r_data[0] = num_comp;
849   CeedQFunctionContext ctx_r;
850   ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
851   ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
852                                      sizeof(*num_comp_r_data), num_comp_r_data);
853   CeedChk(ierr);
854   ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
855   ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
856   ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
857   CeedChk(ierr);
858   ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
859   CeedChk(ierr);
860   ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
861                                 CEED_EVAL_INTERP); CeedChk(ierr);
862   ierr = CeedQFunctionSetUserFlopsEstimate(qf_restrict, num_comp); CeedChk(ierr);
863 
864   ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
865                             CEED_QFUNCTION_NONE, op_restrict);
866   CeedChk(ierr);
867   ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
868                               CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
869   CeedChk(ierr);
870   ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
871                               CEED_BASIS_COLLOCATED, mult_vec);
872   CeedChk(ierr);
873   ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
874                               CEED_VECTOR_ACTIVE); CeedChk(ierr);
875 
876   // Prolongation
877   CeedQFunction qf_prolong;
878   ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
879   CeedChk(ierr);
880   CeedInt *num_comp_p_data;
881   ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
882   num_comp_p_data[0] = num_comp;
883   CeedQFunctionContext ctx_p;
884   ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
885   ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
886                                      sizeof(*num_comp_p_data), num_comp_p_data);
887   CeedChk(ierr);
888   ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
889   ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
890   ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
891   CeedChk(ierr);
892   ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
893   CeedChk(ierr);
894   ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
895   CeedChk(ierr);
896   ierr = CeedQFunctionSetUserFlopsEstimate(qf_prolong, num_comp); CeedChk(ierr);
897 
898   ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
899                             CEED_QFUNCTION_NONE, op_prolong);
900   CeedChk(ierr);
901   ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
902                               CEED_VECTOR_ACTIVE); CeedChk(ierr);
903   ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
904                               CEED_BASIS_COLLOCATED, mult_vec);
905   CeedChk(ierr);
906   ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
907                               CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
908   CeedChk(ierr);
909 
910   // Clone name
911   bool has_name = op_fine->name;
912   size_t name_len = op_fine->name ? strlen(op_fine->name) : 0;
913   ierr = CeedOperatorSetName(*op_coarse, op_fine->name); CeedChk(ierr);
914   {
915     char *prolongation_name;
916     ierr = CeedCalloc(18 + name_len, &prolongation_name); CeedChk(ierr);
917     sprintf(prolongation_name, "prolongation%s%s", has_name ? " for " : "",
918             has_name ? op_fine->name : "");
919     ierr = CeedOperatorSetName(*op_prolong, prolongation_name); CeedChk(ierr);
920     ierr = CeedFree(&prolongation_name); CeedChk(ierr);
921   }
922   {
923     char *restriction_name;
924     ierr = CeedCalloc(17 + name_len, &restriction_name); CeedChk(ierr);
925     sprintf(restriction_name, "restriction%s%s", has_name ? " for " : "",
926             has_name ? op_fine->name : "");
927     ierr = CeedOperatorSetName(*op_restrict, restriction_name); CeedChk(ierr);
928     ierr = CeedFree(&restriction_name); CeedChk(ierr);
929   }
930 
931   // Cleanup
932   ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
933   ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
934   ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
935   ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
936 
937   return CEED_ERROR_SUCCESS;
938 }
939 
940 /**
941   @brief Build 1D mass matrix and Laplacian with perturbation
942 
943   @param[in] interp_1d    Interpolation matrix in one dimension
944   @param[in] grad_1d      Gradient matrix in one dimension
945   @param[in] q_weight_1d  Quadrature weights in one dimension
946   @param[in] P_1d         Number of basis nodes in one dimension
947   @param[in] Q_1d         Number of quadrature points in one dimension
948   @param[in] dim          Dimension of basis
949   @param[out] mass        Assembled mass matrix in one dimension
950   @param[out] laplace     Assembled perturbed Laplacian in one dimension
951 
952   @return An error code: 0 - success, otherwise - failure
953 
954   @ref Developer
955 **/
956 CeedPragmaOptimizeOff
957 static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
958                                 const CeedScalar *grad_1d,
959                                 const CeedScalar *q_weight_1d, CeedInt P_1d,
960                                 CeedInt Q_1d, CeedInt dim,
961                                 CeedScalar *mass, CeedScalar *laplace) {
962 
963   for (CeedInt i=0; i<P_1d; i++)
964     for (CeedInt j=0; j<P_1d; j++) {
965       CeedScalar sum = 0.0;
966       for (CeedInt k=0; k<Q_1d; k++)
967         sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
968       mass[i+j*P_1d] = sum;
969     }
970   // -- Laplacian
971   for (CeedInt i=0; i<P_1d; i++)
972     for (CeedInt j=0; j<P_1d; j++) {
973       CeedScalar sum = 0.0;
974       for (CeedInt k=0; k<Q_1d; k++)
975         sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
976       laplace[i+j*P_1d] = sum;
977     }
978   CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
979   for (CeedInt i=0; i<P_1d; i++)
980     laplace[i+P_1d*i] += perturbation;
981   return CEED_ERROR_SUCCESS;
982 }
983 CeedPragmaOptimizeOn
984 
985 /// @}
986 
987 /// ----------------------------------------------------------------------------
988 /// CeedOperator Backend API
989 /// ----------------------------------------------------------------------------
990 /// @addtogroup CeedOperatorBackend
991 /// @{
992 
993 /**
994   @brief Create object holding CeedQFunction assembly data for CeedOperator
995 
996   @param[in] ceed  A Ceed object where the CeedQFunctionAssemblyData will be created
997   @param[out] data Address of the variable where the newly created
998                      CeedQFunctionAssemblyData will be stored
999 
1000   @return An error code: 0 - success, otherwise - failure
1001 
1002   @ref Backend
1003 **/
1004 int CeedQFunctionAssemblyDataCreate(Ceed ceed,
1005                                     CeedQFunctionAssemblyData *data) {
1006   int ierr;
1007 
1008   ierr = CeedCalloc(1, data); CeedChk(ierr);
1009   (*data)->ref_count = 1;
1010   (*data)->ceed = ceed;
1011   ierr = CeedReference(ceed); CeedChk(ierr);
1012 
1013   return CEED_ERROR_SUCCESS;
1014 }
1015 
1016 /**
1017   @brief Increment the reference counter for a CeedQFunctionAssemblyData
1018 
1019   @param data  CeedQFunctionAssemblyData to increment the reference counter
1020 
1021   @return An error code: 0 - success, otherwise - failure
1022 
1023   @ref Backend
1024 **/
1025 int CeedQFunctionAssemblyDataReference(CeedQFunctionAssemblyData data) {
1026   data->ref_count++;
1027   return CEED_ERROR_SUCCESS;
1028 }
1029 
1030 /**
1031   @brief Set re-use of CeedQFunctionAssemblyData
1032 
1033   @param data       CeedQFunctionAssemblyData to mark for reuse
1034   @param reuse_data Boolean flag indicating data re-use
1035 
1036   @return An error code: 0 - success, otherwise - failure
1037 
1038   @ref Backend
1039 **/
1040 int CeedQFunctionAssemblyDataSetReuse(CeedQFunctionAssemblyData data,
1041                                       bool reuse_data) {
1042   data->reuse_data = reuse_data;
1043   data->needs_data_update = true;
1044   return CEED_ERROR_SUCCESS;
1045 }
1046 
1047 /**
1048   @brief Mark QFunctionAssemblyData as stale
1049 
1050   @param data              CeedQFunctionAssemblyData to mark as stale
1051   @param needs_data_update Boolean flag indicating if update is needed or completed
1052 
1053   @return An error code: 0 - success, otherwise - failure
1054 
1055   @ref Backend
1056 **/
1057 int CeedQFunctionAssemblyDataSetUpdateNeeded(CeedQFunctionAssemblyData data,
1058     bool needs_data_update) {
1059   data->needs_data_update = needs_data_update;
1060   return CEED_ERROR_SUCCESS;
1061 }
1062 
1063 /**
1064   @brief Determine if QFunctionAssemblyData needs update
1065 
1066   @param[in] data              CeedQFunctionAssemblyData to mark as stale
1067   @param[out] is_update_needed Boolean flag indicating if re-assembly is required
1068 
1069   @return An error code: 0 - success, otherwise - failure
1070 
1071   @ref Backend
1072 **/
1073 int CeedQFunctionAssemblyDataIsUpdateNeeded(CeedQFunctionAssemblyData data,
1074     bool *is_update_needed) {
1075   *is_update_needed = !data->reuse_data || data->needs_data_update;
1076   return CEED_ERROR_SUCCESS;
1077 }
1078 
1079 /**
1080   @brief Copy the pointer to a CeedQFunctionAssemblyData. Both pointers should
1081            be destroyed with `CeedCeedQFunctionAssemblyDataDestroy()`;
1082            Note: If `*data_copy` is non-NULL, then it is assumed that
1083            `*data_copy` is a pointer to a CeedQFunctionAssemblyData. This
1084            CeedQFunctionAssemblyData will be destroyed if `*data_copy` is
1085            the only reference to this CeedQFunctionAssemblyData.
1086 
1087   @param data            CeedQFunctionAssemblyData to copy reference to
1088   @param[out] data_copy  Variable to store copied reference
1089 
1090   @return An error code: 0 - success, otherwise - failure
1091 
1092   @ref Backend
1093 **/
1094 int CeedQFunctionAssemblyDataReferenceCopy(CeedQFunctionAssemblyData data,
1095     CeedQFunctionAssemblyData *data_copy) {
1096   int ierr;
1097 
1098   ierr = CeedQFunctionAssemblyDataReference(data); CeedChk(ierr);
1099   ierr = CeedQFunctionAssemblyDataDestroy(data_copy); CeedChk(ierr);
1100   *data_copy = data;
1101   return CEED_ERROR_SUCCESS;
1102 }
1103 
1104 /**
1105   @brief Get setup status for internal objects for CeedQFunctionAssemblyData
1106 
1107   @param[in] data      CeedQFunctionAssemblyData to retreive status
1108   @param[out] is_setup Boolean flag for setup status
1109 
1110   @return An error code: 0 - success, otherwise - failure
1111 
1112   @ref Backend
1113 **/
1114 int CeedQFunctionAssemblyDataIsSetup(CeedQFunctionAssemblyData data,
1115                                      bool *is_setup) {
1116   *is_setup = data->is_setup;
1117   return CEED_ERROR_SUCCESS;
1118 }
1119 
1120 /**
1121   @brief Set internal objects for CeedQFunctionAssemblyData
1122 
1123   @param[in] data  CeedQFunctionAssemblyData to set objects
1124   @param[in] vec   CeedVector to store assembled CeedQFunction at quadrature points
1125   @param[in] rstr  CeedElemRestriction for CeedVector containing assembled CeedQFunction
1126 
1127   @return An error code: 0 - success, otherwise - failure
1128 
1129   @ref Backend
1130 **/
1131 int CeedQFunctionAssemblyDataSetObjects(CeedQFunctionAssemblyData data,
1132                                         CeedVector vec, CeedElemRestriction rstr) {
1133   int ierr;
1134 
1135   ierr = CeedVectorReferenceCopy(vec, &data->vec); CeedChk(ierr);
1136   ierr = CeedElemRestrictionReferenceCopy(rstr, &data->rstr); CeedChk(ierr);
1137 
1138   data->is_setup = true;
1139   return CEED_ERROR_SUCCESS;
1140 }
1141 
1142 int CeedQFunctionAssemblyDataGetObjects(CeedQFunctionAssemblyData data,
1143                                         CeedVector *vec, CeedElemRestriction *rstr) {
1144   int ierr;
1145 
1146   if (!data->is_setup)
1147     // LCOV_EXCL_START
1148     return CeedError(data->ceed, CEED_ERROR_INCOMPLETE,
1149                      "Internal objects not set; must call CeedQFunctionAssemblyDataSetObjects first.");
1150   // LCOV_EXCL_STOP
1151 
1152   ierr = CeedVectorReferenceCopy(data->vec, vec); CeedChk(ierr);
1153   ierr = CeedElemRestrictionReferenceCopy(data->rstr, rstr); CeedChk(ierr);
1154 
1155   return CEED_ERROR_SUCCESS;
1156 }
1157 
1158 /**
1159   @brief Destroy CeedQFunctionAssemblyData
1160 
1161   @param[out] data  CeedQFunctionAssemblyData to destroy
1162 
1163   @return An error code: 0 - success, otherwise - failure
1164 
1165   @ref Backend
1166 **/
1167 int CeedQFunctionAssemblyDataDestroy(CeedQFunctionAssemblyData *data) {
1168   int ierr;
1169 
1170   if (!*data || --(*data)->ref_count > 0) return CEED_ERROR_SUCCESS;
1171 
1172   ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr);
1173   ierr = CeedVectorDestroy(&(*data)->vec); CeedChk(ierr);
1174   ierr = CeedElemRestrictionDestroy(&(*data)->rstr); CeedChk(ierr);
1175 
1176   ierr = CeedFree(data); CeedChk(ierr);
1177   return CEED_ERROR_SUCCESS;
1178 }
1179 
1180 /**
1181   @brief Get CeedOperatorAssemblyData
1182 
1183   @param[in] op     CeedOperator to assemble
1184   @param[out] data  CeedQFunctionAssemblyData
1185 
1186   @return An error code: 0 - success, otherwise - failure
1187 
1188   @ref Backend
1189 **/
1190 int CeedOperatorGetOperatorAssemblyData(CeedOperator op,
1191                                         CeedOperatorAssemblyData *data) {
1192   int ierr;
1193 
1194   if (!op->op_assembled) {
1195     CeedOperatorAssemblyData data;
1196 
1197     ierr = CeedOperatorAssemblyDataCreate(op->ceed, op, &data); CeedChk(ierr);
1198     op->op_assembled = data;
1199   }
1200   *data = op->op_assembled;
1201 
1202   return CEED_ERROR_SUCCESS;
1203 }
1204 
1205 /**
1206   @brief Create object holding CeedOperator assembly data
1207 
1208   @param[in] ceed   A Ceed object where the CeedOperatorAssemblyData will be created
1209   @param[in] op     CeedOperator to be assembled
1210   @param[out] data  Address of the variable where the newly created
1211                       CeedOperatorAssemblyData will be stored
1212 
1213   @return An error code: 0 - success, otherwise - failure
1214 
1215   @ref Backend
1216 **/
1217 int CeedOperatorAssemblyDataCreate(Ceed ceed, CeedOperator op,
1218                                    CeedOperatorAssemblyData *data) {
1219   int ierr;
1220 
1221   ierr = CeedCalloc(1, data); CeedChk(ierr);
1222   (*data)->ceed = ceed;
1223   ierr = CeedReference(ceed); CeedChk(ierr);
1224 
1225   // Build OperatorAssembly data
1226   CeedQFunction qf;
1227   CeedQFunctionField *qf_fields;
1228   CeedOperatorField *op_fields;
1229   CeedInt num_input_fields;
1230   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
1231   ierr = CeedQFunctionGetFields(qf, &num_input_fields, &qf_fields, NULL, NULL);
1232   CeedChk(ierr);
1233   ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr);
1234 
1235   // Determine active input basis
1236   CeedInt num_eval_mode_in = 0, dim = 1;
1237   CeedEvalMode *eval_mode_in = NULL;
1238   CeedBasis basis_in = NULL;
1239   for (CeedInt i=0; i<num_input_fields; i++) {
1240     CeedVector vec;
1241     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
1242     if (vec == CEED_VECTOR_ACTIVE) {
1243       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in);
1244       CeedChk(ierr);
1245       ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
1246       CeedEvalMode eval_mode;
1247       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
1248       CeedChk(ierr);
1249       switch (eval_mode) {
1250       case CEED_EVAL_NONE:
1251       case CEED_EVAL_INTERP:
1252         ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
1253         eval_mode_in[num_eval_mode_in] = eval_mode;
1254         num_eval_mode_in += 1;
1255         break;
1256       case CEED_EVAL_GRAD:
1257         ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
1258         for (CeedInt d=0; d<dim; d++) {
1259           eval_mode_in[num_eval_mode_in+d] = eval_mode;
1260         }
1261         num_eval_mode_in += dim;
1262         break;
1263       case CEED_EVAL_WEIGHT:
1264       case CEED_EVAL_DIV:
1265       case CEED_EVAL_CURL:
1266         break; // Caught by QF Assembly
1267       }
1268     }
1269   }
1270   (*data)->num_eval_mode_in = num_eval_mode_in;
1271   (*data)->eval_mode_in = eval_mode_in;
1272   ierr = CeedBasisReferenceCopy(basis_in, &(*data)->basis_in); CeedChk(ierr);
1273 
1274   // Determine active output basis
1275   CeedInt num_output_fields;
1276   ierr = CeedQFunctionGetFields(qf, NULL, NULL, &num_output_fields, &qf_fields);
1277   CeedChk(ierr);
1278   ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr);
1279   CeedInt num_eval_mode_out = 0;
1280   CeedEvalMode *eval_mode_out = NULL;
1281   CeedBasis basis_out = NULL;
1282   for (CeedInt i=0; i<num_output_fields; i++) {
1283     CeedVector vec;
1284     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
1285     if (vec == CEED_VECTOR_ACTIVE) {
1286       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out);
1287       CeedChk(ierr);
1288       CeedEvalMode eval_mode;
1289       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
1290       CeedChk(ierr);
1291       switch (eval_mode) {
1292       case CEED_EVAL_NONE:
1293       case CEED_EVAL_INTERP:
1294         ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
1295         eval_mode_out[num_eval_mode_out] = eval_mode;
1296         num_eval_mode_out += 1;
1297         break;
1298       case CEED_EVAL_GRAD:
1299         ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
1300         for (CeedInt d=0; d<dim; d++) {
1301           eval_mode_out[num_eval_mode_out+d] = eval_mode;
1302         }
1303         num_eval_mode_out += dim;
1304         break;
1305       case CEED_EVAL_WEIGHT:
1306       case CEED_EVAL_DIV:
1307       case CEED_EVAL_CURL:
1308         break; // Caught by QF Assembly
1309       }
1310     }
1311   }
1312   (*data)->num_eval_mode_out = num_eval_mode_out;
1313   (*data)->eval_mode_out = eval_mode_out;
1314   ierr = CeedBasisReferenceCopy(basis_out, &(*data)->basis_out); CeedChk(ierr);
1315 
1316   return CEED_ERROR_SUCCESS;
1317 }
1318 
1319 /**
1320   @brief Get CeedOperator CeedEvalModes for assembly
1321 
1322   @param[in] data                CeedOperatorAssemblyData
1323   @param[out] num_eval_mode_in   Pointer to hold number of input CeedEvalModes, or NULL
1324   @param[out] eval_mode_in       Pointer to hold input CeedEvalModes, or NULL
1325   @param[out] num_eval_mode_out  Pointer to hold number of output CeedEvalModes, or NULL
1326   @param[out] eval_mode_out      Pointer to hold output CeedEvalModes, or NULL
1327 
1328   @return An error code: 0 - success, otherwise - failure
1329 
1330   @ref Backend
1331 **/
1332 int CeedOperatorAssemblyDataGetEvalModes(CeedOperatorAssemblyData data,
1333     CeedInt *num_eval_mode_in, const CeedEvalMode **eval_mode_in,
1334     CeedInt *num_eval_mode_out, const CeedEvalMode **eval_mode_out) {
1335   if (num_eval_mode_in) *num_eval_mode_in   = data->num_eval_mode_in;
1336   if (eval_mode_in) *eval_mode_in           = data->eval_mode_in;
1337   if (num_eval_mode_out) *num_eval_mode_out = data->num_eval_mode_out;
1338   if (eval_mode_out) *eval_mode_out         = data->eval_mode_out;
1339 
1340   return CEED_ERROR_SUCCESS;
1341 }
1342 
1343 /**
1344   @brief Get CeedOperator CeedBasis data for assembly
1345 
1346   @param[in] data        CeedOperatorAssemblyData
1347   @param[out] basis_in   Pointer to hold active input CeedBasis, or NULL
1348   @param[out] B_in       Pointer to hold assembled active input B, or NULL
1349   @param[out] basis_out  Pointer to hold active output CeedBasis, or NULL
1350   @param[out] B_out      Pointer to hold assembled active output B, or NULL
1351 
1352   @return An error code: 0 - success, otherwise - failure
1353 
1354   @ref Backend
1355 **/
1356 int CeedOperatorAssemblyDataGetBases(CeedOperatorAssemblyData data,
1357                                      CeedBasis *basis_in, const CeedScalar **B_in, CeedBasis *basis_out,
1358                                      const CeedScalar **B_out) {
1359   int ierr;
1360 
1361   // Assemble B_in, B_out if needed
1362   if (B_in && !data->B_in) {
1363     CeedInt num_qpts, elem_size;
1364     CeedScalar *B_in, *identity = NULL;
1365     const CeedScalar *interp_in, *grad_in;
1366     bool has_eval_none = false;
1367 
1368     ierr = CeedBasisGetNumQuadraturePoints(data->basis_in, &num_qpts);
1369     CeedChk(ierr);
1370     ierr = CeedBasisGetNumNodes(data->basis_in, &elem_size); CeedChk(ierr);
1371     ierr = CeedCalloc(num_qpts * elem_size * data->num_eval_mode_in, &B_in);
1372     CeedChk(ierr);
1373 
1374     for (CeedInt i = 0; i < data->num_eval_mode_in; i++) {
1375       has_eval_none = has_eval_none || (data->eval_mode_in[i] == CEED_EVAL_NONE);
1376     }
1377     if (has_eval_none) {
1378       ierr = CeedCalloc(num_qpts * elem_size, &identity); CeedChk(ierr);
1379       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1380         identity[i * elem_size + i] = 1.0;
1381       }
1382     }
1383     ierr = CeedBasisGetInterp(data->basis_in, &interp_in); CeedChk(ierr);
1384     ierr = CeedBasisGetGrad(data->basis_in, &grad_in); CeedChk(ierr);
1385 
1386     for (CeedInt q = 0; q < num_qpts; q++) {
1387       for (CeedInt n = 0; n < elem_size; n++) {
1388         CeedInt d_in = -1;
1389         for (CeedInt e_in = 0; e_in < data->num_eval_mode_in; e_in++) {
1390           const CeedInt qq = data->num_eval_mode_in * q;
1391           const CeedScalar *b = NULL;
1392 
1393           if (data->eval_mode_in[e_in] == CEED_EVAL_GRAD) d_in++;
1394           CeedOperatorGetBasisPointer(data->eval_mode_in[e_in], identity,
1395                                       interp_in, &grad_in[d_in * num_qpts * elem_size], &b); CeedChk(ierr);
1396           B_in[(qq + e_in)*elem_size + n] = b[q * elem_size + n];
1397         }
1398       }
1399     }
1400     data->B_in = B_in;
1401   }
1402 
1403   if (B_out && !data->B_out) {
1404     CeedInt num_qpts, elem_size;
1405     CeedScalar *B_out, *identity = NULL;
1406     const CeedScalar *interp_out, *grad_out;
1407     bool has_eval_none = false;
1408 
1409     ierr = CeedBasisGetNumQuadraturePoints(data->basis_out, &num_qpts);
1410     CeedChk(ierr);
1411     ierr = CeedBasisGetNumNodes(data->basis_out, &elem_size); CeedChk(ierr);
1412     ierr = CeedCalloc(num_qpts * elem_size * data->num_eval_mode_out, &B_out);
1413     CeedChk(ierr);
1414 
1415     for (CeedInt i = 0; i < data->num_eval_mode_out; i++) {
1416       has_eval_none = has_eval_none || (data->eval_mode_out[i] == CEED_EVAL_NONE);
1417     }
1418     if (has_eval_none) {
1419       ierr = CeedCalloc(num_qpts * elem_size, &identity); CeedChk(ierr);
1420       for (CeedInt i = 0; i < (elem_size < num_qpts ? elem_size : num_qpts); i++) {
1421         identity[i * elem_size + i] = 1.0;
1422       }
1423     }
1424     ierr = CeedBasisGetInterp(data->basis_out, &interp_out); CeedChk(ierr);
1425     ierr = CeedBasisGetGrad(data->basis_out, &grad_out); CeedChk(ierr);
1426 
1427     for (CeedInt q = 0; q < num_qpts; q++) {
1428       for (CeedInt n = 0; n < elem_size; n++) {
1429         CeedInt d_out = -1;
1430         for (CeedInt e_out = 0; e_out < data->num_eval_mode_out; e_out++) {
1431           const CeedInt qq = data->num_eval_mode_out * q;
1432           const CeedScalar *b = NULL;
1433 
1434           if (data->eval_mode_out[e_out] == CEED_EVAL_GRAD) d_out++;
1435           CeedOperatorGetBasisPointer(data->eval_mode_out[e_out], identity,
1436                                       interp_out, &grad_out[d_out * num_qpts * elem_size], &b); CeedChk(ierr);
1437           B_out[(qq + e_out)*elem_size + n] = b[q * elem_size + n];
1438         }
1439       }
1440     }
1441     data->B_out = B_out;
1442   }
1443 
1444   if (basis_in) *basis_in   = data->basis_in;
1445   if (B_in) *B_in           = data->B_in;
1446   if (basis_out) *basis_out = data->basis_out;
1447   if (B_out) *B_out         = data->B_out;
1448 
1449   return CEED_ERROR_SUCCESS;
1450 }
1451 
1452 /**
1453   @brief Destroy CeedOperatorAssemblyData
1454 
1455   @param[out] data  CeedOperatorAssemblyData to destroy
1456 
1457   @return An error code: 0 - success, otherwise - failure
1458 
1459   @ref Backend
1460 **/
1461 int CeedOperatorAssemblyDataDestroy(CeedOperatorAssemblyData *data) {
1462   int ierr;
1463 
1464   if (!*data) return CEED_ERROR_SUCCESS;
1465 
1466   ierr = CeedDestroy(&(*data)->ceed); CeedChk(ierr);
1467   ierr = CeedBasisDestroy(&(*data)->basis_in); CeedChk(ierr);
1468   ierr = CeedBasisDestroy(&(*data)->basis_out); CeedChk(ierr);
1469   ierr = CeedFree(&(*data)->B_in); CeedChk(ierr);
1470   ierr = CeedFree(&(*data)->B_out); CeedChk(ierr);
1471 
1472   ierr = CeedFree(data); CeedChk(ierr);
1473   return CEED_ERROR_SUCCESS;
1474 }
1475 
1476 /// @}
1477 
1478 /// ----------------------------------------------------------------------------
1479 /// CeedOperator Public API
1480 /// ----------------------------------------------------------------------------
1481 /// @addtogroup CeedOperatorUser
1482 /// @{
1483 
1484 /**
1485   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1486 
1487   This returns a CeedVector containing a matrix at each quadrature point
1488     providing the action of the CeedQFunction associated with the CeedOperator.
1489     The vector 'assembled' is of shape
1490       [num_elements, num_input_fields, num_output_fields, num_quad_points]
1491     and contains column-major matrices representing the action of the
1492     CeedQFunction for a corresponding quadrature point on an element. Inputs and
1493     outputs are in the order provided by the user when adding CeedOperator fields.
1494     For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
1495     'v', provided in that order, would result in an assembled QFunction that
1496     consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
1497     on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1498 
1499   Note: Calling this function asserts that setup is complete
1500           and sets the CeedOperator as immutable.
1501 
1502   @param op              CeedOperator to assemble CeedQFunction
1503   @param[out] assembled  CeedVector to store assembled CeedQFunction at
1504                            quadrature points
1505   @param[out] rstr       CeedElemRestriction for CeedVector containing assembled
1506                            CeedQFunction
1507   @param request         Address of CeedRequest for non-blocking completion, else
1508                            @ref CEED_REQUEST_IMMEDIATE
1509 
1510   @return An error code: 0 - success, otherwise - failure
1511 
1512   @ref User
1513 **/
1514 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
1515                                         CeedElemRestriction *rstr,
1516                                         CeedRequest *request) {
1517   int ierr;
1518   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1519 
1520   if (op->LinearAssembleQFunction) {
1521     // Backend version
1522     ierr = op->LinearAssembleQFunction(op, assembled, rstr, request);
1523     CeedChk(ierr);
1524   } else {
1525     // Operator fallback
1526     CeedOperator op_fallback;
1527 
1528     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1529     if (op_fallback) {
1530       ierr = CeedOperatorLinearAssembleQFunction(op_fallback, assembled,
1531              rstr, request); CeedChk(ierr);
1532     } else {
1533       // LCOV_EXCL_START
1534       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
1535                        "Backend does not support CeedOperatorLinearAssembleQFunction");
1536       // LCOV_EXCL_STOP
1537     }
1538   }
1539   return CEED_ERROR_SUCCESS;
1540 }
1541 
1542 /**
1543   @brief Assemble CeedQFunction and store result internall. Return copied
1544            references of stored data to the caller. Caller is responsible for
1545            ownership and destruction of the copied references. See also
1546            @ref CeedOperatorLinearAssembleQFunction
1547 
1548   @param op              CeedOperator to assemble CeedQFunction
1549   @param assembled       CeedVector to store assembled CeedQFunction at
1550                            quadrature points
1551   @param rstr            CeedElemRestriction for CeedVector containing assembled
1552                            CeedQFunction
1553   @param request         Address of CeedRequest for non-blocking completion, else
1554                            @ref CEED_REQUEST_IMMEDIATE
1555 
1556   @return An error code: 0 - success, otherwise - failure
1557 
1558   @ref User
1559 **/
1560 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op,
1561     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1562   int ierr;
1563   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1564 
1565   if (op->LinearAssembleQFunctionUpdate) {
1566     // Backend version
1567     bool qf_assembled_is_setup;
1568     CeedVector assembled_vec = NULL;
1569     CeedElemRestriction assembled_rstr = NULL;
1570 
1571     ierr = CeedQFunctionAssemblyDataIsSetup(op->qf_assembled,
1572                                             &qf_assembled_is_setup); CeedChk(ierr);
1573     if (qf_assembled_is_setup) {
1574       bool update_needed;
1575 
1576       ierr = CeedQFunctionAssemblyDataGetObjects(op->qf_assembled, &assembled_vec,
1577              &assembled_rstr); CeedChk(ierr);
1578       ierr = CeedQFunctionAssemblyDataIsUpdateNeeded(op->qf_assembled,
1579              &update_needed); CeedChk(ierr);
1580       if (update_needed) {
1581         ierr = op->LinearAssembleQFunctionUpdate(op, assembled_vec, assembled_rstr,
1582                request); CeedChk(ierr);
1583       }
1584     } else {
1585       ierr = op->LinearAssembleQFunction(op, &assembled_vec, &assembled_rstr,
1586                                          request); CeedChk(ierr);
1587       ierr = CeedQFunctionAssemblyDataSetObjects(op->qf_assembled, assembled_vec,
1588              assembled_rstr); CeedChk(ierr);
1589     }
1590     ierr = CeedQFunctionAssemblyDataSetUpdateNeeded(op->qf_assembled, false);
1591     CeedChk(ierr);
1592 
1593     // Copy reference from internally held copy
1594     *assembled = NULL;
1595     *rstr = NULL;
1596     ierr = CeedVectorReferenceCopy(assembled_vec, assembled); CeedChk(ierr);
1597     ierr = CeedVectorDestroy(&assembled_vec); CeedChk(ierr);
1598     ierr = CeedElemRestrictionReferenceCopy(assembled_rstr, rstr); CeedChk(ierr);
1599     ierr = CeedElemRestrictionDestroy(&assembled_rstr); CeedChk(ierr);
1600   } else {
1601     // Operator fallback
1602     CeedOperator op_fallback;
1603 
1604     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1605     if (op_fallback) {
1606       ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op_fallback, assembled,
1607              rstr, request); CeedChk(ierr);
1608     } else {
1609       // LCOV_EXCL_START
1610       return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
1611                        "Backend does not support CeedOperatorLinearAssembleQFunctionUpdate");
1612       // LCOV_EXCL_STOP
1613     }
1614   }
1615 
1616   return CEED_ERROR_SUCCESS;
1617 }
1618 
1619 /**
1620   @brief Assemble the diagonal of a square linear CeedOperator
1621 
1622   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1623 
1624   Note: Currently only non-composite CeedOperators with a single field and
1625           composite CeedOperators with single field sub-operators are supported.
1626 
1627   Note: Calling this function asserts that setup is complete
1628           and sets the CeedOperator as immutable.
1629 
1630   @param op              CeedOperator to assemble CeedQFunction
1631   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1632   @param request         Address of CeedRequest for non-blocking completion, else
1633                            @ref CEED_REQUEST_IMMEDIATE
1634 
1635   @return An error code: 0 - success, otherwise - failure
1636 
1637   @ref User
1638 **/
1639 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
1640                                        CeedRequest *request) {
1641   int ierr;
1642   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1643 
1644   CeedSize input_size = 0, output_size = 0;
1645   ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
1646   CeedChk(ierr);
1647   if (input_size != output_size)
1648     // LCOV_EXCL_START
1649     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1650   // LCOV_EXCL_STOP
1651 
1652   if (op->LinearAssembleDiagonal) {
1653     // Backend version
1654     ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
1655     return CEED_ERROR_SUCCESS;
1656   } else if (op->LinearAssembleAddDiagonal) {
1657     // Backend version with zeroing first
1658     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1659     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1660     return CEED_ERROR_SUCCESS;
1661   } else {
1662     // Operator fallback
1663     CeedOperator op_fallback;
1664 
1665     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1666     if (op_fallback) {
1667       ierr = CeedOperatorLinearAssembleDiagonal(op_fallback, assembled, request);
1668       CeedChk(ierr);
1669       return CEED_ERROR_SUCCESS;
1670     }
1671   }
1672   // Default interface implementation
1673   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1674   ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
1675   CeedChk(ierr);
1676 
1677   return CEED_ERROR_SUCCESS;
1678 }
1679 
1680 /**
1681   @brief Assemble the diagonal of a square linear CeedOperator
1682 
1683   This sums into a CeedVector the diagonal of a linear CeedOperator.
1684 
1685   Note: Currently only non-composite CeedOperators with a single field and
1686           composite CeedOperators with single field sub-operators are supported.
1687 
1688   Note: Calling this function asserts that setup is complete
1689           and sets the CeedOperator as immutable.
1690 
1691   @param op              CeedOperator to assemble CeedQFunction
1692   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1693   @param request         Address of CeedRequest for non-blocking completion, else
1694                            @ref CEED_REQUEST_IMMEDIATE
1695 
1696   @return An error code: 0 - success, otherwise - failure
1697 
1698   @ref User
1699 **/
1700 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
1701     CeedRequest *request) {
1702   int ierr;
1703   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1704 
1705   CeedSize input_size = 0, output_size = 0;
1706   ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
1707   CeedChk(ierr);
1708   if (input_size != output_size)
1709     // LCOV_EXCL_START
1710     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1711   // LCOV_EXCL_STOP
1712 
1713   if (op->LinearAssembleAddDiagonal) {
1714     // Backend version
1715     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1716     return CEED_ERROR_SUCCESS;
1717   } else {
1718     // Operator fallback
1719     CeedOperator op_fallback;
1720 
1721     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1722     if (op_fallback) {
1723       ierr = CeedOperatorLinearAssembleAddDiagonal(op_fallback, assembled, request);
1724       CeedChk(ierr);
1725       return CEED_ERROR_SUCCESS;
1726     }
1727   }
1728   // Default interface implementation
1729   bool is_composite;
1730   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1731   if (is_composite) {
1732     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1733            false, assembled); CeedChk(ierr);
1734   } else {
1735     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
1736     CeedChk(ierr);
1737   }
1738 
1739   return CEED_ERROR_SUCCESS;
1740 }
1741 
1742 /**
1743   @brief Assemble the point block diagonal of a square linear CeedOperator
1744 
1745   This overwrites a CeedVector with the point block diagonal of a linear
1746     CeedOperator.
1747 
1748   Note: Currently only non-composite CeedOperators with a single field and
1749           composite CeedOperators with single field sub-operators are supported.
1750 
1751   Note: Calling this function asserts that setup is complete
1752           and sets the CeedOperator as immutable.
1753 
1754   @param op              CeedOperator to assemble CeedQFunction
1755   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1756                            diagonal, provided in row-major form with an
1757                            @a num_comp * @a num_comp block at each node. The dimensions
1758                            of this vector are derived from the active vector
1759                            for the CeedOperator. The array has shape
1760                            [nodes, component out, component in].
1761   @param request         Address of CeedRequest for non-blocking completion, else
1762                            CEED_REQUEST_IMMEDIATE
1763 
1764   @return An error code: 0 - success, otherwise - failure
1765 
1766   @ref User
1767 **/
1768 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
1769     CeedVector assembled, CeedRequest *request) {
1770   int ierr;
1771   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1772 
1773   CeedSize input_size = 0, output_size = 0;
1774   ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
1775   CeedChk(ierr);
1776   if (input_size != output_size)
1777     // LCOV_EXCL_START
1778     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1779   // LCOV_EXCL_STOP
1780 
1781   if (op->LinearAssemblePointBlockDiagonal) {
1782     // Backend version
1783     ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
1784     CeedChk(ierr);
1785     return CEED_ERROR_SUCCESS;
1786   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1787     // Backend version with zeroing first
1788     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1789     ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
1790            request); CeedChk(ierr);
1791     return CEED_ERROR_SUCCESS;
1792   } else {
1793     // Operator fallback
1794     CeedOperator op_fallback;
1795 
1796     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1797     if (op_fallback) {
1798       ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op_fallback, assembled,
1799              request); CeedChk(ierr);
1800       return CEED_ERROR_SUCCESS;
1801     }
1802   }
1803   // Default interface implementation
1804   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1805   ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
1806   CeedChk(ierr);
1807 
1808   return CEED_ERROR_SUCCESS;
1809 }
1810 
1811 /**
1812   @brief Assemble the point block diagonal of a square linear CeedOperator
1813 
1814   This sums into a CeedVector with the point block diagonal of a linear
1815     CeedOperator.
1816 
1817   Note: Currently only non-composite CeedOperators with a single field and
1818           composite CeedOperators with single field sub-operators are supported.
1819 
1820   Note: Calling this function asserts that setup is complete
1821           and sets the CeedOperator as immutable.
1822 
1823   @param op              CeedOperator to assemble CeedQFunction
1824   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1825                            diagonal, provided in row-major form with an
1826                            @a num_comp * @a num_comp block at each node. The dimensions
1827                            of this vector are derived from the active vector
1828                            for the CeedOperator. The array has shape
1829                            [nodes, component out, component in].
1830   @param request         Address of CeedRequest for non-blocking completion, else
1831                            CEED_REQUEST_IMMEDIATE
1832 
1833   @return An error code: 0 - success, otherwise - failure
1834 
1835   @ref User
1836 **/
1837 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
1838     CeedVector assembled, CeedRequest *request) {
1839   int ierr;
1840   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1841 
1842   CeedSize input_size = 0, output_size = 0;
1843   ierr = CeedOperatorGetActiveVectorLengths(op, &input_size, &output_size);
1844   CeedChk(ierr);
1845   if (input_size != output_size)
1846     // LCOV_EXCL_START
1847     return CeedError(op->ceed, CEED_ERROR_DIMENSION, "Operator must be square");
1848   // LCOV_EXCL_STOP
1849 
1850   if (op->LinearAssembleAddPointBlockDiagonal) {
1851     // Backend version
1852     ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
1853     CeedChk(ierr);
1854     return CEED_ERROR_SUCCESS;
1855   } else {
1856     // Operator fallback
1857     CeedOperator op_fallback;
1858 
1859     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1860     if (op_fallback) {
1861       ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op_fallback, assembled,
1862              request); CeedChk(ierr);
1863       return CEED_ERROR_SUCCESS;
1864     }
1865   }
1866   // Default interface implemenation
1867   bool is_composite;
1868   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1869   if (is_composite) {
1870     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1871            true, assembled); CeedChk(ierr);
1872   } else {
1873     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
1874     CeedChk(ierr);
1875   }
1876 
1877   return CEED_ERROR_SUCCESS;
1878 }
1879 
1880 /**
1881    @brief Fully assemble the nonzero pattern of a linear operator.
1882 
1883    Expected to be used in conjunction with CeedOperatorLinearAssemble()
1884 
1885    The assembly routines use coordinate format, with num_entries tuples of the
1886    form (i, j, value) which indicate that value should be added to the matrix
1887    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1888    This function returns the number of entries and their (i, j) locations,
1889    while CeedOperatorLinearAssemble() provides the values in the same
1890    ordering.
1891 
1892    This will generally be slow unless your operator is low-order.
1893 
1894   Note: Calling this function asserts that setup is complete
1895           and sets the CeedOperator as immutable.
1896 
1897    @param[in]  op           CeedOperator to assemble
1898    @param[out] num_entries  Number of entries in coordinate nonzero pattern
1899    @param[out] rows         Row number for each entry
1900    @param[out] cols         Column number for each entry
1901 
1902    @ref User
1903 **/
1904 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries,
1905                                        CeedInt **rows, CeedInt **cols) {
1906   int ierr;
1907   CeedInt num_suboperators, single_entries;
1908   CeedOperator *sub_operators;
1909   bool is_composite;
1910   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1911 
1912   if (op->LinearAssembleSymbolic) {
1913     // Backend version
1914     ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
1915     return CEED_ERROR_SUCCESS;
1916   } else {
1917     // Operator fallback
1918     CeedOperator op_fallback;
1919 
1920     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
1921     if (op_fallback) {
1922       ierr = CeedOperatorLinearAssembleSymbolic(op_fallback, num_entries, rows, cols);
1923       CeedChk(ierr);
1924       return CEED_ERROR_SUCCESS;
1925     }
1926   }
1927 
1928   // Default interface implementation
1929 
1930   // count entries and allocate rows, cols arrays
1931   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1932   *num_entries = 0;
1933   if (is_composite) {
1934     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1935     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1936     for (CeedInt k = 0; k < num_suboperators; ++k) {
1937       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1938              &single_entries); CeedChk(ierr);
1939       *num_entries += single_entries;
1940     }
1941   } else {
1942     ierr = CeedSingleOperatorAssemblyCountEntries(op,
1943            &single_entries); CeedChk(ierr);
1944     *num_entries += single_entries;
1945   }
1946   ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
1947   ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);
1948 
1949   // assemble nonzero locations
1950   CeedInt offset = 0;
1951   if (is_composite) {
1952     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1953     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1954     for (CeedInt k = 0; k < num_suboperators; ++k) {
1955       ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
1956              *cols); CeedChk(ierr);
1957       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1958              &single_entries);
1959       CeedChk(ierr);
1960       offset += single_entries;
1961     }
1962   } else {
1963     ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
1964     CeedChk(ierr);
1965   }
1966 
1967   return CEED_ERROR_SUCCESS;
1968 }
1969 
1970 /**
1971    @brief Fully assemble the nonzero entries of a linear operator.
1972 
1973    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()
1974 
1975    The assembly routines use coordinate format, with num_entries tuples of the
1976    form (i, j, value) which indicate that value should be added to the matrix
1977    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1978    This function returns the values of the nonzero entries to be added, their
1979    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1980 
1981    This will generally be slow unless your operator is low-order.
1982 
1983   Note: Calling this function asserts that setup is complete
1984           and sets the CeedOperator as immutable.
1985 
1986    @param[in]  op      CeedOperator to assemble
1987    @param[out] values  Values to assemble into matrix
1988 
1989    @ref User
1990 **/
1991 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1992   int ierr;
1993   CeedInt num_suboperators, single_entries = 0;
1994   CeedOperator *sub_operators;
1995   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1996 
1997   if (op->LinearAssemble) {
1998     // Backend version
1999     ierr = op->LinearAssemble(op, values); CeedChk(ierr);
2000     return CEED_ERROR_SUCCESS;
2001   } else {
2002     // Operator fallback
2003     CeedOperator op_fallback;
2004 
2005     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
2006     if (op_fallback) {
2007       ierr = CeedOperatorLinearAssemble(op_fallback, values); CeedChk(ierr);
2008       return CEED_ERROR_SUCCESS;
2009     }
2010   }
2011 
2012   // Default interface implementation
2013   bool is_composite;
2014   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
2015 
2016   CeedInt offset = 0;
2017   if (is_composite) {
2018     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
2019     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
2020     for (CeedInt k = 0; k < num_suboperators; k++) {
2021       ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
2022       CeedChk(ierr);
2023       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
2024              &single_entries);
2025       CeedChk(ierr);
2026       offset += single_entries;
2027     }
2028   } else {
2029     ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
2030   }
2031 
2032   return CEED_ERROR_SUCCESS;
2033 }
2034 
2035 /**
2036   @brief Create a multigrid coarse operator and level transfer operators
2037            for a CeedOperator, creating the prolongation basis from the
2038            fine and coarse grid interpolation
2039 
2040   Note: Calling this function asserts that setup is complete
2041           and sets the CeedOperator as immutable.
2042 
2043   @param[in] op_fine       Fine grid operator
2044   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
2045   @param[in] rstr_coarse   Coarse grid restriction
2046   @param[in] basis_coarse  Coarse grid active vector basis
2047   @param[out] op_coarse    Coarse grid operator
2048   @param[out] op_prolong   Coarse to fine operator
2049   @param[out] op_restrict  Fine to coarse operator
2050 
2051   @return An error code: 0 - success, otherwise - failure
2052 
2053   @ref User
2054 **/
2055 int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
2056                                      CeedVector p_mult_fine,
2057                                      CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2058                                      CeedOperator *op_coarse, CeedOperator *op_prolong,
2059                                      CeedOperator *op_restrict) {
2060   int ierr;
2061   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
2062   Ceed ceed;
2063   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
2064 
2065   // Check for compatible quadrature spaces
2066   CeedBasis basis_fine;
2067   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
2068   CeedInt Q_f, Q_c;
2069   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
2070   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
2071   if (Q_f != Q_c)
2072     // LCOV_EXCL_START
2073     return CeedError(ceed, CEED_ERROR_DIMENSION,
2074                      "Bases must have compatible quadrature spaces");
2075   // LCOV_EXCL_STOP
2076 
2077   // Coarse to fine basis
2078   CeedInt P_f, P_c, Q = Q_f;
2079   bool is_tensor_f, is_tensor_c;
2080   ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
2081   ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
2082   CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
2083   if (is_tensor_f && is_tensor_c) {
2084     ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
2085     ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
2086     ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
2087   } else if (!is_tensor_f && !is_tensor_c) {
2088     ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
2089     ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
2090   } else {
2091     // LCOV_EXCL_START
2092     return CeedError(ceed, CEED_ERROR_MINOR,
2093                      "Bases must both be tensor or non-tensor");
2094     // LCOV_EXCL_STOP
2095   }
2096 
2097   ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
2098   ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
2099   ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
2100   ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
2101   const CeedScalar *interp_f_source = NULL, *interp_c_source = NULL;
2102   if (is_tensor_f) {
2103     ierr = CeedBasisGetInterp1D(basis_fine, &interp_f_source); CeedChk(ierr);
2104     ierr = CeedBasisGetInterp1D(basis_coarse, &interp_c_source); CeedChk(ierr);
2105   } else {
2106     ierr = CeedBasisGetInterp(basis_fine, &interp_f_source); CeedChk(ierr);
2107     ierr = CeedBasisGetInterp(basis_coarse, &interp_c_source); CeedChk(ierr);
2108   }
2109   memcpy(interp_f, interp_f_source, Q*P_f*sizeof interp_f_source[0]);
2110   memcpy(interp_c, interp_c_source, Q*P_c*sizeof interp_c_source[0]);
2111 
2112   // -- QR Factorization, interp_f = Q R
2113   ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);
2114 
2115   // -- Apply Qtranspose, interp_c = Qtranspose interp_c
2116   ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
2117                                Q, P_c, P_f, P_c, 1); CeedChk(ierr);
2118 
2119   // -- Apply Rinv, interp_c_to_f = Rinv interp_c
2120   for (CeedInt j=0; j<P_c; j++) { // Column j
2121     interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
2122     for (CeedInt i=P_f-2; i>=0; i--) { // Row i
2123       interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
2124       for (CeedInt k=i+1; k<P_f; k++)
2125         interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
2126       interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
2127     }
2128   }
2129   ierr = CeedFree(&tau); CeedChk(ierr);
2130   ierr = CeedFree(&interp_c); CeedChk(ierr);
2131   ierr = CeedFree(&interp_f); CeedChk(ierr);
2132 
2133   // Complete with interp_c_to_f versions of code
2134   if (is_tensor_f) {
2135     ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
2136            rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
2137     CeedChk(ierr);
2138   } else {
2139     ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
2140            rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
2141     CeedChk(ierr);
2142   }
2143 
2144   // Cleanup
2145   ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
2146   return CEED_ERROR_SUCCESS;
2147 }
2148 
2149 /**
2150   @brief Create a multigrid coarse operator and level transfer operators
2151            for a CeedOperator with a tensor basis for the active basis
2152 
2153   Note: Calling this function asserts that setup is complete
2154           and sets the CeedOperator as immutable.
2155 
2156   @param[in] op_fine        Fine grid operator
2157   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
2158   @param[in] rstr_coarse    Coarse grid restriction
2159   @param[in] basis_coarse   Coarse grid active vector basis
2160   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
2161   @param[out] op_coarse     Coarse grid operator
2162   @param[out] op_prolong    Coarse to fine operator
2163   @param[out] op_restrict   Fine to coarse operator
2164 
2165   @return An error code: 0 - success, otherwise - failure
2166 
2167   @ref User
2168 **/
2169 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
2170     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
2171     const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
2172     CeedOperator *op_prolong, CeedOperator *op_restrict) {
2173   int ierr;
2174   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
2175   Ceed ceed;
2176   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
2177 
2178   // Check for compatible quadrature spaces
2179   CeedBasis basis_fine;
2180   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
2181   CeedInt Q_f, Q_c;
2182   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
2183   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
2184   if (Q_f != Q_c)
2185     // LCOV_EXCL_START
2186     return CeedError(ceed, CEED_ERROR_DIMENSION,
2187                      "Bases must have compatible quadrature spaces");
2188   // LCOV_EXCL_STOP
2189 
2190   // Coarse to fine basis
2191   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
2192   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
2193   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
2194   ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr);
2195   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
2196   CeedChk(ierr);
2197   P_1d_c = dim == 1 ? num_nodes_c :
2198            dim == 2 ? sqrt(num_nodes_c) :
2199            cbrt(num_nodes_c);
2200   CeedScalar *q_ref, *q_weight, *grad;
2201   ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr);
2202   ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr);
2203   ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr);
2204   CeedBasis basis_c_to_f;
2205   ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f,
2206                                  interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
2207   CeedChk(ierr);
2208   ierr = CeedFree(&q_ref); CeedChk(ierr);
2209   ierr = CeedFree(&q_weight); CeedChk(ierr);
2210   ierr = CeedFree(&grad); CeedChk(ierr);
2211 
2212   // Core code
2213   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
2214                                           basis_coarse, basis_c_to_f, op_coarse,
2215                                           op_prolong, op_restrict);
2216   CeedChk(ierr);
2217   return CEED_ERROR_SUCCESS;
2218 }
2219 
2220 /**
2221   @brief Create a multigrid coarse operator and level transfer operators
2222            for a CeedOperator with a non-tensor basis for the active vector
2223 
2224   Note: Calling this function asserts that setup is complete
2225           and sets the CeedOperator as immutable.
2226 
2227   @param[in] op_fine        Fine grid operator
2228   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
2229   @param[in] rstr_coarse    Coarse grid restriction
2230   @param[in] basis_coarse   Coarse grid active vector basis
2231   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
2232   @param[out] op_coarse     Coarse grid operator
2233   @param[out] op_prolong    Coarse to fine operator
2234   @param[out] op_restrict   Fine to coarse operator
2235 
2236   @return An error code: 0 - success, otherwise - failure
2237 
2238   @ref User
2239 **/
2240 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine,
2241                                        CeedVector p_mult_fine,
2242                                        CeedElemRestriction rstr_coarse,
2243                                        CeedBasis basis_coarse,
2244                                        const CeedScalar *interp_c_to_f,
2245                                        CeedOperator *op_coarse,
2246                                        CeedOperator *op_prolong,
2247                                        CeedOperator *op_restrict) {
2248   int ierr;
2249   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
2250   Ceed ceed;
2251   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
2252 
2253   // Check for compatible quadrature spaces
2254   CeedBasis basis_fine;
2255   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
2256   CeedInt Q_f, Q_c;
2257   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
2258   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
2259   if (Q_f != Q_c)
2260     // LCOV_EXCL_START
2261     return CeedError(ceed, CEED_ERROR_DIMENSION,
2262                      "Bases must have compatible quadrature spaces");
2263   // LCOV_EXCL_STOP
2264 
2265   // Coarse to fine basis
2266   CeedElemTopology topo;
2267   ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr);
2268   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
2269   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
2270   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
2271   ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr);
2272   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
2273   CeedChk(ierr);
2274   CeedScalar *q_ref, *q_weight, *grad;
2275   ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr);
2276   ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr);
2277   ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr);
2278   CeedBasis basis_c_to_f;
2279   ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f,
2280                            interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
2281   CeedChk(ierr);
2282   ierr = CeedFree(&q_ref); CeedChk(ierr);
2283   ierr = CeedFree(&q_weight); CeedChk(ierr);
2284   ierr = CeedFree(&grad); CeedChk(ierr);
2285 
2286   // Core code
2287   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
2288                                           basis_coarse, basis_c_to_f, op_coarse,
2289                                           op_prolong, op_restrict);
2290   CeedChk(ierr);
2291   return CEED_ERROR_SUCCESS;
2292 }
2293 
2294 /**
2295   @brief Build a FDM based approximate inverse for each element for a
2296            CeedOperator
2297 
2298   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization
2299     Method based approximate inverse. This function obtains the simultaneous
2300     diagonalization for the 1D mass and Laplacian operators,
2301       M = V^T V, K = V^T S V.
2302     The assembled QFunction is used to modify the eigenvalues from simultaneous
2303     diagonalization and obtain an approximate inverse of the form
2304       V^T S^hat V. The CeedOperator must be linear and non-composite. The
2305     associated CeedQFunction must therefore also be linear.
2306 
2307   Note: Calling this function asserts that setup is complete
2308           and sets the CeedOperator as immutable.
2309 
2310   @param op            CeedOperator to create element inverses
2311   @param[out] fdm_inv  CeedOperator to apply the action of a FDM based inverse
2312                          for each element
2313   @param request       Address of CeedRequest for non-blocking completion, else
2314                          @ref CEED_REQUEST_IMMEDIATE
2315 
2316   @return An error code: 0 - success, otherwise - failure
2317 
2318   @ref User
2319 **/
2320 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv,
2321                                         CeedRequest *request) {
2322   int ierr;
2323   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
2324 
2325   if (op->CreateFDMElementInverse) {
2326     // Backend version
2327     ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr);
2328     return CEED_ERROR_SUCCESS;
2329   } else {
2330     // Operator fallback
2331     CeedOperator op_fallback;
2332 
2333     ierr = CeedOperatorGetFallback(op, &op_fallback); CeedChk(ierr);
2334     if (op_fallback) {
2335       ierr = CeedOperatorCreateFDMElementInverse(op_fallback, fdm_inv, request);
2336       CeedChk(ierr);
2337       return CEED_ERROR_SUCCESS;
2338     }
2339   }
2340 
2341   // Default interface implementation
2342   Ceed ceed, ceed_parent;
2343   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
2344   ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr);
2345   ceed_parent = ceed_parent ? ceed_parent : ceed;
2346   CeedQFunction qf;
2347   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
2348 
2349   // Determine active input basis
2350   bool interp = false, grad = false;
2351   CeedBasis basis = NULL;
2352   CeedElemRestriction rstr = NULL;
2353   CeedOperatorField *op_fields;
2354   CeedQFunctionField *qf_fields;
2355   CeedInt num_input_fields;
2356   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL);
2357   CeedChk(ierr);
2358   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
2359   for (CeedInt i=0; i<num_input_fields; i++) {
2360     CeedVector vec;
2361     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
2362     if (vec == CEED_VECTOR_ACTIVE) {
2363       CeedEvalMode eval_mode;
2364       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
2365       interp = interp || eval_mode == CEED_EVAL_INTERP;
2366       grad = grad || eval_mode == CEED_EVAL_GRAD;
2367       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr);
2368       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
2369     }
2370   }
2371   if (!basis)
2372     // LCOV_EXCL_START
2373     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
2374   // LCOV_EXCL_STOP
2375   CeedSize l_size = 1;
2376   CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
2377   ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr);
2378   ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr);
2379   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr);
2380   ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr);
2381   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
2382   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
2383   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
2384   ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr);
2385 
2386   // Build and diagonalize 1D Mass and Laplacian
2387   bool tensor_basis;
2388   ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr);
2389   if (!tensor_basis)
2390     // LCOV_EXCL_START
2391     return CeedError(ceed, CEED_ERROR_BACKEND,
2392                      "FDMElementInverse only supported for tensor "
2393                      "bases");
2394   // LCOV_EXCL_STOP
2395   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
2396   ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr);
2397   ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr);
2398   ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr);
2399   ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr);
2400   ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr);
2401   // -- Build matrices
2402   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
2403   ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr);
2404   ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr);
2405   ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr);
2406   ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim,
2407                               mass, laplace); CeedChk(ierr);
2408 
2409   // -- Diagonalize
2410   ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d);
2411   CeedChk(ierr);
2412   ierr = CeedFree(&mass); CeedChk(ierr);
2413   ierr = CeedFree(&laplace); CeedChk(ierr);
2414   for (CeedInt i=0; i<P_1d; i++)
2415     for (CeedInt j=0; j<P_1d; j++)
2416       fdm_interp[i+j*P_1d] = x[j+i*P_1d];
2417   ierr = CeedFree(&x); CeedChk(ierr);
2418 
2419   // Assemble QFunction
2420   CeedVector assembled;
2421   CeedElemRestriction rstr_qf;
2422   ierr =  CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled,
2423           &rstr_qf, request); CeedChk(ierr);
2424   CeedInt layout[3];
2425   ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr);
2426   ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr);
2427   CeedScalar max_norm = 0;
2428   ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr);
2429 
2430   // Calculate element averages
2431   CeedInt num_modes = (interp?1:0) + (grad?dim:0);
2432   CeedScalar *elem_avg;
2433   const CeedScalar *assembled_array, *q_weight_array;
2434   CeedVector q_weight;
2435   ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr);
2436   ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT,
2437                         CEED_VECTOR_NONE, q_weight); CeedChk(ierr);
2438   ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array);
2439   CeedChk(ierr);
2440   ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array);
2441   CeedChk(ierr);
2442   ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr);
2443   const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
2444   for (CeedInt e=0; e<num_elem; e++) {
2445     CeedInt count = 0;
2446     for (CeedInt q=0; q<num_qpts; q++)
2447       for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++)
2448         if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) >
2449             qf_value_bound) {
2450           elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] /
2451                          q_weight_array[q];
2452           count++;
2453         }
2454     if (count) {
2455       elem_avg[e] /= count;
2456     } else {
2457       elem_avg[e] = 1.0;
2458     }
2459   }
2460   ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr);
2461   ierr = CeedVectorDestroy(&assembled); CeedChk(ierr);
2462   ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr);
2463   ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr);
2464 
2465   // Build FDM diagonal
2466   CeedVector q_data;
2467   CeedScalar *q_data_array, *fdm_diagonal;
2468   ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr);
2469   const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON;
2470   for (CeedInt c=0; c<num_comp; c++)
2471     for (CeedInt n=0; n<elem_size; n++) {
2472       if (interp)
2473         fdm_diagonal[c*elem_size + n] = 1.0;
2474       if (grad)
2475         for (CeedInt d=0; d<dim; d++) {
2476           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
2477           fdm_diagonal[c*elem_size + n] += lambda[i];
2478         }
2479       if (fabs(fdm_diagonal[c*elem_size + n]) < fdm_diagonal_bound)
2480         fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound;
2481     }
2482   ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data);
2483   CeedChk(ierr);
2484   ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr);
2485   ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array);
2486   CeedChk(ierr);
2487   for (CeedInt e=0; e<num_elem; e++)
2488     for (CeedInt c=0; c<num_comp; c++)
2489       for (CeedInt n=0; n<elem_size; n++)
2490         q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] *
2491             fdm_diagonal[c*elem_size + n]);
2492   ierr = CeedFree(&elem_avg); CeedChk(ierr);
2493   ierr = CeedFree(&fdm_diagonal); CeedChk(ierr);
2494   ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr);
2495 
2496   // Setup FDM operator
2497   // -- Basis
2498   CeedBasis fdm_basis;
2499   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2500   ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr);
2501   ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr);
2502   ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr);
2503   ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d,
2504                                  fdm_interp, grad_dummy, q_ref_dummy,
2505                                  q_weight_dummy, &fdm_basis); CeedChk(ierr);
2506   ierr = CeedFree(&fdm_interp); CeedChk(ierr);
2507   ierr = CeedFree(&grad_dummy); CeedChk(ierr);
2508   ierr = CeedFree(&q_ref_dummy); CeedChk(ierr);
2509   ierr = CeedFree(&q_weight_dummy); CeedChk(ierr);
2510   ierr = CeedFree(&lambda); CeedChk(ierr);
2511 
2512   // -- Restriction
2513   CeedElemRestriction rstr_qd_i;
2514   CeedInt strides[3] = {1, elem_size, elem_size*num_comp};
2515   ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size,
2516                                           num_comp, num_elem*num_comp*elem_size,
2517                                           strides, &rstr_qd_i); CeedChk(ierr);
2518   // -- QFunction
2519   CeedQFunction qf_fdm;
2520   ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm);
2521   CeedChk(ierr);
2522   ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP);
2523   CeedChk(ierr);
2524   ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE);
2525   CeedChk(ierr);
2526   ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP);
2527   CeedChk(ierr);
2528   ierr = CeedQFunctionSetUserFlopsEstimate(qf_fdm, num_comp); CeedChk(ierr);
2529   // -- QFunction context
2530   CeedInt *num_comp_data;
2531   ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr);
2532   num_comp_data[0] = num_comp;
2533   CeedQFunctionContext ctx_fdm;
2534   ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr);
2535   ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER,
2536                                      sizeof(*num_comp_data), num_comp_data);
2537   CeedChk(ierr);
2538   ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr);
2539   ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr);
2540   // -- Operator
2541   ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv);
2542   CeedChk(ierr);
2543   CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
2544   CeedChk(ierr);
2545   CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED,
2546                        q_data); CeedChk(ierr);
2547   CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
2548   CeedChk(ierr);
2549 
2550   // Cleanup
2551   ierr = CeedVectorDestroy(&q_data); CeedChk(ierr);
2552   ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr);
2553   ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr);
2554   ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr);
2555 
2556   return CEED_ERROR_SUCCESS;
2557 }
2558 
2559 /// @}
2560