xref: /libCEED/interface/ceed-preconditioning.c (revision 43e1b16f0c7c5dbfae04543c0675e255f265ff67)
1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3 // reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <ceed-impl.h>
20 #include <math.h>
21 #include <stdbool.h>
22 #include <stdio.h>
23 #include <string.h>
24 
25 /// @file
26 /// Implementation of CeedOperator preconditioning interfaces
27 
28 /// ----------------------------------------------------------------------------
29 /// CeedOperator Library Internal Preconditioning Functions
30 /// ----------------------------------------------------------------------------
31 /// @addtogroup CeedOperatorDeveloper
32 /// @{
33 
34 /**
35   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced
36          CeedOperator functionality
37 
38   @param op  CeedOperator to create fallback for
39 
40   @return An error code: 0 - success, otherwise - failure
41 
42   @ref Developer
43 **/
44 int CeedOperatorCreateFallback(CeedOperator op) {
45   int ierr;
46 
47   // Fallback Ceed
48   const char *resource, *fallback_resource;
49   ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
50   ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
51   CeedChk(ierr);
52   if (!strcmp(resource, fallback_resource))
53     // LCOV_EXCL_START
54     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
55                      "Backend %s cannot create an operator"
56                      "fallback to resource %s", resource, fallback_resource);
57   // LCOV_EXCL_STOP
58 
59   // Fallback Ceed
60   Ceed ceed_ref;
61   if (!op->ceed->op_fallback_ceed) {
62     ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr);
63     ceed_ref->op_fallback_parent = op->ceed;
64     ceed_ref->Error = op->ceed->Error;
65     op->ceed->op_fallback_ceed = ceed_ref;
66   }
67   ceed_ref = op->ceed->op_fallback_ceed;
68 
69   // Clone Op
70   CeedOperator op_ref;
71   ierr = CeedCalloc(1, &op_ref); CeedChk(ierr);
72   memcpy(op_ref, op, sizeof(*op_ref));
73   op_ref->data = NULL;
74   op_ref->is_interface_setup = false;
75   op_ref->is_backend_setup = false;
76   op_ref->ceed = ceed_ref;
77   ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr);
78   op->op_fallback = op_ref;
79 
80   // Clone QF
81   CeedQFunction qf_ref;
82   ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr);
83   memcpy(qf_ref, (op->qf), sizeof(*qf_ref));
84   qf_ref->data = NULL;
85   qf_ref->ceed = ceed_ref;
86   ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr);
87   op_ref->qf = qf_ref;
88   op->qf_fallback = qf_ref;
89   return CEED_ERROR_SUCCESS;
90 }
91 
92 /**
93   @brief Select correct basis matrix pointer based on CeedEvalMode
94 
95   @param[in] eval_mode   Current basis evaluation mode
96   @param[in] identity    Pointer to identity matrix
97   @param[in] interp      Pointer to interpolation matrix
98   @param[in] grad        Pointer to gradient matrix
99   @param[out] basis_ptr  Basis pointer to set
100 
101   @return none
102 
103   @ref Developer
104 **/
105 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode,
106     const CeedScalar *identity, const CeedScalar *interp,
107     const CeedScalar *grad, const CeedScalar **basis_ptr) {
108   switch (eval_mode) {
109   case CEED_EVAL_NONE:
110     *basis_ptr = identity;
111     break;
112   case CEED_EVAL_INTERP:
113     *basis_ptr = interp;
114     break;
115   case CEED_EVAL_GRAD:
116     *basis_ptr = grad;
117     break;
118   case CEED_EVAL_WEIGHT:
119   case CEED_EVAL_DIV:
120   case CEED_EVAL_CURL:
121     break; // Caught by QF Assembly
122   }
123 }
124 
125 /**
126   @brief Create point block restriction for active operator field
127 
128   @param[in] rstr              Original CeedElemRestriction for active field
129   @param[out] pointblock_rstr  Address of the variable where the newly created
130                                  CeedElemRestriction will be stored
131 
132   @return An error code: 0 - success, otherwise - failure
133 
134   @ref Developer
135 **/
static int CeedOperatorCreateActivePointBlockRestriction(
  CeedElemRestriction rstr,
  CeedElemRestriction *pointblock_rstr) {
  int ierr;
  Ceed ceed;
  ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr);
  // Borrow the host-side offsets of the original restriction; restored below
  const CeedInt *offsets;
  ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets);
  CeedChk(ierr);

  // Expand offsets
  // Scale each scalar node offset so that every node addresses a
  // num_comp x num_comp block instead of a single scalar entry
  CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1,
                                                      *pointblock_offsets;
  ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
  ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
  ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
  ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr);
  // NOTE(review): shift is num_comp for interleaved components
  // (comp_stride == 1) and num_comp^2 otherwise -- assumes the original
  // offsets are spaced compatibly with comp_stride; confirm against
  // CeedElemRestrictionCreate semantics
  CeedInt shift = num_comp;
  if (comp_stride != 1)
    shift *= num_comp;
  ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets);
  CeedChk(ierr);
  // Track the largest scaled offset to size the point-block l-vector below
  for (CeedInt i = 0; i < num_elem*elem_size; i++) {
    pointblock_offsets[i] = offsets[i]*shift;
    if (pointblock_offsets[i] > max)
      max = pointblock_offsets[i];
  }

  // Create new restriction
  // comp_stride 1: the num_comp^2 block entries are contiguous per node;
  // ownership of pointblock_offsets transfers to the new restriction
  ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp,
                                   1, max + num_comp*num_comp, CEED_MEM_HOST,
                                   CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr);
  CeedChk(ierr);

  // Cleanup
  ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}
175 
176 /**
177   @brief Core logic for assembling operator diagonal or point block diagonal
178 
179   @param[in] op             CeedOperator to assemble point block diagonal
180   @param[in] request        Address of CeedRequest for non-blocking completion, else
181                               CEED_REQUEST_IMMEDIATE
182   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
183   @param[out] assembled     CeedVector to store assembled diagonal
184 
185   @return An error code: 0 - success, otherwise - failure
186 
187   @ref Developer
188 **/
static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op,
    CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
  // Computes diag(B^T D B) element by element, where D is the assembled
  // QFunction and B collects the active basis evaluation modes, then sums
  // the element diagonals into 'assembled' via the (point block) restriction.
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);

  // Assemble QFunction
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedInt num_input_fields, num_output_fields;
  ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
  CeedChk(ierr);
  CeedVector assembled_qf;
  CeedElemRestriction rstr;
  ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op,  &assembled_qf,
         &rstr, request); CeedChk(ierr);
  // layout[0..2] = strides for (qpoint, field component, element) in the
  // assembled QFunction e-vector; used for all indexing below
  CeedInt layout[3];
  ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
  // Max norm used to build a drop tolerance for near-zero QFunction values
  CeedScalar max_norm = 0;
  ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr);

  // Determine active input basis
  // A GRAD field contributes 'dim' eval modes; NONE/INTERP contribute one
  CeedOperatorField *op_fields;
  CeedQFunctionField *qf_fields;
  ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
  CeedInt num_eval_mode_in = 0, num_comp, dim = 1;
  CeedEvalMode *eval_mode_in = NULL;
  CeedBasis basis_in = NULL;
  CeedElemRestriction rstr_in = NULL;
  for (CeedInt i=0; i<num_input_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr);
      ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr);
      ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      // All active inputs must share one restriction for this algorithm
      if (rstr_in && rstr_in != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_in = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
        eval_mode_in[num_eval_mode_in] = eval_mode;
        num_eval_mode_in += 1;
        break;
      case CEED_EVAL_GRAD:
        // Unroll GRAD into one entry per dimension
        ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_in[num_eval_mode_in+d] = eval_mode;
        num_eval_mode_in += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Determine active output basis
  // Mirrors the input loop above for the active output field(s)
  ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
  CeedInt num_eval_mode_out = 0;
  CeedEvalMode *eval_mode_out = NULL;
  CeedBasis basis_out = NULL;
  CeedElemRestriction rstr_out = NULL;
  for (CeedInt i=0; i<num_output_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      if (rstr_out && rstr_out != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_out = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
        eval_mode_out[num_eval_mode_out] = eval_mode;
        num_eval_mode_out += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_out[num_eval_mode_out+d] = eval_mode;
        num_eval_mode_out += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Assemble point block diagonal restriction, if needed
  // For point block assembly each node stores a num_comp x num_comp block
  CeedElemRestriction diag_rstr = rstr_out;
  if (is_pointblock) {
    ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr);
    CeedChk(ierr);
  }

  // Create diagonal vector
  CeedVector elem_diag;
  ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
  CeedChk(ierr);

  // Assemble element operator diagonals
  CeedScalar *elem_diag_array, *assembled_qf_array;
  ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr);
  ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
  CeedChk(ierr);
  ierr = CeedVectorGetArray(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
  CeedChk(ierr);
  CeedInt num_elem, num_nodes, num_qpts;
  ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
  // Basis matrices
  const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
  CeedScalar *identity = NULL;
  bool evalNone = false;
  for (CeedInt i=0; i<num_eval_mode_in; i++)
    evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE);
  for (CeedInt i=0; i<num_eval_mode_out; i++)
    evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE);
  // CEED_EVAL_NONE uses a (possibly rectangular) identity as its "basis"
  if (evalNone) {
    ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr);
    for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++)
      identity[i*num_nodes+i] = 1.0;
  }
  ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
  ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr);
  // Compute the diagonal of B^T D B
  // Skip QFunction values within 100*eps of zero relative to the max norm
  const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
  // Each element
  for (CeedInt e=0; e<num_elem; e++) {
    // d_out/d_in track which GRAD dimension slab of the basis to use
    CeedInt d_out = -1;
    // Each basis eval mode pair
    for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
      const CeedScalar *bt = NULL;
      if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
        d_out += 1;
      CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out,
                                  &grad_out[d_out*num_qpts*num_nodes], &bt);
      CeedInt d_in = -1;
      for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
        const CeedScalar *b = NULL;
        if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
          d_in += 1;
        CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in,
                                    &grad_in[d_in*num_qpts*num_nodes], &b);
        // Each component
        for (CeedInt c_out=0; c_out<num_comp; c_out++)
          // Each qpoint/node pair
          for (CeedInt q=0; q<num_qpts; q++)
            if (is_pointblock) {
              // Point Block Diagonal
              // Accumulate the full num_comp x num_comp block per node
              for (CeedInt c_in=0; c_in<num_comp; c_in++) {
                const CeedScalar qf_value =
                  assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)*
                                                     num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
                if (fabs(qf_value) > qf_value_bound)
                  for (CeedInt n=0; n<num_nodes; n++)
                    elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
                      bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
              }
            } else {
              // Diagonal Only
              // Only the c_in == c_out entry of the component block is used
              const CeedScalar qf_value =
                assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)*
                                                   num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
              if (fabs(qf_value) > qf_value_bound)
                for (CeedInt n=0; n<num_nodes; n++)
                  elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
                    bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
            }
      }
    }
  }
  ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr);
  ierr = CeedVectorRestoreArray(assembled_qf, &assembled_qf_array); CeedChk(ierr);

  // Assemble local operator diagonal
  // Transpose apply sums element contributions into the l-vector 'assembled'
  ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag,
                                  assembled, request); CeedChk(ierr);

  // Cleanup
  if (is_pointblock) {
    ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr);
  }
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
  ierr = CeedFree(&identity); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}
407 
408 /**
409   @brief Core logic for assembling composite operator diagonal
410 
411   @param[in] op             CeedOperator to assemble point block diagonal
412   @param[in] request        Address of CeedRequest for non-blocking completion, else
413                             CEED_REQUEST_IMMEDIATE
414   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
415   @param[out] assembled     CeedVector to store assembled diagonal
416 
417   @return An error code: 0 - success, otherwise - failure
418 
419   @ref Developer
420 **/
421 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(
422   CeedOperator op, CeedRequest *request, const bool is_pointblock,
423   CeedVector assembled) {
424   int ierr;
425   CeedInt num_sub;
426   CeedOperator *suboperators;
427   ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr);
428   ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr);
429   for (CeedInt i = 0; i < num_sub; i++) {
430     ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request,
431            is_pointblock, assembled); CeedChk(ierr);
432   }
433   return CEED_ERROR_SUCCESS;
434 }
435 
436 /**
437   @brief Build nonzero pattern for non-composite operator
438 
439   Users should generally use CeedOperatorLinearAssembleSymbolic()
440 
441   @param[in] op      CeedOperator to assemble nonzero pattern
442   @param[in] offset  Offset for number of entries
443   @param[out] rows   Row number for each entry
444   @param[out] cols   Column number for each entry
445 
446   @return An error code: 0 - success, otherwise - failure
447 
448   @ref Developer
449 **/
450 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset,
451     CeedInt *rows, CeedInt *cols) {
452   int ierr;
453   Ceed ceed = op->ceed;
454   if (op->is_composite)
455     // LCOV_EXCL_START
456     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
457                      "Composite operator not supported");
458   // LCOV_EXCL_STOP
459 
460   CeedElemRestriction rstr_in;
461   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr);
462   CeedInt num_elem, elem_size, num_nodes, num_comp;
463   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
464   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
465   ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr);
466   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
467   CeedInt layout_er[3];
468   ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr);
469 
470   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
471 
472   // Determine elem_dof relation
473   CeedVector index_vec;
474   ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr);
475   CeedScalar *array;
476   ierr = CeedVectorGetArray(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr);
477   for (CeedInt i = 0; i < num_nodes; ++i) {
478     array[i] = i;
479   }
480   ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr);
481   CeedVector elem_dof;
482   ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof);
483   CeedChk(ierr);
484   ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr);
485   CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec,
486                            elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
487   const CeedScalar *elem_dof_a;
488   ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a);
489   CeedChk(ierr);
490   ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr);
491 
492   // Determine i, j locations for element matrices
493   CeedInt count = 0;
494   for (int e = 0; e < num_elem; ++e) {
495     for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
496       for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
497         for (int i = 0; i < elem_size; ++i) {
498           for (int j = 0; j < elem_size; ++j) {
499             const CeedInt elem_dof_index_row = (i)*layout_er[0] +
500                                                (comp_out)*layout_er[1] + e*layout_er[2];
501             const CeedInt elem_dof_index_col = (j)*layout_er[0] +
502                                                (comp_in)*layout_er[1] + e*layout_er[2];
503 
504             const CeedInt row = elem_dof_a[elem_dof_index_row];
505             const CeedInt col = elem_dof_a[elem_dof_index_col];
506 
507             rows[offset + count] = row;
508             cols[offset + count] = col;
509             count++;
510           }
511         }
512       }
513     }
514   }
515   if (count != local_num_entries)
516     // LCOV_EXCL_START
517     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
518   // LCOV_EXCL_STOP
519   ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr);
520   ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr);
521 
522   return CEED_ERROR_SUCCESS;
523 }
524 
525 /**
526   @brief Assemble nonzero entries for non-composite operator
527 
528   Users should generally use CeedOperatorLinearAssemble()
529 
530   @param[in] op       CeedOperator to assemble
531   @param[out] offset  Offest for number of entries
532   @param[out] values  Values to assemble into matrix
533 
534   @return An error code: 0 - success, otherwise - failure
535 
536   @ref Developer
537 **/
538 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
539                                       CeedVector values) {
540   int ierr;
541   Ceed ceed = op->ceed;
542   if (op->is_composite)
543     // LCOV_EXCL_START
544     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
545                      "Composite operator not supported");
546   // LCOV_EXCL_STOP
547 
548   // Assemble QFunction
549   CeedQFunction qf;
550   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
551   CeedVector assembled_qf;
552   CeedElemRestriction rstr_q;
553   ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(
554            op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
555 
556   CeedInt qf_length;
557   ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr);
558 
559   CeedInt num_input_fields, num_output_fields;
560   CeedOperatorField *input_fields;
561   CeedOperatorField *output_fields;
562   ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields,
563                                &num_output_fields, &output_fields); CeedChk(ierr);
564 
565   // Determine active input basis
566   CeedQFunctionField *qf_fields;
567   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
568   CeedInt num_eval_mode_in = 0, dim = 1;
569   CeedEvalMode *eval_mode_in = NULL;
570   CeedBasis basis_in = NULL;
571   CeedElemRestriction rstr_in = NULL;
572   for (CeedInt i=0; i<num_input_fields; i++) {
573     CeedVector vec;
574     ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr);
575     if (vec == CEED_VECTOR_ACTIVE) {
576       ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in);
577       CeedChk(ierr);
578       ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
579       ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in);
580       CeedChk(ierr);
581       CeedEvalMode eval_mode;
582       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
583       CeedChk(ierr);
584       switch (eval_mode) {
585       case CEED_EVAL_NONE:
586       case CEED_EVAL_INTERP:
587         ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
588         eval_mode_in[num_eval_mode_in] = eval_mode;
589         num_eval_mode_in += 1;
590         break;
591       case CEED_EVAL_GRAD:
592         ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
593         for (CeedInt d=0; d<dim; d++) {
594           eval_mode_in[num_eval_mode_in+d] = eval_mode;
595         }
596         num_eval_mode_in += dim;
597         break;
598       case CEED_EVAL_WEIGHT:
599       case CEED_EVAL_DIV:
600       case CEED_EVAL_CURL:
601         break; // Caught by QF Assembly
602       }
603     }
604   }
605 
606   // Determine active output basis
607   ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
608   CeedInt num_eval_mode_out = 0;
609   CeedEvalMode *eval_mode_out = NULL;
610   CeedBasis basis_out = NULL;
611   CeedElemRestriction rstr_out = NULL;
612   for (CeedInt i=0; i<num_output_fields; i++) {
613     CeedVector vec;
614     ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr);
615     if (vec == CEED_VECTOR_ACTIVE) {
616       ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out);
617       CeedChk(ierr);
618       ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out);
619       CeedChk(ierr);
620       CeedEvalMode eval_mode;
621       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
622       CeedChk(ierr);
623       switch (eval_mode) {
624       case CEED_EVAL_NONE:
625       case CEED_EVAL_INTERP:
626         ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
627         eval_mode_out[num_eval_mode_out] = eval_mode;
628         num_eval_mode_out += 1;
629         break;
630       case CEED_EVAL_GRAD:
631         ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
632         for (CeedInt d=0; d<dim; d++) {
633           eval_mode_out[num_eval_mode_out+d] = eval_mode;
634         }
635         num_eval_mode_out += dim;
636         break;
637       case CEED_EVAL_WEIGHT:
638       case CEED_EVAL_DIV:
639       case CEED_EVAL_CURL:
640         break; // Caught by QF Assembly
641       }
642     }
643   }
644 
645   if (num_eval_mode_in == 0 || num_eval_mode_out == 0)
646     // LCOV_EXCL_START
647     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
648                      "Cannot assemble operator with out inputs/outputs");
649   // LCOV_EXCL_STOP
650 
651   CeedInt num_elem, elem_size, num_qpts, num_comp;
652   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
653   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
654   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
655   ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
656 
657   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
658 
659   // loop over elements and put in data structure
660   const CeedScalar *interp_in, *grad_in;
661   ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
662   ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
663 
664   const CeedScalar *assembled_qf_array;
665   ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
666   CeedChk(ierr);
667 
668   CeedInt layout_qf[3];
669   ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr);
670   ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr);
671 
672   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
673   CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size];
674   CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size];
675   CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in *
676                                      num_qpts]; // logically 3-tensor
677   CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in];
678   CeedScalar elem_mat[elem_size * elem_size];
679   int count = 0;
680   CeedScalar *vals;
681   ierr = CeedVectorGetArray(values, CEED_MEM_HOST, &vals); CeedChk(ierr);
682   for (int e = 0; e < num_elem; ++e) {
683     for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
684       for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
685         for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) {
686           B_mat_in[ell] = 0.0;
687         }
688         for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) {
689           B_mat_out[ell] = 0.0;
690         }
691         // Store block-diagonal D matrix as collection of small dense blocks
692         for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) {
693           D_mat[ell] = 0.0;
694         }
695         // form element matrix itself (for each block component)
696         for (int ell = 0; ell < elem_size*elem_size; ++ell) {
697           elem_mat[ell] = 0.0;
698         }
699         for (int q = 0; q < num_qpts; ++q) {
700           for (int n = 0; n < elem_size; ++n) {
701             CeedInt d_in = -1;
702             for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) {
703               const int qq = num_eval_mode_in*q;
704               if (eval_mode_in[e_in] == CEED_EVAL_INTERP) {
705                 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n];
706               } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) {
707                 d_in += 1;
708                 B_mat_in[(qq+e_in)*elem_size + n] +=
709                   grad_in[(d_in*num_qpts+q) * elem_size + n];
710               } else {
711                 // LCOV_EXCL_START
712                 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
713                 // LCOV_EXCL_STOP
714               }
715             }
716             CeedInt d_out = -1;
717             for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) {
718               const int qq = num_eval_mode_out*q;
719               if (eval_mode_out[e_out] == CEED_EVAL_INTERP) {
720                 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n];
721               } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) {
722                 d_out += 1;
723                 B_mat_out[(qq+e_out)*elem_size + n] +=
724                   grad_in[(d_out*num_qpts+q) * elem_size + n];
725               } else {
726                 // LCOV_EXCL_START
727                 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
728                 // LCOV_EXCL_STOP
729               }
730             }
731           }
732           for (int ei = 0; ei < num_eval_mode_out; ++ei) {
733             for (int ej = 0; ej < num_eval_mode_in; ++ej) {
734               const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp
735                                           +comp_out;
736               const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
737                                 e*layout_qf[2];
738               D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index];
739             }
740           }
741         }
742         // Compute B^T*D
743         for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) {
744           BTD[ell] = 0.0;
745         }
746         for (int j = 0; j<elem_size; ++j) {
747           for (int q = 0; q<num_qpts; ++q) {
748             int qq = num_eval_mode_out*q;
749             for (int ei = 0; ei < num_eval_mode_in; ++ei) {
750               for (int ej = 0; ej < num_eval_mode_out; ++ej) {
751                 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] +=
752                   B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q];
753               }
754             }
755           }
756         }
757 
758         ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size,
759                                   elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);
760 
761         // put element matrix in coordinate data structure
762         for (int i = 0; i < elem_size; ++i) {
763           for (int j = 0; j < elem_size; ++j) {
764             vals[offset + count] = elem_mat[i*elem_size + j];
765             count++;
766           }
767         }
768       }
769     }
770   }
771   if (count != local_num_entries)
772     // LCOV_EXCL_START
773     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
774   // LCOV_EXCL_STOP
775   ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr);
776 
777   ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
778   CeedChk(ierr);
779   ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
780   ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
781   ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
782 
783   return CEED_ERROR_SUCCESS;
784 }
785 
786 /**
787   @brief Count number of entries for assembled CeedOperator
788 
789   @param[in] op            CeedOperator to assemble
790   @param[out] num_entries  Number of entries in assembled representation
791 
792   @return An error code: 0 - success, otherwise - failure
793 
794   @ref Utility
795 **/
796 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op,
797     CeedInt *num_entries) {
798   int ierr;
799   CeedElemRestriction rstr;
800   CeedInt num_elem, elem_size, num_comp;
801 
802   if (op->is_composite)
803     // LCOV_EXCL_START
804     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
805                      "Composite operator not supported");
806   // LCOV_EXCL_STOP
807   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr);
808   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
809   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
810   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
811   *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
812 
813   return CEED_ERROR_SUCCESS;
814 }
815 
816 /**
817   @brief Common code for creating a multigrid coarse operator and level
818            transfer operators for a CeedOperator
819 
820   @param[in] op_fine       Fine grid operator
821   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
822   @param[in] rstr_coarse   Coarse grid restriction
823   @param[in] basis_coarse  Coarse grid active vector basis
824   @param[in] basis_c_to_f  Basis for coarse to fine interpolation
825   @param[out] op_coarse    Coarse grid operator
826   @param[out] op_prolong   Coarse to fine operator
827   @param[out] op_restrict  Fine to coarse operator
828 
829   @return An error code: 0 - success, otherwise - failure
830 
831   @ref Developer
832 **/
static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
    CeedOperator *op_restrict) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for composite operator; only single operators are supported here
  bool is_composite;
  ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
  if (is_composite)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Automatic multigrid setup for composite operators not supported");
  // LCOV_EXCL_STOP

  // Coarse Grid: reuse the fine-grid QFunctions, replacing the active field's
  //   restriction/basis with their coarse-grid counterparts
  ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
                            op_coarse); CeedChk(ierr);
  CeedElemRestriction rstr_fine = NULL;
  // -- Clone input fields, capturing the fine-grid active restriction for
  //    later use in the transfer operators
  for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
    if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      rstr_fine = op_fine->input_fields[i]->elem_restr;
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      // Passive fields are shared unchanged with the coarse operator
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  op_fine->input_fields[i]->elem_restr,
                                  op_fine->input_fields[i]->basis,
                                  op_fine->input_fields[i]->vec); CeedChk(ierr);
    }
  }
  // -- Clone output fields
  for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
    if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  op_fine->output_fields[i]->elem_restr,
                                  op_fine->output_fields[i]->basis,
                                  op_fine->output_fields[i]->vec); CeedChk(ierr);
    }
  }

  // Multiplicity vector: E-vector of p_mult_fine gathered back to an L-vector
  //   counts node multiplicity; the reciprocal is used to scale the transfer
  //   operators so shared nodes are not double-counted
  CeedVector mult_vec, mult_e_vec;
  ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
  CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
                                  mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
                                  CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
  ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);

  // Restriction: "Scale" QFunction multiplies the fine input by mult_vec,
  //   then the output field applies basis_c_to_f onto the coarse space
  CeedInt num_comp;
  ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
  CeedQFunction qf_restrict;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
  CeedChk(ierr);
  CeedInt *num_comp_r_data;
  ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
  num_comp_r_data[0] = num_comp;
  CeedQFunctionContext ctx_r;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
  // CEED_OWN_POINTER: the context takes ownership of num_comp_r_data
  ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_r_data), num_comp_r_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
                                CEED_EVAL_INTERP); CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_restrict);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);

  // Prolongation: mirror of the restriction - interpolate the coarse input
  //   via basis_c_to_f, then scale by mult_vec on the fine grid
  CeedQFunction qf_prolong;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
  CeedChk(ierr);
  CeedInt *num_comp_p_data;
  ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
  num_comp_p_data[0] = num_comp;
  CeedQFunctionContext ctx_p;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
  // CEED_OWN_POINTER: the context takes ownership of num_comp_p_data
  ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_p_data), num_comp_p_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_prolong);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);

  // Cleanup: release this function's references; basis_c_to_f and mult_vec
  //   are destroyed here, so the operators must hold their own references
  //   (NOTE(review): presumably CeedOperatorSetField retains one - verify)
  ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
  ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}
970 
971 /**
972   @brief Build 1D mass matrix and Laplacian with perturbation
973 
974   @param[in] interp_1d    Interpolation matrix in one dimension
975   @param[in] grad_1d      Gradient matrix in one dimension
976   @param[in] q_weight_1d  Quadrature weights in one dimension
977   @param[in] P_1d         Number of basis nodes in one dimension
978   @param[in] Q_1d         Number of quadrature points in one dimension
979   @param[in] dim          Dimension of basis
980   @param[out] mass        Assembled mass matrix in one dimension
981   @param[out] laplace     Assembled perturbed Laplacian in one dimension
982 
983   @return An error code: 0 - success, otherwise - failure
984 
985   @ref Developer
986 **/
987 CeedPragmaOptimizeOff
988 static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
989                                 const CeedScalar *grad_1d,
990                                 const CeedScalar *q_weight_1d, CeedInt P_1d,
991                                 CeedInt Q_1d, CeedInt dim,
992                                 CeedScalar *mass, CeedScalar *laplace) {
993 
994   for (CeedInt i=0; i<P_1d; i++)
995     for (CeedInt j=0; j<P_1d; j++) {
996       CeedScalar sum = 0.0;
997       for (CeedInt k=0; k<Q_1d; k++)
998         sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
999       mass[i+j*P_1d] = sum;
1000     }
1001   // -- Laplacian
1002   for (CeedInt i=0; i<P_1d; i++)
1003     for (CeedInt j=0; j<P_1d; j++) {
1004       CeedScalar sum = 0.0;
1005       for (CeedInt k=0; k<Q_1d; k++)
1006         sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
1007       laplace[i+j*P_1d] = sum;
1008     }
1009   CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
1010   for (CeedInt i=0; i<P_1d; i++)
1011     laplace[i+P_1d*i] += perturbation;
1012   return CEED_ERROR_SUCCESS;
1013 }
1014 CeedPragmaOptimizeOn
1015 
1016 /// @}
1017 
1018 /// ----------------------------------------------------------------------------
1019 /// CeedOperator Public API
1020 /// ----------------------------------------------------------------------------
1021 /// @addtogroup CeedOperatorUser
1022 /// @{
1023 
1024 /**
1025   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1026 
1027   This returns a CeedVector containing a matrix at each quadrature point
1028     providing the action of the CeedQFunction associated with the CeedOperator.
1029     The vector 'assembled' is of shape
1030       [num_elements, num_input_fields, num_output_fields, num_quad_points]
1031     and contains column-major matrices representing the action of the
1032     CeedQFunction for a corresponding quadrature point on an element. Inputs and
1033     outputs are in the order provided by the user when adding CeedOperator fields.
1034     For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
1035     'v', provided in that order, would result in an assembled QFunction that
1036     consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
1037     on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1038 
1039   Note: Calling this function asserts that setup is complete
1040           and sets the CeedOperator as immutable.
1041 
1042   @param op              CeedOperator to assemble CeedQFunction
1043   @param[out] assembled  CeedVector to store assembled CeedQFunction at
1044                            quadrature points
1045   @param[out] rstr       CeedElemRestriction for CeedVector containing assembled
1046                            CeedQFunction
1047   @param request         Address of CeedRequest for non-blocking completion, else
1048                            @ref CEED_REQUEST_IMMEDIATE
1049 
1050   @return An error code: 0 - success, otherwise - failure
1051 
1052   @ref User
1053 **/
1054 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
1055                                         CeedElemRestriction *rstr,
1056                                         CeedRequest *request) {
1057   int ierr;
1058   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1059 
1060   // Backend version
1061   if (op->LinearAssembleQFunction) {
1062     ierr = op->LinearAssembleQFunction(op, assembled, rstr, request);
1063     CeedChk(ierr);
1064   } else {
1065     // Fallback to reference Ceed
1066     if (!op->op_fallback) {
1067       ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1068     }
1069     // Assemble
1070     ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled,
1071            rstr, request); CeedChk(ierr);
1072   }
1073   return CEED_ERROR_SUCCESS;
1074 }
1075 
1076 /**
  @brief Assemble CeedQFunction and store result internally. Return copied
1078            references of stored data to the caller. Caller is responsible for
1079            ownership and destruction of the copied references. See also
1080            @ref CeedOperatorLinearAssembleQFunction
1081 
1082   @param op              CeedOperator to assemble CeedQFunction
1083   @param assembled       CeedVector to store assembled CeedQFunction at
1084                            quadrature points
1085   @param rstr            CeedElemRestriction for CeedVector containing assembled
1086                            CeedQFunction
1087   @param request         Address of CeedRequest for non-blocking completion, else
1088                            @ref CEED_REQUEST_IMMEDIATE
1089 
1090   @return An error code: 0 - success, otherwise - failure
1091 
1092   @ref User
1093 **/
int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op,
    CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunctionUpdate) {
    // Update the cached assembly in place when it exists; otherwise build it
    if (op->has_qf_assembled) {
      ierr = op->LinearAssembleQFunctionUpdate(op, op->qf_assembled,
             op->qf_assembled_rstr, request);
    } else {
      // NOTE(review): assumes backends that set LinearAssembleQFunctionUpdate
      //   also set LinearAssembleQFunction - confirm against backend registry
      ierr = op->LinearAssembleQFunction(op, &op->qf_assembled,
                                         &op->qf_assembled_rstr, request);
    }
    CeedChk(ierr);
    op->has_qf_assembled = true;
    // Copy reference to internally held copy; the caller owns (and must
    //   destroy) these copied references
    *assembled = NULL;
    *rstr = NULL;
    ierr = CeedVectorReferenceCopy(op->qf_assembled, assembled); CeedChk(ierr);
    // ierr from this call is checked by the trailing CeedChk below
    ierr = CeedElemRestrictionReferenceCopy(op->qf_assembled_rstr, rstr);
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble
    ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback,
           assembled, rstr, request); CeedChk(ierr);
  }
  CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}
1128 
1129 /**
1130   @brief Assemble the diagonal of a square linear CeedOperator
1131 
1132   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1133 
1134   Note: Currently only non-composite CeedOperators with a single field and
1135           composite CeedOperators with single field sub-operators are supported.
1136 
1137   Note: Calling this function asserts that setup is complete
1138           and sets the CeedOperator as immutable.
1139 
1140   @param op              CeedOperator to assemble CeedQFunction
1141   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1142   @param request         Address of CeedRequest for non-blocking completion, else
1143                            @ref CEED_REQUEST_IMMEDIATE
1144 
1145   @return An error code: 0 - success, otherwise - failure
1146 
1147   @ref User
1148 **/
1149 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
1150                                        CeedRequest *request) {
1151   int ierr;
1152   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1153 
1154   // Use backend version, if available
1155   if (op->LinearAssembleDiagonal) {
1156     ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
1157     return CEED_ERROR_SUCCESS;
1158   } else if (op->LinearAssembleAddDiagonal) {
1159     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1160     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1161     return CEED_ERROR_SUCCESS;
1162   } else {
1163     // Check for valid fallback resource
1164     const char *resource, *fallback_resource;
1165     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1166     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1167     CeedChk(ierr);
1168     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1169       // Fallback to reference Ceed
1170       if (!op->op_fallback) {
1171         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1172       }
1173       // Assemble
1174       ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
1175       CeedChk(ierr);
1176       return CEED_ERROR_SUCCESS;
1177     }
1178   }
1179 
1180   // Default interface implementation
1181   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1182   ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
1183   CeedChk(ierr);
1184   return CEED_ERROR_SUCCESS;
1185 }
1186 
1187 /**
1188   @brief Assemble the diagonal of a square linear CeedOperator
1189 
1190   This sums into a CeedVector the diagonal of a linear CeedOperator.
1191 
1192   Note: Currently only non-composite CeedOperators with a single field and
1193           composite CeedOperators with single field sub-operators are supported.
1194 
1195   Note: Calling this function asserts that setup is complete
1196           and sets the CeedOperator as immutable.
1197 
1198   @param op              CeedOperator to assemble CeedQFunction
1199   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1200   @param request         Address of CeedRequest for non-blocking completion, else
1201                            @ref CEED_REQUEST_IMMEDIATE
1202 
1203   @return An error code: 0 - success, otherwise - failure
1204 
1205   @ref User
1206 **/
1207 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
1208     CeedRequest *request) {
1209   int ierr;
1210   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1211 
1212   // Use backend version, if available
1213   if (op->LinearAssembleAddDiagonal) {
1214     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1215     return CEED_ERROR_SUCCESS;
1216   } else {
1217     // Check for valid fallback resource
1218     const char *resource, *fallback_resource;
1219     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1220     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1221     CeedChk(ierr);
1222     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1223       // Fallback to reference Ceed
1224       if (!op->op_fallback) {
1225         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1226       }
1227       // Assemble
1228       ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled,
1229              request); CeedChk(ierr);
1230       return CEED_ERROR_SUCCESS;
1231     }
1232   }
1233 
1234   // Default interface implementation
1235   bool is_composite;
1236   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1237   if (is_composite) {
1238     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1239            false, assembled); CeedChk(ierr);
1240     return CEED_ERROR_SUCCESS;
1241   } else {
1242     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
1243     CeedChk(ierr);
1244     return CEED_ERROR_SUCCESS;
1245   }
1246 }
1247 
1248 /**
1249   @brief Assemble the point block diagonal of a square linear CeedOperator
1250 
1251   This overwrites a CeedVector with the point block diagonal of a linear
1252     CeedOperator.
1253 
1254   Note: Currently only non-composite CeedOperators with a single field and
1255           composite CeedOperators with single field sub-operators are supported.
1256 
1257   Note: Calling this function asserts that setup is complete
1258           and sets the CeedOperator as immutable.
1259 
1260   @param op              CeedOperator to assemble CeedQFunction
1261   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1262                            diagonal, provided in row-major form with an
1263                            @a num_comp * @a num_comp block at each node. The dimensions
1264                            of this vector are derived from the active vector
1265                            for the CeedOperator. The array has shape
1266                            [nodes, component out, component in].
1267   @param request         Address of CeedRequest for non-blocking completion, else
1268                            CEED_REQUEST_IMMEDIATE
1269 
1270   @return An error code: 0 - success, otherwise - failure
1271 
1272   @ref User
1273 **/
1274 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
1275     CeedVector assembled, CeedRequest *request) {
1276   int ierr;
1277   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1278 
1279   // Use backend version, if available
1280   if (op->LinearAssemblePointBlockDiagonal) {
1281     ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
1282     CeedChk(ierr);
1283     return CEED_ERROR_SUCCESS;
1284   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1285     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1286     ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
1287            request); CeedChk(ierr);
1288     return CEED_ERROR_SUCCESS;
1289   } else {
1290     // Check for valid fallback resource
1291     const char *resource, *fallback_resource;
1292     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1293     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1294     CeedChk(ierr);
1295     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1296       // Fallback to reference Ceed
1297       if (!op->op_fallback) {
1298         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1299       }
1300       // Assemble
1301       ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
1302              assembled, request); CeedChk(ierr);
1303       return CEED_ERROR_SUCCESS;
1304     }
1305   }
1306 
1307   // Default interface implementation
1308   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1309   ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
1310   CeedChk(ierr);
1311   return CEED_ERROR_SUCCESS;
1312 }
1313 
1314 /**
1315   @brief Assemble the point block diagonal of a square linear CeedOperator
1316 
1317   This sums into a CeedVector with the point block diagonal of a linear
1318     CeedOperator.
1319 
1320   Note: Currently only non-composite CeedOperators with a single field and
1321           composite CeedOperators with single field sub-operators are supported.
1322 
1323   Note: Calling this function asserts that setup is complete
1324           and sets the CeedOperator as immutable.
1325 
1326   @param op              CeedOperator to assemble CeedQFunction
1327   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1328                            diagonal, provided in row-major form with an
1329                            @a num_comp * @a num_comp block at each node. The dimensions
1330                            of this vector are derived from the active vector
1331                            for the CeedOperator. The array has shape
1332                            [nodes, component out, component in].
1333   @param request         Address of CeedRequest for non-blocking completion, else
1334                            CEED_REQUEST_IMMEDIATE
1335 
1336   @return An error code: 0 - success, otherwise - failure
1337 
1338   @ref User
1339 **/
1340 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
1341     CeedVector assembled, CeedRequest *request) {
1342   int ierr;
1343   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1344 
1345   // Use backend version, if available
1346   if (op->LinearAssembleAddPointBlockDiagonal) {
1347     ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
1348     CeedChk(ierr);
1349     return CEED_ERROR_SUCCESS;
1350   } else {
1351     // Check for valid fallback resource
1352     const char *resource, *fallback_resource;
1353     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1354     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1355     CeedChk(ierr);
1356     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1357       // Fallback to reference Ceed
1358       if (!op->op_fallback) {
1359         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1360       }
1361       // Assemble
1362       ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback,
1363              assembled, request); CeedChk(ierr);
1364       return CEED_ERROR_SUCCESS;
1365     }
1366   }
1367 
1368   // Default interface implemenation
1369   bool is_composite;
1370   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1371   if (is_composite) {
1372     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1373            true, assembled); CeedChk(ierr);
1374     return CEED_ERROR_SUCCESS;
1375   } else {
1376     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
1377     CeedChk(ierr);
1378     return CEED_ERROR_SUCCESS;
1379   }
1380 }
1381 
1382 /**
1383    @brief Fully assemble the nonzero pattern of a linear operator.
1384 
1385    Expected to be used in conjunction with CeedOperatorLinearAssemble()
1386 
1387    The assembly routines use coordinate format, with num_entries tuples of the
1388    form (i, j, value) which indicate that value should be added to the matrix
1389    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1390    This function returns the number of entries and their (i, j) locations,
1391    while CeedOperatorLinearAssemble() provides the values in the same
1392    ordering.
1393 
1394    This will generally be slow unless your operator is low-order.
1395 
1396   Note: Calling this function asserts that setup is complete
1397           and sets the CeedOperator as immutable.
1398 
1399    @param[in]  op           CeedOperator to assemble
1400    @param[out] num_entries  Number of entries in coordinate nonzero pattern
1401    @param[out] rows         Row number for each entry
   @param[out] cols         Column number for each entry

   @return An error code: 0 - success, otherwise - failure

   @ref User
1405 **/
1406 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedInt *num_entries,
1407                                        CeedInt **rows, CeedInt **cols) {
1408   int ierr;
1409   CeedInt num_suboperators, single_entries;
1410   CeedOperator *sub_operators;
1411   bool is_composite;
1412   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1413 
1414   // Use backend version, if available
1415   if (op->LinearAssembleSymbolic) {
1416     ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
1417     return CEED_ERROR_SUCCESS;
1418   } else {
1419     // Check for valid fallback resource
1420     const char *resource, *fallback_resource;
1421     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1422     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1423     CeedChk(ierr);
1424     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1425       // Fallback to reference Ceed
1426       if (!op->op_fallback) {
1427         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1428       }
1429       // Assemble
1430       ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
1431              cols); CeedChk(ierr);
1432       return CEED_ERROR_SUCCESS;
1433     }
1434   }
1435 
1436   // Default interface implementation
1437 
1438   // count entries and allocate rows, cols arrays
1439   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1440   *num_entries = 0;
1441   if (is_composite) {
1442     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1443     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1444     for (int k = 0; k < num_suboperators; ++k) {
1445       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1446              &single_entries); CeedChk(ierr);
1447       *num_entries += single_entries;
1448     }
1449   } else {
1450     ierr = CeedSingleOperatorAssemblyCountEntries(op,
1451            &single_entries); CeedChk(ierr);
1452     *num_entries += single_entries;
1453   }
1454   ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
1455   ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);
1456 
1457   // assemble nonzero locations
1458   CeedInt offset = 0;
1459   if (is_composite) {
1460     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1461     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1462     for (int k = 0; k < num_suboperators; ++k) {
1463       ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
1464              *cols); CeedChk(ierr);
1465       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1466              &single_entries);
1467       CeedChk(ierr);
1468       offset += single_entries;
1469     }
1470   } else {
1471     ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
1472     CeedChk(ierr);
1473   }
1474 
1475   return CEED_ERROR_SUCCESS;
1476 }
1477 
1478 /**
1479    @brief Fully assemble the nonzero entries of a linear operator.
1480 
1481    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()
1482 
1483    The assembly routines use coordinate format, with num_entries tuples of the
1484    form (i, j, value) which indicate that value should be added to the matrix
1485    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1486    This function returns the values of the nonzero entries to be added, their
1487    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1488 
1489    This will generally be slow unless your operator is low-order.
1490 
1491   Note: Calling this function asserts that setup is complete
1492           and sets the CeedOperator as immutable.
1493 
1494    @param[in]  op      CeedOperator to assemble
1495    @param[out] values  Values to assemble into matrix
1496 
1497    @ref User
1498 **/
1499 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1500   int ierr;
1501   CeedInt num_suboperators, single_entries = 0;
1502   CeedOperator *sub_operators;
1503   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1504 
1505   // Use backend version, if available
1506   if (op->LinearAssemble) {
1507     ierr = op->LinearAssemble(op, values); CeedChk(ierr);
1508     return CEED_ERROR_SUCCESS;
1509   } else {
1510     // Check for valid fallback resource
1511     const char *resource, *fallback_resource;
1512     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1513     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1514     CeedChk(ierr);
1515     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1516       // Fallback to reference Ceed
1517       if (!op->op_fallback) {
1518         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1519       }
1520       // Assemble
1521       ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr);
1522       return CEED_ERROR_SUCCESS;
1523     }
1524   }
1525 
1526   // Default interface implementation
1527   bool is_composite;
1528   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1529 
1530   CeedInt offset = 0;
1531   if (is_composite) {
1532     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1533     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1534     for (int k = 0; k < num_suboperators; ++k) {
1535       ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
1536       CeedChk(ierr);
1537       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1538              &single_entries);
1539       CeedChk(ierr);
1540       offset += single_entries;
1541     }
1542   } else {
1543     ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
1544   }
1545 
1546   return CEED_ERROR_SUCCESS;
1547 }
1548 
1549 /**
1550   @brief Create a multigrid coarse operator and level transfer operators
1551            for a CeedOperator, creating the prolongation basis from the
1552            fine and coarse grid interpolation
1553 
1554   Note: Calling this function asserts that setup is complete
1555           and sets the CeedOperator as immutable.
1556 
1557   @param[in] op_fine       Fine grid operator
1558   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
1559   @param[in] rstr_coarse   Coarse grid restriction
1560   @param[in] basis_coarse  Coarse grid active vector basis
1561   @param[out] op_coarse    Coarse grid operator
1562   @param[out] op_prolong   Coarse to fine operator
1563   @param[out] op_restrict  Fine to coarse operator
1564 
1565   @return An error code: 0 - success, otherwise - failure
1566 
1567   @ref User
1568 **/
int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
                                     CeedVector p_mult_fine,
                                     CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
                                     CeedOperator *op_coarse, CeedOperator *op_prolong,
                                     CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  // Bases must agree in kind (both tensor or both non-tensor); sizes are
  // per-dimension (1D) in the tensor case, per-element otherwise
  CeedInt P_f, P_c, Q = Q_f;
  bool is_tensor_f, is_tensor_c;
  ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
  ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
  CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
  if (is_tensor_f && is_tensor_c) {
    ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
    ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
  } else if (!is_tensor_f && !is_tensor_c) {
    ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
  } else {
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MINOR,
                     "Bases must both be tensor or non-tensor");
    // LCOV_EXCL_STOP
  }

  // Work buffers: interp_f and interp_c hold copies of the interpolation
  // matrices (both are modified in place below); tau holds the Householder
  // scalars from the QR factorization
  ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
  ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
  ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
  ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
  if (is_tensor_f) {
    memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]);
    memcpy(interp_c, basis_coarse->interp_1d,
           Q*P_c*sizeof basis_coarse->interp_1d[0]);
  } else {
    memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]);
    memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]);
  }

  // Compute interp_c_to_f as the least-squares solution of
  // interp_f * interp_c_to_f = interp_c, via QR of interp_f
  // -- QR Factorization, interp_f = Q R
  ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);

  // -- Apply Qtranspose, interp_c = Qtranspose interp_c
  ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
                               Q, P_c, P_f, P_c, 1); CeedChk(ierr);

  // -- Apply Rinv, interp_c_to_f = Rinv interp_c
  // Back-substitution against the upper-triangular R (stored in the upper
  // part of interp_f), performed independently for each column j
  for (CeedInt j=0; j<P_c; j++) { // Column j
    // Last row: single division by the bottom-right entry of R
    interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
    for (CeedInt i=P_f-2; i>=0; i--) { // Row i
      interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
      // Subtract contributions of already-solved rows k > i
      for (CeedInt k=i+1; k<P_f; k++)
        interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
      interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
    }
  }
  ierr = CeedFree(&tau); CeedChk(ierr);
  ierr = CeedFree(&interp_c); CeedChk(ierr);
  ierr = CeedFree(&interp_f); CeedChk(ierr);

  // Complete with interp_c_to_f versions of code
  if (is_tensor_f) {
    ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  } else {
    ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  }

  // Cleanup
  ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}
1660 
1661 /**
1662   @brief Create a multigrid coarse operator and level transfer operators
1663            for a CeedOperator with a tensor basis for the active basis
1664 
1665   Note: Calling this function asserts that setup is complete
1666           and sets the CeedOperator as immutable.
1667 
1668   @param[in] op_fine        Fine grid operator
1669   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
1670   @param[in] rstr_coarse    Coarse grid restriction
1671   @param[in] basis_coarse   Coarse grid active vector basis
1672   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
1673   @param[out] op_coarse     Coarse grid operator
1674   @param[out] op_prolong    Coarse to fine operator
1675   @param[out] op_restrict   Fine to coarse operator
1676 
1677   @return An error code: 0 - success, otherwise - failure
1678 
1679   @ref User
1680 **/
1681 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
1682     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1683     const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
1684     CeedOperator *op_prolong, CeedOperator *op_restrict) {
1685   int ierr;
1686   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
1687   Ceed ceed;
1688   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
1689 
1690   // Check for compatible quadrature spaces
1691   CeedBasis basis_fine;
1692   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
1693   CeedInt Q_f, Q_c;
1694   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
1695   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
1696   if (Q_f != Q_c)
1697     // LCOV_EXCL_START
1698     return CeedError(ceed, CEED_ERROR_DIMENSION,
1699                      "Bases must have compatible quadrature spaces");
1700   // LCOV_EXCL_STOP
1701 
1702   // Coarse to fine basis
1703   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
1704   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
1705   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
1706   ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr);
1707   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
1708   CeedChk(ierr);
1709   P_1d_c = dim == 1 ? num_nodes_c :
1710            dim == 2 ? sqrt(num_nodes_c) :
1711            cbrt(num_nodes_c);
1712   CeedScalar *q_ref, *q_weight, *grad;
1713   ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr);
1714   ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr);
1715   ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr);
1716   CeedBasis basis_c_to_f;
1717   ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f,
1718                                  interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
1719   CeedChk(ierr);
1720   ierr = CeedFree(&q_ref); CeedChk(ierr);
1721   ierr = CeedFree(&q_weight); CeedChk(ierr);
1722   ierr = CeedFree(&grad); CeedChk(ierr);
1723 
1724   // Core code
1725   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
1726                                           basis_coarse, basis_c_to_f, op_coarse,
1727                                           op_prolong, op_restrict);
1728   CeedChk(ierr);
1729   return CEED_ERROR_SUCCESS;
1730 }
1731 
1732 /**
1733   @brief Create a multigrid coarse operator and level transfer operators
1734            for a CeedOperator with a non-tensor basis for the active vector basis
1735 
1736   Note: Calling this function asserts that setup is complete
1737           and sets the CeedOperator as immutable.
1738 
1739   @param[in] op_fine        Fine grid operator
1740   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
1741   @param[in] rstr_coarse    Coarse grid restriction
1742   @param[in] basis_coarse   Coarse grid active vector basis
1743   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
1744   @param[out] op_coarse     Coarse grid operator
1745   @param[out] op_prolong    Coarse to fine operator
1746   @param[out] op_restrict   Fine to coarse operator
1747 
1748   @return An error code: 0 - success, otherwise - failure
1749 
1750   @ref User
1751 **/
1752 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine,
1753                                        CeedVector p_mult_fine,
1754                                        CeedElemRestriction rstr_coarse,
1755                                        CeedBasis basis_coarse,
1756                                        const CeedScalar *interp_c_to_f,
1757                                        CeedOperator *op_coarse,
1758                                        CeedOperator *op_prolong,
1759                                        CeedOperator *op_restrict) {
1760   int ierr;
1761   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
1762   Ceed ceed;
1763   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
1764 
1765   // Check for compatible quadrature spaces
1766   CeedBasis basis_fine;
1767   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
1768   CeedInt Q_f, Q_c;
1769   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
1770   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
1771   if (Q_f != Q_c)
1772     // LCOV_EXCL_START
1773     return CeedError(ceed, CEED_ERROR_DIMENSION,
1774                      "Bases must have compatible quadrature spaces");
1775   // LCOV_EXCL_STOP
1776 
1777   // Coarse to fine basis
1778   CeedElemTopology topo;
1779   ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr);
1780   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
1781   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
1782   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
1783   ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr);
1784   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
1785   CeedChk(ierr);
1786   CeedScalar *q_ref, *q_weight, *grad;
1787   ierr = CeedCalloc(num_nodes_f, &q_ref); CeedChk(ierr);
1788   ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr);
1789   ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr);
1790   CeedBasis basis_c_to_f;
1791   ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f,
1792                            interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
1793   CeedChk(ierr);
1794   ierr = CeedFree(&q_ref); CeedChk(ierr);
1795   ierr = CeedFree(&q_weight); CeedChk(ierr);
1796   ierr = CeedFree(&grad); CeedChk(ierr);
1797 
1798   // Core code
1799   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
1800                                           basis_coarse, basis_c_to_f, op_coarse,
1801                                           op_prolong, op_restrict);
1802   CeedChk(ierr);
1803   return CEED_ERROR_SUCCESS;
1804 }
1805 
1806 /**
1807   @brief Build a FDM based approximate inverse for each element for a
1808            CeedOperator
1809 
1810   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization
1811     Method based approximate inverse. This function obtains the simultaneous
1812     diagonalization for the 1D mass and Laplacian operators,
1813       M = V^T V, K = V^T S V.
1814     The assembled QFunction is used to modify the eigenvalues from simultaneous
1815     diagonalization and obtain an approximate inverse of the form
1816       V^T S^hat V. The CeedOperator must be linear and non-composite. The
1817     associated CeedQFunction must therefore also be linear.
1818 
1819   Note: Calling this function asserts that setup is complete
1820           and sets the CeedOperator as immutable.
1821 
1822   @param op            CeedOperator to create element inverses
1823   @param[out] fdm_inv  CeedOperator to apply the action of a FDM based inverse
1824                          for each element
1825   @param request       Address of CeedRequest for non-blocking completion, else
1826                          @ref CEED_REQUEST_IMMEDIATE
1827 
1828   @return An error code: 0 - success, otherwise - failure
1829 
1830   @ref Backend
1831 **/
1832 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv,
1833                                         CeedRequest *request) {
1834   int ierr;
1835   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1836 
1837   // Use backend version, if available
1838   if (op->CreateFDMElementInverse) {
1839     ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr);
1840     return CEED_ERROR_SUCCESS;
1841   } else {
1842     // Check for valid fallback resource
1843     const char *resource, *fallback_resource;
1844     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1845     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1846     CeedChk(ierr);
1847     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1848       // Fallback to reference Ceed
1849       if (!op->op_fallback) {
1850         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1851       }
1852       // Assemble
1853       ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request);
1854       CeedChk(ierr);
1855       return CEED_ERROR_SUCCESS;
1856     }
1857   }
1858 
1859   // Interface implementation
1860   Ceed ceed, ceed_parent;
1861   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
1862   ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr);
1863   ceed_parent = ceed_parent ? ceed_parent : ceed;
1864   CeedQFunction qf;
1865   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
1866 
1867   // Determine active input basis
1868   bool interp = false, grad = false;
1869   CeedBasis basis = NULL;
1870   CeedElemRestriction rstr = NULL;
1871   CeedOperatorField *op_fields;
1872   CeedQFunctionField *qf_fields;
1873   CeedInt num_input_fields;
1874   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL);
1875   CeedChk(ierr);
1876   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
1877   for (CeedInt i=0; i<num_input_fields; i++) {
1878     CeedVector vec;
1879     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
1880     if (vec == CEED_VECTOR_ACTIVE) {
1881       CeedEvalMode eval_mode;
1882       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
1883       interp = interp || eval_mode == CEED_EVAL_INTERP;
1884       grad = grad || eval_mode == CEED_EVAL_GRAD;
1885       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr);
1886       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
1887     }
1888   }
1889   if (!basis)
1890     // LCOV_EXCL_START
1891     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
1892   // LCOV_EXCL_STOP
1893   CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1,
1894                                                 l_size = 1;
1895   ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr);
1896   ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr);
1897   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr);
1898   ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr);
1899   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
1900   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
1901   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
1902   ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr);
1903 
1904   // Build and diagonalize 1D Mass and Laplacian
1905   bool tensor_basis;
1906   ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr);
1907   if (!tensor_basis)
1908     // LCOV_EXCL_START
1909     return CeedError(ceed, CEED_ERROR_BACKEND,
1910                      "FDMElementInverse only supported for tensor "
1911                      "bases");
1912   // LCOV_EXCL_STOP
1913   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
1914   ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr);
1915   ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr);
1916   ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr);
1917   ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr);
1918   ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr);
1919   // -- Build matrices
1920   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
1921   ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr);
1922   ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr);
1923   ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr);
1924   ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim,
1925                               mass, laplace); CeedChk(ierr);
1926 
1927   // -- Diagonalize
1928   ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d);
1929   CeedChk(ierr);
1930   ierr = CeedFree(&mass); CeedChk(ierr);
1931   ierr = CeedFree(&laplace); CeedChk(ierr);
1932   for (CeedInt i=0; i<P_1d; i++)
1933     for (CeedInt j=0; j<P_1d; j++)
1934       fdm_interp[i+j*P_1d] = x[j+i*P_1d];
1935   ierr = CeedFree(&x); CeedChk(ierr);
1936 
1937   // Assemble QFunction
1938   CeedVector assembled;
1939   CeedElemRestriction rstr_qf;
1940   ierr =  CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled,
1941           &rstr_qf, request); CeedChk(ierr);
1942   CeedInt layout[3];
1943   ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr);
1944   ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr);
1945   CeedScalar max_norm = 0;
1946   ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr);
1947 
1948   // Calculate element averages
1949   CeedInt num_modes = (interp?1:0) + (grad?dim:0);
1950   CeedScalar *elem_avg;
1951   const CeedScalar *assembled_array, *q_weight_array;
1952   CeedVector q_weight;
1953   ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr);
1954   ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT,
1955                         CEED_VECTOR_NONE, q_weight); CeedChk(ierr);
1956   ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array);
1957   CeedChk(ierr);
1958   ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array);
1959   CeedChk(ierr);
1960   ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr);
1961   const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
1962   for (CeedInt e=0; e<num_elem; e++) {
1963     CeedInt count = 0;
1964     for (CeedInt q=0; q<num_qpts; q++)
1965       for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++)
1966         if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) >
1967             qf_value_bound) {
1968           elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] /
1969                          q_weight_array[q];
1970           count++;
1971         }
1972     if (count) {
1973       elem_avg[e] /= count;
1974     } else {
1975       elem_avg[e] = 1.0;
1976     }
1977   }
1978   ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr);
1979   ierr = CeedVectorDestroy(&assembled); CeedChk(ierr);
1980   ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr);
1981   ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr);
1982 
1983   // Build FDM diagonal
1984   CeedVector q_data;
1985   CeedScalar *q_data_array, *fdm_diagonal;
1986   ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr);
1987   const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON;
1988   for (CeedInt c=0; c<num_comp; c++)
1989     for (CeedInt n=0; n<elem_size; n++) {
1990       if (interp)
1991         fdm_diagonal[c*elem_size + n] = 1.0;
1992       if (grad)
1993         for (CeedInt d=0; d<dim; d++) {
1994           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
1995           fdm_diagonal[c*elem_size + n] += lambda[i];
1996         }
1997       if (fabs(fdm_diagonal[c*elem_size + n]) < fdm_diagonal_bound)
1998         fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound;
1999     }
2000   ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data);
2001   CeedChk(ierr);
2002   ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr);
2003   ierr = CeedVectorGetArray(q_data, CEED_MEM_HOST, &q_data_array); CeedChk(ierr);
2004   for (CeedInt e=0; e<num_elem; e++)
2005     for (CeedInt c=0; c<num_comp; c++)
2006       for (CeedInt n=0; n<elem_size; n++)
2007         q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] *
2008             fdm_diagonal[c*elem_size + n]);
2009   ierr = CeedFree(&elem_avg); CeedChk(ierr);
2010   ierr = CeedFree(&fdm_diagonal); CeedChk(ierr);
2011   ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr);
2012 
2013   // Setup FDM operator
2014   // -- Basis
2015   CeedBasis fdm_basis;
2016   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2017   ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr);
2018   ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr);
2019   ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr);
2020   ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d,
2021                                  fdm_interp, grad_dummy, q_ref_dummy,
2022                                  q_weight_dummy, &fdm_basis); CeedChk(ierr);
2023   ierr = CeedFree(&fdm_interp); CeedChk(ierr);
2024   ierr = CeedFree(&grad_dummy); CeedChk(ierr);
2025   ierr = CeedFree(&q_ref_dummy); CeedChk(ierr);
2026   ierr = CeedFree(&q_weight_dummy); CeedChk(ierr);
2027   ierr = CeedFree(&lambda); CeedChk(ierr);
2028 
2029   // -- Restriction
2030   CeedElemRestriction rstr_qd_i;
2031   CeedInt strides[3] = {1, elem_size, elem_size*num_comp};
2032   ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size,
2033                                           num_comp, num_elem*num_comp*elem_size,
2034                                           strides, &rstr_qd_i); CeedChk(ierr);
2035   // -- QFunction
2036   CeedQFunction qf_fdm;
2037   ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm);
2038   CeedChk(ierr);
2039   ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP);
2040   CeedChk(ierr);
2041   ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE);
2042   CeedChk(ierr);
2043   ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP);
2044   CeedChk(ierr);
2045   // -- QFunction context
2046   CeedInt *num_comp_data;
2047   ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr);
2048   num_comp_data[0] = num_comp;
2049   CeedQFunctionContext ctx_fdm;
2050   ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr);
2051   ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER,
2052                                      sizeof(*num_comp_data), num_comp_data);
2053   CeedChk(ierr);
2054   ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr);
2055   ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr);
2056   // -- Operator
2057   ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv);
2058   CeedChk(ierr);
2059   CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
2060   CeedChk(ierr);
2061   CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED,
2062                        q_data); CeedChk(ierr);
2063   CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
2064   CeedChk(ierr);
2065 
2066   // Cleanup
2067   ierr = CeedVectorDestroy(&q_data); CeedChk(ierr);
2068   ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr);
2069   ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr);
2070   ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr);
2071 
2072   return CEED_ERROR_SUCCESS;
2073 }
2074 
2075 /// @}
2076