xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision e79b91d9f61753a734e6e21c778d772fcdbcc265)
1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3 // reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <ceed-impl.h>
20 #include <math.h>
21 #include <stdbool.h>
22 #include <stdio.h>
23 #include <string.h>
24 
25 /// @file
26 /// Implementation of CeedOperator preconditioning interfaces
27 
28 /// ----------------------------------------------------------------------------
29 /// CeedOperator Library Internal Preconditioning Functions
30 /// ----------------------------------------------------------------------------
31 /// @addtogroup CeedOperatorDeveloper
32 /// @{
33 
34 /**
35   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced
36          CeedOperator functionality
37 
38   @param op  CeedOperator to create fallback for
39 
40   @return An error code: 0 - success, otherwise - failure
41 
42   @ref Developer
43 **/
44 int CeedOperatorCreateFallback(CeedOperator op) {
45   int ierr;
46 
47   // Fallback Ceed
48   const char *resource, *fallback_resource;
49   ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
50   ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
51   CeedChk(ierr);
52   if (!strcmp(resource, fallback_resource))
53     // LCOV_EXCL_START
54     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
55                      "Backend %s cannot create an operator"
56                      "fallback to resource %s", resource, fallback_resource);
57   // LCOV_EXCL_STOP
58 
59   // Fallback Ceed
60   Ceed ceed_ref;
61   if (!op->ceed->op_fallback_ceed) {
62     ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr);
63     ceed_ref->op_fallback_parent = op->ceed;
64     ceed_ref->Error = op->ceed->Error;
65     op->ceed->op_fallback_ceed = ceed_ref;
66   }
67   ceed_ref = op->ceed->op_fallback_ceed;
68 
69   // Clone Op
70   CeedOperator op_ref;
71   ierr = CeedCalloc(1, &op_ref); CeedChk(ierr);
72   memcpy(op_ref, op, sizeof(*op_ref));
73   op_ref->data = NULL;
74   op_ref->is_interface_setup = false;
75   op_ref->is_backend_setup = false;
76   op_ref->ceed = ceed_ref;
77   ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr);
78   op->op_fallback = op_ref;
79 
80   // Clone QF
81   CeedQFunction qf_ref;
82   ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr);
83   memcpy(qf_ref, (op->qf), sizeof(*qf_ref));
84   qf_ref->data = NULL;
85   qf_ref->ceed = ceed_ref;
86   ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr);
87   op_ref->qf = qf_ref;
88   op->qf_fallback = qf_ref;
89   return CEED_ERROR_SUCCESS;
90 }
91 
92 /**
93   @brief Select correct basis matrix pointer based on CeedEvalMode
94 
95   @param[in] eval_mode   Current basis evaluation mode
96   @param[in] identity    Pointer to identity matrix
97   @param[in] interp      Pointer to interpolation matrix
98   @param[in] grad        Pointer to gradient matrix
99   @param[out] basis_ptr  Basis pointer to set
100 
101   @ref Developer
102 **/
103 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode,
104     const CeedScalar *identity, const CeedScalar *interp,
105     const CeedScalar *grad, const CeedScalar **basis_ptr) {
106   switch (eval_mode) {
107   case CEED_EVAL_NONE:
108     *basis_ptr = identity;
109     break;
110   case CEED_EVAL_INTERP:
111     *basis_ptr = interp;
112     break;
113   case CEED_EVAL_GRAD:
114     *basis_ptr = grad;
115     break;
116   case CEED_EVAL_WEIGHT:
117   case CEED_EVAL_DIV:
118   case CEED_EVAL_CURL:
119     break; // Caught by QF Assembly
120   }
121 }
122 
123 /**
124   @brief Create point block restriction for active operator field
125 
126   @param[in] rstr              Original CeedElemRestriction for active field
127   @param[out] pointblock_rstr  Address of the variable where the newly created
128                                  CeedElemRestriction will be stored
129 
130   @return An error code: 0 - success, otherwise - failure
131 
132   @ref Developer
133 **/
134 static int CeedOperatorCreateActivePointBlockRestriction(
135   CeedElemRestriction rstr,
136   CeedElemRestriction *pointblock_rstr) {
137   int ierr;
138   Ceed ceed;
139   ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr);
140   const CeedInt *offsets;
141   ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets);
142   CeedChk(ierr);
143 
144   // Expand offsets
145   CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1,
146                                                       *pointblock_offsets;
147   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
148   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
149   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
150   ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr);
151   CeedInt shift = num_comp;
152   if (comp_stride != 1)
153     shift *= num_comp;
154   ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets);
155   CeedChk(ierr);
156   for (CeedInt i = 0; i < num_elem*elem_size; i++) {
157     pointblock_offsets[i] = offsets[i]*shift;
158     if (pointblock_offsets[i] > max)
159       max = pointblock_offsets[i];
160   }
161 
162   // Create new restriction
163   ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp,
164                                    1, max + num_comp*num_comp, CEED_MEM_HOST,
165                                    CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr);
166   CeedChk(ierr);
167 
168   // Cleanup
169   ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr);
170 
171   return CEED_ERROR_SUCCESS;
172 }
173 
174 /**
175   @brief Core logic for assembling operator diagonal or point block diagonal
176 
177   @param[in] op             CeedOperator to assemble point block diagonal
178   @param[in] request        Address of CeedRequest for non-blocking completion, else
179                               CEED_REQUEST_IMMEDIATE
180   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
181   @param[out] assembled     CeedVector to store assembled diagonal
182 
183   @return An error code: 0 - success, otherwise - failure
184 
185   @ref Developer
186 **/
static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op,
    CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);

  // Assemble QFunction -- gives pointwise D values plus the element
  // restriction describing their memory layout
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedInt num_input_fields, num_output_fields;
  ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
  CeedChk(ierr);
  CeedVector assembled_qf;
  CeedElemRestriction rstr;
  ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op,  &assembled_qf,
         &rstr, request); CeedChk(ierr);
  // layout[0/1/2] are the node, component, and element strides of the
  // assembled QFunction data
  CeedInt layout[3];
  ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
  // Max norm is used below to skip near-zero QFunction entries
  CeedScalar max_norm = 0;
  ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr);

  // Determine active input basis -- collect one eval mode entry per basis
  // "column block" (GRAD contributes dim entries, NONE/INTERP one each)
  CeedOperatorField *op_fields;
  CeedQFunctionField *qf_fields;
  ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
  CeedInt num_eval_mode_in = 0, num_comp, dim = 1;
  CeedEvalMode *eval_mode_in = NULL;
  CeedBasis basis_in = NULL;
  CeedElemRestriction rstr_in = NULL;
  for (CeedInt i=0; i<num_input_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr);
      ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr);
      ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      // All active inputs must share one restriction for this algorithm
      if (rstr_in && rstr_in != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_in = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
        eval_mode_in[num_eval_mode_in] = eval_mode;
        num_eval_mode_in += 1;
        break;
      case CEED_EVAL_GRAD:
        // Gradient contributes one entry per spatial dimension
        ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_in[num_eval_mode_in+d] = eval_mode;
        num_eval_mode_in += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Determine active output basis -- mirrors the input collection above
  ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
  CeedInt num_eval_mode_out = 0;
  CeedEvalMode *eval_mode_out = NULL;
  CeedBasis basis_out = NULL;
  CeedElemRestriction rstr_out = NULL;
  for (CeedInt i=0; i<num_output_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      if (rstr_out && rstr_out != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_out = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
        eval_mode_out[num_eval_mode_out] = eval_mode;
        num_eval_mode_out += 1;
        break;
      case CEED_EVAL_GRAD:
        ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_out[num_eval_mode_out+d] = eval_mode;
        num_eval_mode_out += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Assemble point block diagonal restriction, if needed
  CeedElemRestriction diag_rstr = rstr_out;
  if (is_pointblock) {
    ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr);
    CeedChk(ierr);
  }

  // Create diagonal vector (element-local storage, scattered at the end)
  CeedVector elem_diag;
  ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
  CeedChk(ierr);

  // Assemble element operator diagonals
  CeedScalar *elem_diag_array;
  const CeedScalar *assembled_qf_array;
  ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr);
  ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
  CeedChk(ierr);
  ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
  CeedChk(ierr);
  CeedInt num_elem, num_nodes, num_qpts;
  ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
  // Basis matrices -- an identity stand-in is built only if some field uses
  // CEED_EVAL_NONE (values used directly at quadrature points)
  const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
  CeedScalar *identity = NULL;
  bool evalNone = false;
  for (CeedInt i=0; i<num_eval_mode_in; i++)
    evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE);
  for (CeedInt i=0; i<num_eval_mode_out; i++)
    evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE);
  if (evalNone) {
    // Rectangular "identity": ones on the main diagonal up to
    // min(num_nodes, num_qpts)
    ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr);
    for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++)
      identity[i*num_nodes+i] = 1.0;
  }
  ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
  ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr);
  // Compute the diagonal of B^T D B
  // Entries within 100*eps of the max norm are treated as zero and skipped
  const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
  // Each element
  for (CeedInt e=0; e<num_elem; e++) {
    CeedInt d_out = -1;
    // Each basis eval mode pair; d_out/d_in advance through GRAD dimensions
    for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
      const CeedScalar *bt = NULL;
      if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
        d_out += 1;
      CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out,
                                  &grad_out[d_out*num_qpts*num_nodes], &bt);
      CeedInt d_in = -1;
      for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
        const CeedScalar *b = NULL;
        if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
          d_in += 1;
        CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in,
                                    &grad_in[d_in*num_qpts*num_nodes], &b);
        // Each component
        for (CeedInt c_out=0; c_out<num_comp; c_out++)
          // Each qpoint/node pair
          for (CeedInt q=0; q<num_qpts; q++)
            if (is_pointblock) {
              // Point Block Diagonal -- accumulate the full num_comp x
              // num_comp block for each node
              for (CeedInt c_in=0; c_in<num_comp; c_in++) {
                const CeedScalar qf_value =
                  assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)*
                                                     num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
                if (fabs(qf_value) > qf_value_bound)
                  for (CeedInt n=0; n<num_nodes; n++)
                    elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
                      bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
              }
            } else {
              // Diagonal Only -- input component equals output component
              // (c_out used for both indices)
              const CeedScalar qf_value =
                assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)*
                                                   num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
              if (fabs(qf_value) > qf_value_bound)
                for (CeedInt n=0; n<num_nodes; n++)
                  elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
                    bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
            }
      }
    }
  }
  ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr);
  ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
  CeedChk(ierr);

  // Assemble local operator diagonal -- scatter element contributions into
  // the (already-initialized) assembled vector; note this ADDS into it
  ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag,
                                  assembled, request); CeedChk(ierr);

  // Cleanup
  if (is_pointblock) {
    ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr);
  }
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
  ierr = CeedFree(&identity); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}
407 
408 /**
409   @brief Core logic for assembling composite operator diagonal
410 
411   @param[in] op             CeedOperator to assemble point block diagonal
412   @param[in] request        Address of CeedRequest for non-blocking completion, else
413                             CEED_REQUEST_IMMEDIATE
414   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
415   @param[out] assembled     CeedVector to store assembled diagonal
416 
417   @return An error code: 0 - success, otherwise - failure
418 
419   @ref Developer
420 **/
421 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(
422   CeedOperator op, CeedRequest *request, const bool is_pointblock,
423   CeedVector assembled) {
424   int ierr;
425   CeedInt num_sub;
426   CeedOperator *suboperators;
427   ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr);
428   ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr);
429   for (CeedInt i = 0; i < num_sub; i++) {
430     ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request,
431            is_pointblock, assembled); CeedChk(ierr);
432   }
433   return CEED_ERROR_SUCCESS;
434 }
435 
436 /**
437   @brief Build nonzero pattern for non-composite operator
438 
439   Users should generally use CeedOperatorLinearAssembleSymbolic()
440 
441   @param[in] op      CeedOperator to assemble nonzero pattern
442   @param[in] offset  Offset for number of entries
443   @param[out] rows   Row number for each entry
444   @param[out] cols   Column number for each entry
445 
446   @return An error code: 0 - success, otherwise - failure
447 
448   @ref Developer
449 **/
450 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset,
451     CeedInt *rows, CeedInt *cols) {
452   int ierr;
453   Ceed ceed = op->ceed;
454   if (op->is_composite)
455     // LCOV_EXCL_START
456     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
457                      "Composite operator not supported");
458   // LCOV_EXCL_STOP
459 
460   CeedElemRestriction rstr_in;
461   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr);
462   CeedSize num_nodes;
463   CeedInt num_elem, elem_size, num_comp;
464   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
465   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
466   ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr);
467   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
468   CeedInt layout_er[3];
469   ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr);
470 
471   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
472 
473   // Determine elem_dof relation
474   CeedVector index_vec;
475   ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr);
476   CeedScalar *array;
477   ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr);
478   for (CeedInt i = 0; i < num_nodes; ++i) {
479     array[i] = i;
480   }
481   ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr);
482   CeedVector elem_dof;
483   ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof);
484   CeedChk(ierr);
485   ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr);
486   CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec,
487                            elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
488   const CeedScalar *elem_dof_a;
489   ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a);
490   CeedChk(ierr);
491   ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr);
492 
493   // Determine i, j locations for element matrices
494   CeedInt count = 0;
495   for (int e = 0; e < num_elem; ++e) {
496     for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
497       for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
498         for (int i = 0; i < elem_size; ++i) {
499           for (int j = 0; j < elem_size; ++j) {
500             const CeedInt elem_dof_index_row = (i)*layout_er[0] +
501                                                (comp_out)*layout_er[1] + e*layout_er[2];
502             const CeedInt elem_dof_index_col = (j)*layout_er[0] +
503                                                (comp_in)*layout_er[1] + e*layout_er[2];
504 
505             const CeedInt row = elem_dof_a[elem_dof_index_row];
506             const CeedInt col = elem_dof_a[elem_dof_index_col];
507 
508             rows[offset + count] = row;
509             cols[offset + count] = col;
510             count++;
511           }
512         }
513       }
514     }
515   }
516   if (count != local_num_entries)
517     // LCOV_EXCL_START
518     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
519   // LCOV_EXCL_STOP
520   ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr);
521   ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr);
522 
523   return CEED_ERROR_SUCCESS;
524 }
525 
526 /**
527   @brief Assemble nonzero entries for non-composite operator
528 
529   Users should generally use CeedOperatorLinearAssemble()
530 
531   @param[in] op       CeedOperator to assemble
  @param[in] offset   Offset for number of entries
533   @param[out] values  Values to assemble into matrix
534 
535   @return An error code: 0 - success, otherwise - failure
536 
537   @ref Developer
538 **/
539 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
540                                       CeedVector values) {
541   int ierr;
542   Ceed ceed = op->ceed;
543   if (op->is_composite)
544     // LCOV_EXCL_START
545     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
546                      "Composite operator not supported");
547   // LCOV_EXCL_STOP
548 
549   // Assemble QFunction
550   CeedQFunction qf;
551   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
552   CeedVector assembled_qf;
553   CeedElemRestriction rstr_q;
554   ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(
555            op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
556 
557   CeedSize qf_length;
558   ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr);
559 
560   CeedInt num_input_fields, num_output_fields;
561   CeedOperatorField *input_fields;
562   CeedOperatorField *output_fields;
563   ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields,
564                                &num_output_fields, &output_fields); CeedChk(ierr);
565 
566   // Determine active input basis
567   CeedQFunctionField *qf_fields;
568   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
569   CeedInt num_eval_mode_in = 0, dim = 1;
570   CeedEvalMode *eval_mode_in = NULL;
571   CeedBasis basis_in = NULL;
572   CeedElemRestriction rstr_in = NULL;
573   for (CeedInt i=0; i<num_input_fields; i++) {
574     CeedVector vec;
575     ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr);
576     if (vec == CEED_VECTOR_ACTIVE) {
577       ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in);
578       CeedChk(ierr);
579       ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
580       ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in);
581       CeedChk(ierr);
582       CeedEvalMode eval_mode;
583       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
584       CeedChk(ierr);
585       switch (eval_mode) {
586       case CEED_EVAL_NONE:
587       case CEED_EVAL_INTERP:
588         ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
589         eval_mode_in[num_eval_mode_in] = eval_mode;
590         num_eval_mode_in += 1;
591         break;
592       case CEED_EVAL_GRAD:
593         ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
594         for (CeedInt d=0; d<dim; d++) {
595           eval_mode_in[num_eval_mode_in+d] = eval_mode;
596         }
597         num_eval_mode_in += dim;
598         break;
599       case CEED_EVAL_WEIGHT:
600       case CEED_EVAL_DIV:
601       case CEED_EVAL_CURL:
602         break; // Caught by QF Assembly
603       }
604     }
605   }
606 
607   // Determine active output basis
608   ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
609   CeedInt num_eval_mode_out = 0;
610   CeedEvalMode *eval_mode_out = NULL;
611   CeedBasis basis_out = NULL;
612   CeedElemRestriction rstr_out = NULL;
613   for (CeedInt i=0; i<num_output_fields; i++) {
614     CeedVector vec;
615     ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr);
616     if (vec == CEED_VECTOR_ACTIVE) {
617       ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out);
618       CeedChk(ierr);
619       ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out);
620       CeedChk(ierr);
621       CeedEvalMode eval_mode;
622       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
623       CeedChk(ierr);
624       switch (eval_mode) {
625       case CEED_EVAL_NONE:
626       case CEED_EVAL_INTERP:
627         ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
628         eval_mode_out[num_eval_mode_out] = eval_mode;
629         num_eval_mode_out += 1;
630         break;
631       case CEED_EVAL_GRAD:
632         ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
633         for (CeedInt d=0; d<dim; d++) {
634           eval_mode_out[num_eval_mode_out+d] = eval_mode;
635         }
636         num_eval_mode_out += dim;
637         break;
638       case CEED_EVAL_WEIGHT:
639       case CEED_EVAL_DIV:
640       case CEED_EVAL_CURL:
641         break; // Caught by QF Assembly
642       }
643     }
644   }
645 
646   if (num_eval_mode_in == 0 || num_eval_mode_out == 0)
647     // LCOV_EXCL_START
648     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
649                      "Cannot assemble operator with out inputs/outputs");
650   // LCOV_EXCL_STOP
651 
652   CeedInt num_elem, elem_size, num_qpts, num_comp;
653   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
654   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
655   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
656   ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
657 
658   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
659 
660   // loop over elements and put in data structure
661   const CeedScalar *interp_in, *grad_in;
662   ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
663   ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
664 
665   const CeedScalar *assembled_qf_array;
666   ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
667   CeedChk(ierr);
668 
669   CeedInt layout_qf[3];
670   ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr);
671   ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr);
672 
673   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
674   CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size];
675   CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size];
676   CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in *
677                                      num_qpts]; // logically 3-tensor
678   CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in];
679   CeedScalar elem_mat[elem_size * elem_size];
680   int count = 0;
681   CeedScalar *vals;
682   ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr);
683   for (int e = 0; e < num_elem; ++e) {
684     for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
685       for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
686         for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) {
687           B_mat_in[ell] = 0.0;
688         }
689         for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) {
690           B_mat_out[ell] = 0.0;
691         }
692         // Store block-diagonal D matrix as collection of small dense blocks
693         for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) {
694           D_mat[ell] = 0.0;
695         }
696         // form element matrix itself (for each block component)
697         for (int ell = 0; ell < elem_size*elem_size; ++ell) {
698           elem_mat[ell] = 0.0;
699         }
700         for (int q = 0; q < num_qpts; ++q) {
701           for (int n = 0; n < elem_size; ++n) {
702             CeedInt d_in = -1;
703             for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) {
704               const int qq = num_eval_mode_in*q;
705               if (eval_mode_in[e_in] == CEED_EVAL_INTERP) {
706                 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n];
707               } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) {
708                 d_in += 1;
709                 B_mat_in[(qq+e_in)*elem_size + n] +=
710                   grad_in[(d_in*num_qpts+q) * elem_size + n];
711               } else {
712                 // LCOV_EXCL_START
713                 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
714                 // LCOV_EXCL_STOP
715               }
716             }
717             CeedInt d_out = -1;
718             for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) {
719               const int qq = num_eval_mode_out*q;
720               if (eval_mode_out[e_out] == CEED_EVAL_INTERP) {
721                 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n];
722               } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) {
723                 d_out += 1;
724                 B_mat_out[(qq+e_out)*elem_size + n] +=
725                   grad_in[(d_out*num_qpts+q) * elem_size + n];
726               } else {
727                 // LCOV_EXCL_START
728                 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
729                 // LCOV_EXCL_STOP
730               }
731             }
732           }
733           for (int ei = 0; ei < num_eval_mode_out; ++ei) {
734             for (int ej = 0; ej < num_eval_mode_in; ++ej) {
735               const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp
736                                           +comp_out;
737               const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
738                                 e*layout_qf[2];
739               D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index];
740             }
741           }
742         }
743         // Compute B^T*D
744         for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) {
745           BTD[ell] = 0.0;
746         }
747         for (int j = 0; j<elem_size; ++j) {
748           for (int q = 0; q<num_qpts; ++q) {
749             int qq = num_eval_mode_out*q;
750             for (int ei = 0; ei < num_eval_mode_in; ++ei) {
751               for (int ej = 0; ej < num_eval_mode_out; ++ej) {
752                 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] +=
753                   B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q];
754               }
755             }
756           }
757         }
758 
759         ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size,
760                                   elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);
761 
762         // put element matrix in coordinate data structure
763         for (int i = 0; i < elem_size; ++i) {
764           for (int j = 0; j < elem_size; ++j) {
765             vals[offset + count] = elem_mat[i*elem_size + j];
766             count++;
767           }
768         }
769       }
770     }
771   }
772   if (count != local_num_entries)
773     // LCOV_EXCL_START
774     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
775   // LCOV_EXCL_STOP
776   ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr);
777 
778   ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
779   CeedChk(ierr);
780   ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
781   ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
782   ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
783 
784   return CEED_ERROR_SUCCESS;
785 }
786 
787 /**
788   @brief Count number of entries for assembled CeedOperator
789 
790   @param[in] op            CeedOperator to assemble
791   @param[out] num_entries  Number of entries in assembled representation
792 
793   @return An error code: 0 - success, otherwise - failure
794 
795   @ref Utility
796 **/
797 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op,
798     CeedInt *num_entries) {
799   int ierr;
800   CeedElemRestriction rstr;
801   CeedInt num_elem, elem_size, num_comp;
802 
803   if (op->is_composite)
804     // LCOV_EXCL_START
805     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
806                      "Composite operator not supported");
807   // LCOV_EXCL_STOP
808   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr);
809   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
810   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
811   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
812   *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
813 
814   return CEED_ERROR_SUCCESS;
815 }
816 
817 /**
818   @brief Common code for creating a multigrid coarse operator and level
819            transfer operators for a CeedOperator
820 
821   @param[in] op_fine       Fine grid operator
822   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
823   @param[in] rstr_coarse   Coarse grid restriction
824   @param[in] basis_coarse  Coarse grid active vector basis
825   @param[in] basis_c_to_f  Basis for coarse to fine interpolation
826   @param[out] op_coarse    Coarse grid operator
827   @param[out] op_prolong   Coarse to fine operator
828   @param[out] op_restrict  Fine to coarse operator
829 
830   @return An error code: 0 - success, otherwise - failure
831 
832   @ref Developer
833 **/
static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
    CeedOperator *op_restrict) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for composite operator
  bool is_composite;
  ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
  if (is_composite)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Automatic multigrid setup for composite operators not supported");
  // LCOV_EXCL_STOP

  // Coarse Grid
  // -- The coarse operator reuses the fine grid QFunctions; only the active
  //    field's restriction and basis are swapped for coarse counterparts
  ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
                            op_coarse); CeedChk(ierr);
  CeedElemRestriction rstr_fine = NULL;
  // -- Clone input fields
  for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
    if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      // Remember the fine grid active restriction for the transfer operators
      rstr_fine = op_fine->input_fields[i]->elem_restr;
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      // Passive fields are passed through unchanged
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  op_fine->input_fields[i]->elem_restr,
                                  op_fine->input_fields[i]->basis,
                                  op_fine->input_fields[i]->vec); CeedChk(ierr);
    }
  }
  // -- Clone output fields
  for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
    if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  op_fine->output_fields[i]->elem_restr,
                                  op_fine->output_fields[i]->basis,
                                  op_fine->output_fields[i]->vec); CeedChk(ierr);
    }
  }

  // Multiplicity vector
  // -- Gather p_mult_fine to an E-vector, then scatter-add back to an
  //    L-vector and take the reciprocal; mult_vec is used as the "scale"
  //    field below, presumably to average contributions at nodes shared
  //    between elements — confirm against p_mult_fine's definition in callers
  CeedVector mult_vec, mult_e_vec;
  ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
  CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
                                  mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
                                  CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
  ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);

  // Restriction
  // -- Built from the library "Scale" QFunction: reads the fine grid active
  //    vector and the mult_vec scale factor, applies basis_c_to_f on the
  //    output side (CEED_EVAL_INTERP) to land on the coarse space
  CeedInt num_comp;
  ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
  CeedQFunction qf_restrict;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
  CeedChk(ierr);
  // -- Context owns num_comp_r_data (CEED_OWN_POINTER) after this call
  CeedInt *num_comp_r_data;
  ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
  num_comp_r_data[0] = num_comp;
  CeedQFunctionContext ctx_r;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_r_data), num_comp_r_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
                                CEED_EVAL_INTERP); CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_restrict);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);

  // Prolongation
  // -- Mirror image of the restriction: basis_c_to_f interpolates the coarse
  //    active input to the fine grid, then scales by mult_vec
  CeedQFunction qf_prolong;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
  CeedChk(ierr);
  // -- Separate context/data for the prolongation QFunction (also owned)
  CeedInt *num_comp_p_data;
  ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
  num_comp_p_data[0] = num_comp;
  CeedQFunctionContext ctx_p;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_p_data), num_comp_p_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_prolong);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);

  // Cleanup
  // -- The caller's references to mult_vec and basis_c_to_f are released here;
  //    NOTE(review): presumably the operators hold their own references via
  //    CeedOperatorSetField — confirm reference-counting semantics
  ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
  ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}
971 
972 /**
973   @brief Build 1D mass matrix and Laplacian with perturbation
974 
975   @param[in] interp_1d    Interpolation matrix in one dimension
976   @param[in] grad_1d      Gradient matrix in one dimension
977   @param[in] q_weight_1d  Quadrature weights in one dimension
978   @param[in] P_1d         Number of basis nodes in one dimension
979   @param[in] Q_1d         Number of quadrature points in one dimension
980   @param[in] dim          Dimension of basis
981   @param[out] mass        Assembled mass matrix in one dimension
982   @param[out] laplace     Assembled perturbed Laplacian in one dimension
983 
984   @return An error code: 0 - success, otherwise - failure
985 
986   @ref Developer
987 **/
988 CeedPragmaOptimizeOff
989 static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
990                                 const CeedScalar *grad_1d,
991                                 const CeedScalar *q_weight_1d, CeedInt P_1d,
992                                 CeedInt Q_1d, CeedInt dim,
993                                 CeedScalar *mass, CeedScalar *laplace) {
994 
995   for (CeedInt i=0; i<P_1d; i++)
996     for (CeedInt j=0; j<P_1d; j++) {
997       CeedScalar sum = 0.0;
998       for (CeedInt k=0; k<Q_1d; k++)
999         sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
1000       mass[i+j*P_1d] = sum;
1001     }
1002   // -- Laplacian
1003   for (CeedInt i=0; i<P_1d; i++)
1004     for (CeedInt j=0; j<P_1d; j++) {
1005       CeedScalar sum = 0.0;
1006       for (CeedInt k=0; k<Q_1d; k++)
1007         sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
1008       laplace[i+j*P_1d] = sum;
1009     }
1010   CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
1011   for (CeedInt i=0; i<P_1d; i++)
1012     laplace[i+P_1d*i] += perturbation;
1013   return CEED_ERROR_SUCCESS;
1014 }
1015 CeedPragmaOptimizeOn
1016 
1017 /// @}
1018 
1019 /// ----------------------------------------------------------------------------
1020 /// CeedOperator Public API
1021 /// ----------------------------------------------------------------------------
1022 /// @addtogroup CeedOperatorUser
1023 /// @{
1024 
1025 /**
1026   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1027 
1028   This returns a CeedVector containing a matrix at each quadrature point
1029     providing the action of the CeedQFunction associated with the CeedOperator.
1030     The vector 'assembled' is of shape
1031       [num_elements, num_input_fields, num_output_fields, num_quad_points]
1032     and contains column-major matrices representing the action of the
1033     CeedQFunction for a corresponding quadrature point on an element. Inputs and
1034     outputs are in the order provided by the user when adding CeedOperator fields.
1035     For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
1036     'v', provided in that order, would result in an assembled QFunction that
1037     consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
1038     on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1039 
1040   Note: Calling this function asserts that setup is complete
1041           and sets the CeedOperator as immutable.
1042 
1043   @param op              CeedOperator to assemble CeedQFunction
1044   @param[out] assembled  CeedVector to store assembled CeedQFunction at
1045                            quadrature points
1046   @param[out] rstr       CeedElemRestriction for CeedVector containing assembled
1047                            CeedQFunction
1048   @param request         Address of CeedRequest for non-blocking completion, else
1049                            @ref CEED_REQUEST_IMMEDIATE
1050 
1051   @return An error code: 0 - success, otherwise - failure
1052 
1053   @ref User
1054 **/
1055 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
1056                                         CeedElemRestriction *rstr,
1057                                         CeedRequest *request) {
1058   int ierr;
1059   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1060 
1061   // Backend version
1062   if (op->LinearAssembleQFunction) {
1063     ierr = op->LinearAssembleQFunction(op, assembled, rstr, request);
1064     CeedChk(ierr);
1065   } else {
1066     // Fallback to reference Ceed
1067     if (!op->op_fallback) {
1068       ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1069     }
1070     // Assemble
1071     ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled,
1072            rstr, request); CeedChk(ierr);
1073   }
1074   return CEED_ERROR_SUCCESS;
1075 }
1076 
1077 /**
  @brief Assemble CeedQFunction and store result internally. Return copied
1079            references of stored data to the caller. Caller is responsible for
1080            ownership and destruction of the copied references. See also
1081            @ref CeedOperatorLinearAssembleQFunction
1082 
1083   @param op              CeedOperator to assemble CeedQFunction
1084   @param assembled       CeedVector to store assembled CeedQFunction at
1085                            quadrature points
1086   @param rstr            CeedElemRestriction for CeedVector containing assembled
1087                            CeedQFunction
1088   @param request         Address of CeedRequest for non-blocking completion, else
1089                            @ref CEED_REQUEST_IMMEDIATE
1090 
1091   @return An error code: 0 - success, otherwise - failure
1092 
1093   @ref User
1094 **/
int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op,
    CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Backend version
  if (op->LinearAssembleQFunctionUpdate) {
    if (op->has_qf_assembled) {
      // Subsequent calls: update the cached assembled data in place
      ierr = op->LinearAssembleQFunctionUpdate(op, op->qf_assembled,
             op->qf_assembled_rstr, request);
    } else {
      // First call: build and cache the assembled data on the operator
      // NOTE(review): assumes LinearAssembleQFunction is always set when
      //   LinearAssembleQFunctionUpdate is — confirm backends guarantee this
      ierr = op->LinearAssembleQFunction(op, &op->qf_assembled,
                                         &op->qf_assembled_rstr, request);
    }
    CeedChk(ierr);
    op->has_qf_assembled = true;
    // Copy reference to internally held copy
    // (caller owns the returned references and must destroy them)
    *assembled = NULL;
    *rstr = NULL;
    ierr = CeedVectorReferenceCopy(op->qf_assembled, assembled); CeedChk(ierr);
    ierr = CeedElemRestrictionReferenceCopy(op->qf_assembled_rstr, rstr);
  } else {
    // Fallback to reference Ceed
    if (!op->op_fallback) {
      ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
    }
    // Assemble
    ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback,
           assembled, rstr, request); CeedChk(ierr);
  }
  // Checks the CeedElemRestrictionReferenceCopy result from the backend branch
  CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}
1129 
1130 /**
1131   @brief Assemble the diagonal of a square linear CeedOperator
1132 
1133   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1134 
1135   Note: Currently only non-composite CeedOperators with a single field and
1136           composite CeedOperators with single field sub-operators are supported.
1137 
1138   Note: Calling this function asserts that setup is complete
1139           and sets the CeedOperator as immutable.
1140 
1141   @param op              CeedOperator to assemble CeedQFunction
1142   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1143   @param request         Address of CeedRequest for non-blocking completion, else
1144                            @ref CEED_REQUEST_IMMEDIATE
1145 
1146   @return An error code: 0 - success, otherwise - failure
1147 
1148   @ref User
1149 **/
1150 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
1151                                        CeedRequest *request) {
1152   int ierr;
1153   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1154 
1155   // Use backend version, if available
1156   if (op->LinearAssembleDiagonal) {
1157     ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
1158     return CEED_ERROR_SUCCESS;
1159   } else if (op->LinearAssembleAddDiagonal) {
1160     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1161     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1162     return CEED_ERROR_SUCCESS;
1163   } else {
1164     // Check for valid fallback resource
1165     const char *resource, *fallback_resource;
1166     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1167     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1168     CeedChk(ierr);
1169     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1170       // Fallback to reference Ceed
1171       if (!op->op_fallback) {
1172         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1173       }
1174       // Assemble
1175       ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
1176       CeedChk(ierr);
1177       return CEED_ERROR_SUCCESS;
1178     }
1179   }
1180 
1181   // Default interface implementation
1182   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1183   ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
1184   CeedChk(ierr);
1185   return CEED_ERROR_SUCCESS;
1186 }
1187 
1188 /**
1189   @brief Assemble the diagonal of a square linear CeedOperator
1190 
1191   This sums into a CeedVector the diagonal of a linear CeedOperator.
1192 
1193   Note: Currently only non-composite CeedOperators with a single field and
1194           composite CeedOperators with single field sub-operators are supported.
1195 
1196   Note: Calling this function asserts that setup is complete
1197           and sets the CeedOperator as immutable.
1198 
1199   @param op              CeedOperator to assemble CeedQFunction
1200   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1201   @param request         Address of CeedRequest for non-blocking completion, else
1202                            @ref CEED_REQUEST_IMMEDIATE
1203 
1204   @return An error code: 0 - success, otherwise - failure
1205 
1206   @ref User
1207 **/
1208 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
1209     CeedRequest *request) {
1210   int ierr;
1211   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1212 
1213   // Use backend version, if available
1214   if (op->LinearAssembleAddDiagonal) {
1215     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1216     return CEED_ERROR_SUCCESS;
1217   } else {
1218     // Check for valid fallback resource
1219     const char *resource, *fallback_resource;
1220     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1221     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1222     CeedChk(ierr);
1223     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1224       // Fallback to reference Ceed
1225       if (!op->op_fallback) {
1226         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1227       }
1228       // Assemble
1229       ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled,
1230              request); CeedChk(ierr);
1231       return CEED_ERROR_SUCCESS;
1232     }
1233   }
1234 
1235   // Default interface implementation
1236   bool is_composite;
1237   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1238   if (is_composite) {
1239     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1240            false, assembled); CeedChk(ierr);
1241     return CEED_ERROR_SUCCESS;
1242   } else {
1243     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
1244     CeedChk(ierr);
1245     return CEED_ERROR_SUCCESS;
1246   }
1247 }
1248 
1249 /**
1250   @brief Assemble the point block diagonal of a square linear CeedOperator
1251 
1252   This overwrites a CeedVector with the point block diagonal of a linear
1253     CeedOperator.
1254 
1255   Note: Currently only non-composite CeedOperators with a single field and
1256           composite CeedOperators with single field sub-operators are supported.
1257 
1258   Note: Calling this function asserts that setup is complete
1259           and sets the CeedOperator as immutable.
1260 
1261   @param op              CeedOperator to assemble CeedQFunction
1262   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1263                            diagonal, provided in row-major form with an
1264                            @a num_comp * @a num_comp block at each node. The dimensions
1265                            of this vector are derived from the active vector
1266                            for the CeedOperator. The array has shape
1267                            [nodes, component out, component in].
1268   @param request         Address of CeedRequest for non-blocking completion, else
1269                            CEED_REQUEST_IMMEDIATE
1270 
1271   @return An error code: 0 - success, otherwise - failure
1272 
1273   @ref User
1274 **/
1275 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
1276     CeedVector assembled, CeedRequest *request) {
1277   int ierr;
1278   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1279 
1280   // Use backend version, if available
1281   if (op->LinearAssemblePointBlockDiagonal) {
1282     ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
1283     CeedChk(ierr);
1284     return CEED_ERROR_SUCCESS;
1285   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1286     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1287     ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
1288            request); CeedChk(ierr);
1289     return CEED_ERROR_SUCCESS;
1290   } else {
1291     // Check for valid fallback resource
1292     const char *resource, *fallback_resource;
1293     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1294     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1295     CeedChk(ierr);
1296     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1297       // Fallback to reference Ceed
1298       if (!op->op_fallback) {
1299         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1300       }
1301       // Assemble
1302       ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
1303              assembled, request); CeedChk(ierr);
1304       return CEED_ERROR_SUCCESS;
1305     }
1306   }
1307 
1308   // Default interface implementation
1309   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1310   ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
1311   CeedChk(ierr);
1312   return CEED_ERROR_SUCCESS;
1313 }
1314 
1315 /**
1316   @brief Assemble the point block diagonal of a square linear CeedOperator
1317 
1318   This sums into a CeedVector with the point block diagonal of a linear
1319     CeedOperator.
1320 
1321   Note: Currently only non-composite CeedOperators with a single field and
1322           composite CeedOperators with single field sub-operators are supported.
1323 
1324   Note: Calling this function asserts that setup is complete
1325           and sets the CeedOperator as immutable.
1326 
1327   @param op              CeedOperator to assemble CeedQFunction
1328   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1329                            diagonal, provided in row-major form with an
1330                            @a num_comp * @a num_comp block at each node. The dimensions
1331                            of this vector are derived from the active vector
1332                            for the CeedOperator. The array has shape
1333                            [nodes, component out, component in].
1334   @param request         Address of CeedRequest for non-blocking completion, else
1335                            CEED_REQUEST_IMMEDIATE
1336 
1337   @return An error code: 0 - success, otherwise - failure
1338 
1339   @ref User
1340 **/
int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
    CeedVector assembled, CeedRequest *request) {
  int ierr;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleAddPointBlockDiagonal) {
    ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    // (only fall back when a distinct fallback resource is configured)
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback,
             assembled, request); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation
  // (true presumably selects point block mode, mirroring the false passed by
  //  the scalar diagonal variant — confirm against the helper's signature)
  bool is_composite;
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  if (is_composite) {
    ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
           true, assembled); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
    CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  }
}
1382 
1383 /**
1384    @brief Fully assemble the nonzero pattern of a linear operator.
1385 
1386    Expected to be used in conjunction with CeedOperatorLinearAssemble()
1387 
1388    The assembly routines use coordinate format, with num_entries tuples of the
1389    form (i, j, value) which indicate that value should be added to the matrix
1390    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1391    This function returns the number of entries and their (i, j) locations,
1392    while CeedOperatorLinearAssemble() provides the values in the same
1393    ordering.
1394 
1395    This will generally be slow unless your operator is low-order.
1396 
1397   Note: Calling this function asserts that setup is complete
1398           and sets the CeedOperator as immutable.
1399 
1400    @param[in]  op           CeedOperator to assemble
1401    @param[out] num_entries  Number of entries in coordinate nonzero pattern
1402    @param[out] rows         Row number for each entry
1403    @param[out] cols         Column number for each entry
1404 
1405    @ref User
1406 **/
int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedSize *num_entries,
                                       CeedInt **rows, CeedInt **cols) {
  int ierr;
  CeedInt num_suboperators, single_entries;
  CeedOperator *sub_operators;
  bool is_composite;
  ierr = CeedOperatorCheckReady(op); CeedChk(ierr);

  // Use backend version, if available
  if (op->LinearAssembleSymbolic) {
    ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
    return CEED_ERROR_SUCCESS;
  } else {
    // Check for valid fallback resource
    const char *resource, *fallback_resource;
    ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
    ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
    CeedChk(ierr);
    if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
      // Fallback to reference Ceed
      if (!op->op_fallback) {
        ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
      }
      // Assemble
      ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
             cols); CeedChk(ierr);
      return CEED_ERROR_SUCCESS;
    }
  }

  // Default interface implementation

  // count entries and allocate rows, cols arrays
  // (rows and cols are allocated here with CeedCalloc; the caller takes
  //  ownership of both arrays)
  ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
  *num_entries = 0;
  if (is_composite) {
    // Total entry count is the sum over all sub-operators
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries); CeedChk(ierr);
      *num_entries += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssemblyCountEntries(op,
           &single_entries); CeedChk(ierr);
    *num_entries += single_entries;
  }
  ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
  ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);

  // assemble nonzero locations
  // (each sub-operator fills its (i, j) pairs at a running offset, so the
  //  ordering matches the values produced by CeedOperatorLinearAssemble)
  CeedInt offset = 0;
  if (is_composite) {
    ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
    ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
    for (int k = 0; k < num_suboperators; ++k) {
      ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
             *cols); CeedChk(ierr);
      ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
             &single_entries);
      CeedChk(ierr);
      offset += single_entries;
    }
  } else {
    ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
    CeedChk(ierr);
  }

  return CEED_ERROR_SUCCESS;
}
1478 
1479 /**
1480    @brief Fully assemble the nonzero entries of a linear operator.
1481 
1482    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()
1483 
1484    The assembly routines use coordinate format, with num_entries tuples of the
1485    form (i, j, value) which indicate that value should be added to the matrix
1486    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1487    This function returns the values of the nonzero entries to be added, their
1488    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1489 
1490    This will generally be slow unless your operator is low-order.
1491 
1492   Note: Calling this function asserts that setup is complete
1493           and sets the CeedOperator as immutable.
1494 
1495    @param[in]  op      CeedOperator to assemble
1496    @param[out] values  Values to assemble into matrix
1497 
   @return An error code: 0 - success, otherwise - failure

   @ref User
1499 **/
1500 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1501   int ierr;
1502   CeedInt num_suboperators, single_entries = 0;
1503   CeedOperator *sub_operators;
1504   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1505 
1506   // Use backend version, if available
1507   if (op->LinearAssemble) {
1508     ierr = op->LinearAssemble(op, values); CeedChk(ierr);
1509     return CEED_ERROR_SUCCESS;
1510   } else {
1511     // Check for valid fallback resource
1512     const char *resource, *fallback_resource;
1513     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1514     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1515     CeedChk(ierr);
1516     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1517       // Fallback to reference Ceed
1518       if (!op->op_fallback) {
1519         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1520       }
1521       // Assemble
1522       ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr);
1523       return CEED_ERROR_SUCCESS;
1524     }
1525   }
1526 
1527   // Default interface implementation
1528   bool is_composite;
1529   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1530 
1531   CeedInt offset = 0;
1532   if (is_composite) {
1533     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1534     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1535     for (int k = 0; k < num_suboperators; ++k) {
1536       ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
1537       CeedChk(ierr);
1538       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1539              &single_entries);
1540       CeedChk(ierr);
1541       offset += single_entries;
1542     }
1543   } else {
1544     ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
1545   }
1546 
1547   return CEED_ERROR_SUCCESS;
1548 }
1549 
1550 /**
1551   @brief Create a multigrid coarse operator and level transfer operators
1552            for a CeedOperator, creating the prolongation basis from the
1553            fine and coarse grid interpolation
1554 
1555   Note: Calling this function asserts that setup is complete
1556           and sets the CeedOperator as immutable.
1557 
1558   @param[in] op_fine       Fine grid operator
1559   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
1560   @param[in] rstr_coarse   Coarse grid restriction
1561   @param[in] basis_coarse  Coarse grid active vector basis
1562   @param[out] op_coarse    Coarse grid operator
1563   @param[out] op_prolong   Coarse to fine operator
1564   @param[out] op_restrict  Fine to coarse operator
1565 
1566   @return An error code: 0 - success, otherwise - failure
1567 
1568   @ref User
1569 **/
int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
                                     CeedVector p_mult_fine,
                                     CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
                                     CeedOperator *op_coarse, CeedOperator *op_prolong,
                                     CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  // The coarse-to-fine interpolation matrix is obtained as the least-squares
  // solution of interp_f * interp_c_to_f = interp_c via QR factorization:
  // interp_c_to_f = R^-1 (Q^T interp_c).
  // For tensor bases the 1D interp matrices are used (sizes Q x P per
  // dimension); for non-tensor bases the full interp matrices are used.
  CeedInt P_f, P_c, Q = Q_f;
  bool is_tensor_f, is_tensor_c;
  ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
  ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
  CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
  if (is_tensor_f && is_tensor_c) {
    ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
    ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
  } else if (!is_tensor_f && !is_tensor_c) {
    ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
  } else {
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MINOR,
                     "Bases must both be tensor or non-tensor");
    // LCOV_EXCL_STOP
  }

  // Work buffers: interp_f (Q x P_f), interp_c (Q x P_c), the resulting
  // interp_c_to_f (P_f x P_c, row-major), and Householder scalars tau
  ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
  ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
  ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
  ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
  // Copies are needed because CeedQRFactorization overwrites its input
  if (is_tensor_f) {
    memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]);
    memcpy(interp_c, basis_coarse->interp_1d,
           Q*P_c*sizeof basis_coarse->interp_1d[0]);
  } else {
    memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]);
    memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]);
  }

  // -- QR Factorization, interp_f = Q R
  ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);

  // -- Apply Qtranspose, interp_c = Qtranspose interp_c
  ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
                               Q, P_c, P_f, P_c, 1); CeedChk(ierr);

  // -- Apply Rinv, interp_c_to_f = Rinv interp_c
  // Back substitution with the upper-triangular R stored in the top P_f rows
  // of the factored interp_f, solving one column of interp_c_to_f at a time
  for (CeedInt j=0; j<P_c; j++) { // Column j
    interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
    for (CeedInt i=P_f-2; i>=0; i--) { // Row i
      interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
      for (CeedInt k=i+1; k<P_f; k++)
        interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
      interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
    }
  }
  ierr = CeedFree(&tau); CeedChk(ierr);
  ierr = CeedFree(&interp_c); CeedChk(ierr);
  ierr = CeedFree(&interp_f); CeedChk(ierr);

  // Complete with interp_c_to_f versions of code
  if (is_tensor_f) {
    ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  } else {
    ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  }

  // Cleanup
  ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}
1661 
1662 /**
1663   @brief Create a multigrid coarse operator and level transfer operators
1664            for a CeedOperator with a tensor basis for the active basis
1665 
1666   Note: Calling this function asserts that setup is complete
1667           and sets the CeedOperator as immutable.
1668 
1669   @param[in] op_fine        Fine grid operator
1670   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
1671   @param[in] rstr_coarse    Coarse grid restriction
1672   @param[in] basis_coarse   Coarse grid active vector basis
1673   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
1674   @param[out] op_coarse     Coarse grid operator
1675   @param[out] op_prolong    Coarse to fine operator
1676   @param[out] op_restrict   Fine to coarse operator
1677 
1678   @return An error code: 0 - success, otherwise - failure
1679 
1680   @ref User
1681 **/
1682 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
1683     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1684     const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
1685     CeedOperator *op_prolong, CeedOperator *op_restrict) {
1686   int ierr;
1687   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
1688   Ceed ceed;
1689   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
1690 
1691   // Check for compatible quadrature spaces
1692   CeedBasis basis_fine;
1693   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
1694   CeedInt Q_f, Q_c;
1695   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
1696   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
1697   if (Q_f != Q_c)
1698     // LCOV_EXCL_START
1699     return CeedError(ceed, CEED_ERROR_DIMENSION,
1700                      "Bases must have compatible quadrature spaces");
1701   // LCOV_EXCL_STOP
1702 
1703   // Coarse to fine basis
1704   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
1705   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
1706   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
1707   ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr);
1708   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
1709   CeedChk(ierr);
1710   P_1d_c = dim == 1 ? num_nodes_c :
1711            dim == 2 ? sqrt(num_nodes_c) :
1712            cbrt(num_nodes_c);
1713   CeedScalar *q_ref, *q_weight, *grad;
1714   ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr);
1715   ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr);
1716   ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr);
1717   CeedBasis basis_c_to_f;
1718   ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f,
1719                                  interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
1720   CeedChk(ierr);
1721   ierr = CeedFree(&q_ref); CeedChk(ierr);
1722   ierr = CeedFree(&q_weight); CeedChk(ierr);
1723   ierr = CeedFree(&grad); CeedChk(ierr);
1724 
1725   // Core code
1726   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
1727                                           basis_coarse, basis_c_to_f, op_coarse,
1728                                           op_prolong, op_restrict);
1729   CeedChk(ierr);
1730   return CEED_ERROR_SUCCESS;
1731 }
1732 
1733 /**
1734   @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator with a non-tensor active vector basis
1736 
1737   Note: Calling this function asserts that setup is complete
1738           and sets the CeedOperator as immutable.
1739 
1740   @param[in] op_fine        Fine grid operator
1741   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
1742   @param[in] rstr_coarse    Coarse grid restriction
1743   @param[in] basis_coarse   Coarse grid active vector basis
1744   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
1745   @param[out] op_coarse     Coarse grid operator
1746   @param[out] op_prolong    Coarse to fine operator
1747   @param[out] op_restrict   Fine to coarse operator
1748 
1749   @return An error code: 0 - success, otherwise - failure
1750 
1751   @ref User
1752 **/
1753 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine,
1754                                        CeedVector p_mult_fine,
1755                                        CeedElemRestriction rstr_coarse,
1756                                        CeedBasis basis_coarse,
1757                                        const CeedScalar *interp_c_to_f,
1758                                        CeedOperator *op_coarse,
1759                                        CeedOperator *op_prolong,
1760                                        CeedOperator *op_restrict) {
1761   int ierr;
1762   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
1763   Ceed ceed;
1764   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
1765 
1766   // Check for compatible quadrature spaces
1767   CeedBasis basis_fine;
1768   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
1769   CeedInt Q_f, Q_c;
1770   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
1771   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
1772   if (Q_f != Q_c)
1773     // LCOV_EXCL_START
1774     return CeedError(ceed, CEED_ERROR_DIMENSION,
1775                      "Bases must have compatible quadrature spaces");
1776   // LCOV_EXCL_STOP
1777 
1778   // Coarse to fine basis
1779   CeedElemTopology topo;
1780   ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr);
1781   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
1782   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
1783   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
1784   ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr);
1785   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
1786   CeedChk(ierr);
1787   CeedScalar *q_ref, *q_weight, *grad;
1788   ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr);
1789   ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr);
1790   ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr);
1791   CeedBasis basis_c_to_f;
1792   ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f,
1793                            interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
1794   CeedChk(ierr);
1795   ierr = CeedFree(&q_ref); CeedChk(ierr);
1796   ierr = CeedFree(&q_weight); CeedChk(ierr);
1797   ierr = CeedFree(&grad); CeedChk(ierr);
1798 
1799   // Core code
1800   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
1801                                           basis_coarse, basis_c_to_f, op_coarse,
1802                                           op_prolong, op_restrict);
1803   CeedChk(ierr);
1804   return CEED_ERROR_SUCCESS;
1805 }
1806 
1807 /**
1808   @brief Build a FDM based approximate inverse for each element for a
1809            CeedOperator
1810 
1811   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization
1812     Method based approximate inverse. This function obtains the simultaneous
1813     diagonalization for the 1D mass and Laplacian operators,
1814       M = V^T V, K = V^T S V.
1815     The assembled QFunction is used to modify the eigenvalues from simultaneous
1816     diagonalization and obtain an approximate inverse of the form
1817       V^T S^hat V. The CeedOperator must be linear and non-composite. The
1818     associated CeedQFunction must therefore also be linear.
1819 
1820   Note: Calling this function asserts that setup is complete
1821           and sets the CeedOperator as immutable.
1822 
1823   @param op            CeedOperator to create element inverses
1824   @param[out] fdm_inv  CeedOperator to apply the action of a FDM based inverse
1825                          for each element
1826   @param request       Address of CeedRequest for non-blocking completion, else
1827                          @ref CEED_REQUEST_IMMEDIATE
1828 
1829   @return An error code: 0 - success, otherwise - failure
1830 
1831   @ref Backend
1832 **/
1833 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv,
1834                                         CeedRequest *request) {
1835   int ierr;
1836   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1837 
1838   // Use backend version, if available
1839   if (op->CreateFDMElementInverse) {
1840     ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr);
1841     return CEED_ERROR_SUCCESS;
1842   } else {
1843     // Check for valid fallback resource
1844     const char *resource, *fallback_resource;
1845     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1846     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1847     CeedChk(ierr);
1848     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1849       // Fallback to reference Ceed
1850       if (!op->op_fallback) {
1851         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1852       }
1853       // Assemble
1854       ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request);
1855       CeedChk(ierr);
1856       return CEED_ERROR_SUCCESS;
1857     }
1858   }
1859 
1860   // Interface implementation
1861   Ceed ceed, ceed_parent;
1862   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
1863   ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr);
1864   ceed_parent = ceed_parent ? ceed_parent : ceed;
1865   CeedQFunction qf;
1866   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
1867 
1868   // Determine active input basis
1869   bool interp = false, grad = false;
1870   CeedBasis basis = NULL;
1871   CeedElemRestriction rstr = NULL;
1872   CeedOperatorField *op_fields;
1873   CeedQFunctionField *qf_fields;
1874   CeedInt num_input_fields;
1875   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL);
1876   CeedChk(ierr);
1877   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
1878   for (CeedInt i=0; i<num_input_fields; i++) {
1879     CeedVector vec;
1880     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
1881     if (vec == CEED_VECTOR_ACTIVE) {
1882       CeedEvalMode eval_mode;
1883       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
1884       interp = interp || eval_mode == CEED_EVAL_INTERP;
1885       grad = grad || eval_mode == CEED_EVAL_GRAD;
1886       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr);
1887       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
1888     }
1889   }
1890   if (!basis)
1891     // LCOV_EXCL_START
1892     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
1893   // LCOV_EXCL_STOP
1894   CeedSize l_size = 1;
1895   CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1;
1896   ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr);
1897   ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr);
1898   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr);
1899   ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr);
1900   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
1901   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
1902   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
1903   ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr);
1904 
1905   // Build and diagonalize 1D Mass and Laplacian
1906   bool tensor_basis;
1907   ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr);
1908   if (!tensor_basis)
1909     // LCOV_EXCL_START
1910     return CeedError(ceed, CEED_ERROR_BACKEND,
1911                      "FDMElementInverse only supported for tensor "
1912                      "bases");
1913   // LCOV_EXCL_STOP
1914   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
1915   ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr);
1916   ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr);
1917   ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr);
1918   ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr);
1919   ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr);
1920   // -- Build matrices
1921   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
1922   ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr);
1923   ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr);
1924   ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr);
1925   ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim,
1926                               mass, laplace); CeedChk(ierr);
1927 
1928   // -- Diagonalize
1929   ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d);
1930   CeedChk(ierr);
1931   ierr = CeedFree(&mass); CeedChk(ierr);
1932   ierr = CeedFree(&laplace); CeedChk(ierr);
1933   for (CeedInt i=0; i<P_1d; i++)
1934     for (CeedInt j=0; j<P_1d; j++)
1935       fdm_interp[i+j*P_1d] = x[j+i*P_1d];
1936   ierr = CeedFree(&x); CeedChk(ierr);
1937 
1938   // Assemble QFunction
1939   CeedVector assembled;
1940   CeedElemRestriction rstr_qf;
1941   ierr =  CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled,
1942           &rstr_qf, request); CeedChk(ierr);
1943   CeedInt layout[3];
1944   ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr);
1945   ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr);
1946   CeedScalar max_norm = 0;
1947   ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr);
1948 
1949   // Calculate element averages
1950   CeedInt num_modes = (interp?1:0) + (grad?dim:0);
1951   CeedScalar *elem_avg;
1952   const CeedScalar *assembled_array, *q_weight_array;
1953   CeedVector q_weight;
1954   ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr);
1955   ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT,
1956                         CEED_VECTOR_NONE, q_weight); CeedChk(ierr);
1957   ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array);
1958   CeedChk(ierr);
1959   ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array);
1960   CeedChk(ierr);
1961   ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr);
1962   const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
1963   for (CeedInt e=0; e<num_elem; e++) {
1964     CeedInt count = 0;
1965     for (CeedInt q=0; q<num_qpts; q++)
1966       for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++)
1967         if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) >
1968             qf_value_bound) {
1969           elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] /
1970                          q_weight_array[q];
1971           count++;
1972         }
1973     if (count) {
1974       elem_avg[e] /= count;
1975     } else {
1976       elem_avg[e] = 1.0;
1977     }
1978   }
1979   ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr);
1980   ierr = CeedVectorDestroy(&assembled); CeedChk(ierr);
1981   ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr);
1982   ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr);
1983 
1984   // Build FDM diagonal
1985   CeedVector q_data;
1986   CeedScalar *q_data_array, *fdm_diagonal;
1987   ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr);
1988   const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON;
1989   for (CeedInt c=0; c<num_comp; c++)
1990     for (CeedInt n=0; n<elem_size; n++) {
1991       if (interp)
1992         fdm_diagonal[c*elem_size + n] = 1.0;
1993       if (grad)
1994         for (CeedInt d=0; d<dim; d++) {
1995           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
1996           fdm_diagonal[c*elem_size + n] += lambda[i];
1997         }
1998       if (fabs(fdm_diagonal[c*elem_size + n]) < fdm_diagonal_bound)
1999         fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound;
2000     }
2001   ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data);
2002   CeedChk(ierr);
2003   ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr);
2004   ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array);
2005   CeedChk(ierr);
2006   for (CeedInt e=0; e<num_elem; e++)
2007     for (CeedInt c=0; c<num_comp; c++)
2008       for (CeedInt n=0; n<elem_size; n++)
2009         q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] *
2010             fdm_diagonal[c*elem_size + n]);
2011   ierr = CeedFree(&elem_avg); CeedChk(ierr);
2012   ierr = CeedFree(&fdm_diagonal); CeedChk(ierr);
2013   ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr);
2014 
2015   // Setup FDM operator
2016   // -- Basis
2017   CeedBasis fdm_basis;
2018   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2019   ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr);
2020   ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr);
2021   ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr);
2022   ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d,
2023                                  fdm_interp, grad_dummy, q_ref_dummy,
2024                                  q_weight_dummy, &fdm_basis); CeedChk(ierr);
2025   ierr = CeedFree(&fdm_interp); CeedChk(ierr);
2026   ierr = CeedFree(&grad_dummy); CeedChk(ierr);
2027   ierr = CeedFree(&q_ref_dummy); CeedChk(ierr);
2028   ierr = CeedFree(&q_weight_dummy); CeedChk(ierr);
2029   ierr = CeedFree(&lambda); CeedChk(ierr);
2030 
2031   // -- Restriction
2032   CeedElemRestriction rstr_qd_i;
2033   CeedInt strides[3] = {1, elem_size, elem_size*num_comp};
2034   ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size,
2035                                           num_comp, num_elem*num_comp*elem_size,
2036                                           strides, &rstr_qd_i); CeedChk(ierr);
2037   // -- QFunction
2038   CeedQFunction qf_fdm;
2039   ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm);
2040   CeedChk(ierr);
2041   ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP);
2042   CeedChk(ierr);
2043   ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE);
2044   CeedChk(ierr);
2045   ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP);
2046   CeedChk(ierr);
2047   // -- QFunction context
2048   CeedInt *num_comp_data;
2049   ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr);
2050   num_comp_data[0] = num_comp;
2051   CeedQFunctionContext ctx_fdm;
2052   ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr);
2053   ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER,
2054                                      sizeof(*num_comp_data), num_comp_data);
2055   CeedChk(ierr);
2056   ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr);
2057   ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr);
2058   // -- Operator
2059   ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv);
2060   CeedChk(ierr);
2061   CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
2062   CeedChk(ierr);
2063   CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED,
2064                        q_data); CeedChk(ierr);
2065   CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
2066   CeedChk(ierr);
2067 
2068   // Cleanup
2069   ierr = CeedVectorDestroy(&q_data); CeedChk(ierr);
2070   ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr);
2071   ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr);
2072   ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr);
2073 
2074   return CEED_ERROR_SUCCESS;
2075 }
2076 
2077 /// @}
2078