xref: /libCEED/interface/ceed-preconditioning.c (revision 3047f7896db8a74672e0eaf4fe2741a47841cabb)
1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3 // reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <ceed-impl.h>
20 #include <math.h>
21 #include <stdbool.h>
22 #include <stdio.h>
23 #include <string.h>
24 
25 /// @file
26 /// Implementation of CeedOperator preconditioning interfaces
27 
28 /// ----------------------------------------------------------------------------
29 /// CeedOperator Library Internal Preconditioning Functions
30 /// ----------------------------------------------------------------------------
31 /// @addtogroup CeedOperatorDeveloper
32 /// @{
33 
34 /**
35   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced
36          CeedOperator functionality
37 
38   @param op  CeedOperator to create fallback for
39 
40   @return An error code: 0 - success, otherwise - failure
41 
42   @ref Developer
43 **/
44 int CeedOperatorCreateFallback(CeedOperator op) {
45   int ierr;
46 
47   // Fallback Ceed
48   const char *resource, *fallback_resource;
49   ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
50   ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
51   CeedChk(ierr);
52   if (!strcmp(resource, fallback_resource))
53     // LCOV_EXCL_START
54     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
55                      "Backend %s cannot create an operator"
56                      "fallback to resource %s", resource, fallback_resource);
57   // LCOV_EXCL_STOP
58 
59   // Fallback Ceed
60   Ceed ceed_ref;
61   if (!op->ceed->op_fallback_ceed) {
62     ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr);
63     ceed_ref->op_fallback_parent = op->ceed;
64     ceed_ref->Error = op->ceed->Error;
65     op->ceed->op_fallback_ceed = ceed_ref;
66   }
67   ceed_ref = op->ceed->op_fallback_ceed;
68 
69   // Clone Op
70   CeedOperator op_ref;
71   ierr = CeedCalloc(1, &op_ref); CeedChk(ierr);
72   memcpy(op_ref, op, sizeof(*op_ref));
73   op_ref->data = NULL;
74   op_ref->is_interface_setup = false;
75   op_ref->is_backend_setup = false;
76   op_ref->ceed = ceed_ref;
77   ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr);
78   op->op_fallback = op_ref;
79 
80   // Clone QF
81   CeedQFunction qf_ref;
82   ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr);
83   memcpy(qf_ref, (op->qf), sizeof(*qf_ref));
84   qf_ref->data = NULL;
85   qf_ref->ceed = ceed_ref;
86   ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr);
87   op_ref->qf = qf_ref;
88   op->qf_fallback = qf_ref;
89   return CEED_ERROR_SUCCESS;
90 }
91 
92 /**
93   @brief Select correct basis matrix pointer based on CeedEvalMode
94 
95   @param[in] eval_mode   Current basis evaluation mode
96   @param[in] identity    Pointer to identity matrix
97   @param[in] interp      Pointer to interpolation matrix
98   @param[in] grad        Pointer to gradient matrix
99   @param[out] basis_ptr  Basis pointer to set
100 
101   @ref Developer
102 **/
103 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode,
104     const CeedScalar *identity, const CeedScalar *interp,
105     const CeedScalar *grad, const CeedScalar **basis_ptr) {
106   switch (eval_mode) {
107   case CEED_EVAL_NONE:
108     *basis_ptr = identity;
109     break;
110   case CEED_EVAL_INTERP:
111     *basis_ptr = interp;
112     break;
113   case CEED_EVAL_GRAD:
114     *basis_ptr = grad;
115     break;
116   case CEED_EVAL_WEIGHT:
117   case CEED_EVAL_DIV:
118   case CEED_EVAL_CURL:
119     break; // Caught by QF Assembly
120   }
121 }
122 
123 /**
124   @brief Create point block restriction for active operator field
125 
126   @param[in] rstr              Original CeedElemRestriction for active field
127   @param[out] pointblock_rstr  Address of the variable where the newly created
128                                  CeedElemRestriction will be stored
129 
130   @return An error code: 0 - success, otherwise - failure
131 
132   @ref Developer
133 **/
134 static int CeedOperatorCreateActivePointBlockRestriction(
135   CeedElemRestriction rstr,
136   CeedElemRestriction *pointblock_rstr) {
137   int ierr;
138   Ceed ceed;
139   ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr);
140   const CeedInt *offsets;
141   ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets);
142   CeedChk(ierr);
143 
144   // Expand offsets
145   CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1,
146                                                       *pointblock_offsets;
147   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
148   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
149   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
150   ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr);
151   CeedInt shift = num_comp;
152   if (comp_stride != 1)
153     shift *= num_comp;
154   ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets);
155   CeedChk(ierr);
156   for (CeedInt i = 0; i < num_elem*elem_size; i++) {
157     pointblock_offsets[i] = offsets[i]*shift;
158     if (pointblock_offsets[i] > max)
159       max = pointblock_offsets[i];
160   }
161 
162   // Create new restriction
163   ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp,
164                                    1, max + num_comp*num_comp, CEED_MEM_HOST,
165                                    CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr);
166   CeedChk(ierr);
167 
168   // Cleanup
169   ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr);
170 
171   return CEED_ERROR_SUCCESS;
172 }
173 
174 /**
175   @brief Core logic for assembling operator diagonal or point block diagonal
176 
177   @param[in] op             CeedOperator to assemble point block diagonal
178   @param[in] request        Address of CeedRequest for non-blocking completion, else
179                               CEED_REQUEST_IMMEDIATE
180   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
181   @param[out] assembled     CeedVector to store assembled diagonal
182 
183   @return An error code: 0 - success, otherwise - failure
184 
185   @ref Developer
186 **/
static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op,
    CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);

  // Assemble QFunction
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedInt num_input_fields, num_output_fields;
  ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
  CeedChk(ierr);
  CeedVector assembled_qf;
  CeedElemRestriction rstr;
  ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op,  &assembled_qf,
         &rstr, request); CeedChk(ierr);
  // E-vector strides for assembled_qf; entries below are read as
  // q*layout[0] + field*layout[1] + e*layout[2]
  CeedInt layout[3];
  ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
  // Max norm of assembled QFunction data, used to set a drop tolerance below
  CeedScalar max_norm = 0;
  ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr);

  // Determine active input basis
  // Collect one eval mode entry per field (dim entries for GRAD) so the
  // loops below can iterate over flattened eval modes
  CeedOperatorField *op_fields;
  CeedQFunctionField *qf_fields;
  ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
  CeedInt num_eval_mode_in = 0, num_comp, dim = 1;
  CeedEvalMode *eval_mode_in = NULL;
  CeedBasis basis_in = NULL;
  CeedElemRestriction rstr_in = NULL;
  for (CeedInt i=0; i<num_input_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr);
      ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr);
      ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      // All active inputs must share one restriction for this assembly path
      if (rstr_in && rstr_in != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_in = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
        eval_mode_in[num_eval_mode_in] = eval_mode;
        num_eval_mode_in += 1;
        break;
      case CEED_EVAL_GRAD:
        // GRAD contributes one eval mode entry per spatial dimension
        ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_in[num_eval_mode_in+d] = eval_mode;
        num_eval_mode_in += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Determine active output basis
  ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
  CeedInt num_eval_mode_out = 0;
  CeedEvalMode *eval_mode_out = NULL;
  CeedBasis basis_out = NULL;
  CeedElemRestriction rstr_out = NULL;
  for (CeedInt i=0; i<num_output_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      // All active outputs must share one restriction for this assembly path
      if (rstr_out && rstr_out != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_out = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
        eval_mode_out[num_eval_mode_out] = eval_mode;
        num_eval_mode_out += 1;
        break;
      case CEED_EVAL_GRAD:
        // GRAD contributes one eval mode entry per spatial dimension
        ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_out[num_eval_mode_out+d] = eval_mode;
        num_eval_mode_out += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Assemble point block diagonal restriction, if needed
  CeedElemRestriction diag_rstr = rstr_out;
  if (is_pointblock) {
    ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr);
    CeedChk(ierr);
  }

  // Create diagonal vector
  CeedVector elem_diag;
  ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
  CeedChk(ierr);

  // Assemble element operator diagonals
  CeedScalar *elem_diag_array;
  const CeedScalar *assembled_qf_array;
  ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr);
  ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
  CeedChk(ierr);
  ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
  CeedChk(ierr);
  CeedInt num_elem, num_nodes, num_qpts;
  ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
  // Basis matrices
  const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
  CeedScalar *identity = NULL;
  // If any eval mode is NONE, build an identity "basis" so the same B^T D B
  // loop below handles it uniformly
  bool evalNone = false;
  for (CeedInt i=0; i<num_eval_mode_in; i++)
    evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE);
  for (CeedInt i=0; i<num_eval_mode_out; i++)
    evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE);
  if (evalNone) {
    ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr);
    for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++)
      identity[i*num_nodes+i] = 1.0;
  }
  ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
  ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr);
  // Compute the diagonal of B^T D B
  // Skip QFunction values within roundoff of zero relative to the max norm
  // Each element
  const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
  for (CeedInt e=0; e<num_elem; e++) {
    CeedInt d_out = -1;
    // Each basis eval mode pair
    for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
      const CeedScalar *bt = NULL;
      if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
        d_out += 1;
      // NOTE(review): for non-GRAD modes d_out may still be -1, so the
      // &grad_out[...] argument points before the array; it is computed but
      // never selected/dereferenced in that case — confirm this is intended
      CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out,
                                  &grad_out[d_out*num_qpts*num_nodes], &bt);
      CeedInt d_in = -1;
      for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
        const CeedScalar *b = NULL;
        if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
          d_in += 1;
        CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in,
                                    &grad_in[d_in*num_qpts*num_nodes], &b);
        // Each component
        for (CeedInt c_out=0; c_out<num_comp; c_out++)
          // Each qpoint/node pair
          for (CeedInt q=0; q<num_qpts; q++)
            if (is_pointblock) {
              // Point Block Diagonal
              // Accumulate the full num_comp x num_comp block per node
              for (CeedInt c_in=0; c_in<num_comp; c_in++) {
                const CeedScalar qf_value =
                  assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)*
                                                     num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
                if (fabs(qf_value) > qf_value_bound)
                  for (CeedInt n=0; n<num_nodes; n++)
                    elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
                      bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
              }
            } else {
              // Diagonal Only
              // Only the c_in == c_out entries contribute to the scalar diagonal
              const CeedScalar qf_value =
                assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)*
                                                   num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
              if (fabs(qf_value) > qf_value_bound)
                for (CeedInt n=0; n<num_nodes; n++)
                  elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
                    bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
            }
      }
    }
  }
  ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr);
  ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
  CeedChk(ierr);

  // Assemble local operator diagonal
  // Transpose apply sums the element diagonals into the L-vector "assembled"
  ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag,
                                  assembled, request); CeedChk(ierr);

  // Cleanup
  if (is_pointblock) {
    ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr);
  }
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
  ierr = CeedFree(&identity); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}
407 
408 /**
409   @brief Core logic for assembling composite operator diagonal
410 
411   @param[in] op             CeedOperator to assemble point block diagonal
412   @param[in] request        Address of CeedRequest for non-blocking completion, else
413                             CEED_REQUEST_IMMEDIATE
414   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
415   @param[out] assembled     CeedVector to store assembled diagonal
416 
417   @return An error code: 0 - success, otherwise - failure
418 
419   @ref Developer
420 **/
421 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(
422   CeedOperator op, CeedRequest *request, const bool is_pointblock,
423   CeedVector assembled) {
424   int ierr;
425   CeedInt num_sub;
426   CeedOperator *suboperators;
427   ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr);
428   ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr);
429   for (CeedInt i = 0; i < num_sub; i++) {
430     ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request,
431            is_pointblock, assembled); CeedChk(ierr);
432   }
433   return CEED_ERROR_SUCCESS;
434 }
435 
436 /**
437   @brief Build nonzero pattern for non-composite operator
438 
439   Users should generally use CeedOperatorLinearAssembleSymbolic()
440 
441   @param[in] op      CeedOperator to assemble nonzero pattern
442   @param[in] offset  Offset for number of entries
443   @param[out] rows   Row number for each entry
444   @param[out] cols   Column number for each entry
445 
446   @return An error code: 0 - success, otherwise - failure
447 
448   @ref Developer
449 **/
450 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset,
451     CeedInt *rows, CeedInt *cols) {
452   int ierr;
453   Ceed ceed = op->ceed;
454   if (op->is_composite)
455     // LCOV_EXCL_START
456     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
457                      "Composite operator not supported");
458   // LCOV_EXCL_STOP
459 
460   CeedElemRestriction rstr_in;
461   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr);
462   CeedInt num_elem, elem_size, num_nodes, num_comp;
463   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
464   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
465   ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr);
466   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
467   CeedInt layout_er[3];
468   ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr);
469 
470   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
471 
472   // Determine elem_dof relation
473   CeedVector index_vec;
474   ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr);
475   CeedScalar *array;
476   ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr);
477   for (CeedInt i = 0; i < num_nodes; ++i) {
478     array[i] = i;
479   }
480   ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr);
481   CeedVector elem_dof;
482   ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof);
483   CeedChk(ierr);
484   ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr);
485   CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec,
486                            elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
487   const CeedScalar *elem_dof_a;
488   ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a);
489   CeedChk(ierr);
490   ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr);
491 
492   // Determine i, j locations for element matrices
493   CeedInt count = 0;
494   for (int e = 0; e < num_elem; ++e) {
495     for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
496       for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
497         for (int i = 0; i < elem_size; ++i) {
498           for (int j = 0; j < elem_size; ++j) {
499             const CeedInt elem_dof_index_row = (i)*layout_er[0] +
500                                                (comp_out)*layout_er[1] + e*layout_er[2];
501             const CeedInt elem_dof_index_col = (j)*layout_er[0] +
502                                                (comp_in)*layout_er[1] + e*layout_er[2];
503 
504             const CeedInt row = elem_dof_a[elem_dof_index_row];
505             const CeedInt col = elem_dof_a[elem_dof_index_col];
506 
507             rows[offset + count] = row;
508             cols[offset + count] = col;
509             count++;
510           }
511         }
512       }
513     }
514   }
515   if (count != local_num_entries)
516     // LCOV_EXCL_START
517     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
518   // LCOV_EXCL_STOP
519   ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr);
520   ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr);
521 
522   return CEED_ERROR_SUCCESS;
523 }
524 
525 /**
526   @brief Assemble nonzero entries for non-composite operator
527 
528   Users should generally use CeedOperatorLinearAssemble()
529 
530   @param[in] op       CeedOperator to assemble
  @param[in] offset   Offset for number of entries
532   @param[out] values  Values to assemble into matrix
533 
534   @return An error code: 0 - success, otherwise - failure
535 
536   @ref Developer
537 **/
538 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
539                                       CeedVector values) {
540   int ierr;
541   Ceed ceed = op->ceed;
542   if (op->is_composite)
543     // LCOV_EXCL_START
544     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
545                      "Composite operator not supported");
546   // LCOV_EXCL_STOP
547 
548   // Assemble QFunction
549   CeedQFunction qf;
550   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
551   CeedVector assembled_qf;
552   CeedElemRestriction rstr_q;
553   ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(
554            op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
555 
556   CeedInt qf_length;
557   ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr);
558 
559   CeedInt num_input_fields, num_output_fields;
560   CeedOperatorField *input_fields;
561   CeedOperatorField *output_fields;
562   ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields,
563                                &num_output_fields, &output_fields); CeedChk(ierr);
564 
565   // Determine active input basis
566   CeedQFunctionField *qf_fields;
567   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
568   CeedInt num_eval_mode_in = 0, dim = 1;
569   CeedEvalMode *eval_mode_in = NULL;
570   CeedBasis basis_in = NULL;
571   CeedElemRestriction rstr_in = NULL;
572   for (CeedInt i=0; i<num_input_fields; i++) {
573     CeedVector vec;
574     ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr);
575     if (vec == CEED_VECTOR_ACTIVE) {
576       ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in);
577       CeedChk(ierr);
578       ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
579       ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in);
580       CeedChk(ierr);
581       CeedEvalMode eval_mode;
582       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
583       CeedChk(ierr);
584       switch (eval_mode) {
585       case CEED_EVAL_NONE:
586       case CEED_EVAL_INTERP:
587         ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
588         eval_mode_in[num_eval_mode_in] = eval_mode;
589         num_eval_mode_in += 1;
590         break;
591       case CEED_EVAL_GRAD:
592         ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
593         for (CeedInt d=0; d<dim; d++) {
594           eval_mode_in[num_eval_mode_in+d] = eval_mode;
595         }
596         num_eval_mode_in += dim;
597         break;
598       case CEED_EVAL_WEIGHT:
599       case CEED_EVAL_DIV:
600       case CEED_EVAL_CURL:
601         break; // Caught by QF Assembly
602       }
603     }
604   }
605 
606   // Determine active output basis
607   ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
608   CeedInt num_eval_mode_out = 0;
609   CeedEvalMode *eval_mode_out = NULL;
610   CeedBasis basis_out = NULL;
611   CeedElemRestriction rstr_out = NULL;
612   for (CeedInt i=0; i<num_output_fields; i++) {
613     CeedVector vec;
614     ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr);
615     if (vec == CEED_VECTOR_ACTIVE) {
616       ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out);
617       CeedChk(ierr);
618       ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out);
619       CeedChk(ierr);
620       CeedEvalMode eval_mode;
621       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
622       CeedChk(ierr);
623       switch (eval_mode) {
624       case CEED_EVAL_NONE:
625       case CEED_EVAL_INTERP:
626         ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
627         eval_mode_out[num_eval_mode_out] = eval_mode;
628         num_eval_mode_out += 1;
629         break;
630       case CEED_EVAL_GRAD:
631         ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
632         for (CeedInt d=0; d<dim; d++) {
633           eval_mode_out[num_eval_mode_out+d] = eval_mode;
634         }
635         num_eval_mode_out += dim;
636         break;
637       case CEED_EVAL_WEIGHT:
638       case CEED_EVAL_DIV:
639       case CEED_EVAL_CURL:
640         break; // Caught by QF Assembly
641       }
642     }
643   }
644 
645   if (num_eval_mode_in == 0 || num_eval_mode_out == 0)
646     // LCOV_EXCL_START
647     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
648                      "Cannot assemble operator with out inputs/outputs");
649   // LCOV_EXCL_STOP
650 
651   CeedInt num_elem, elem_size, num_qpts, num_comp;
652   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
653   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
654   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
655   ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
656 
657   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
658 
659   // loop over elements and put in data structure
660   const CeedScalar *interp_in, *grad_in;
661   ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
662   ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
663 
664   const CeedScalar *assembled_qf_array;
665   ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
666   CeedChk(ierr);
667 
668   CeedInt layout_qf[3];
669   ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr);
670   ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr);
671 
672   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
673   CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size];
674   CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size];
675   CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in *
676                                      num_qpts]; // logically 3-tensor
677   CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in];
678   CeedScalar elem_mat[elem_size * elem_size];
679   int count = 0;
680   CeedScalar *vals;
681   ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr);
682   for (int e = 0; e < num_elem; ++e) {
683     for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
684       for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
685         for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) {
686           B_mat_in[ell] = 0.0;
687         }
688         for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) {
689           B_mat_out[ell] = 0.0;
690         }
691         // Store block-diagonal D matrix as collection of small dense blocks
692         for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) {
693           D_mat[ell] = 0.0;
694         }
695         // form element matrix itself (for each block component)
696         for (int ell = 0; ell < elem_size*elem_size; ++ell) {
697           elem_mat[ell] = 0.0;
698         }
699         for (int q = 0; q < num_qpts; ++q) {
700           for (int n = 0; n < elem_size; ++n) {
701             CeedInt d_in = -1;
702             for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) {
703               const int qq = num_eval_mode_in*q;
704               if (eval_mode_in[e_in] == CEED_EVAL_INTERP) {
705                 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n];
706               } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) {
707                 d_in += 1;
708                 B_mat_in[(qq+e_in)*elem_size + n] +=
709                   grad_in[(d_in*num_qpts+q) * elem_size + n];
710               } else {
711                 // LCOV_EXCL_START
712                 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
713                 // LCOV_EXCL_STOP
714               }
715             }
716             CeedInt d_out = -1;
717             for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) {
718               const int qq = num_eval_mode_out*q;
719               if (eval_mode_out[e_out] == CEED_EVAL_INTERP) {
720                 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n];
721               } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) {
722                 d_out += 1;
723                 B_mat_out[(qq+e_out)*elem_size + n] +=
724                   grad_in[(d_out*num_qpts+q) * elem_size + n];
725               } else {
726                 // LCOV_EXCL_START
727                 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
728                 // LCOV_EXCL_STOP
729               }
730             }
731           }
732           for (int ei = 0; ei < num_eval_mode_out; ++ei) {
733             for (int ej = 0; ej < num_eval_mode_in; ++ej) {
734               const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp
735                                           +comp_out;
736               const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
737                                 e*layout_qf[2];
738               D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index];
739             }
740           }
741         }
742         // Compute B^T*D
743         for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) {
744           BTD[ell] = 0.0;
745         }
746         for (int j = 0; j<elem_size; ++j) {
747           for (int q = 0; q<num_qpts; ++q) {
748             int qq = num_eval_mode_out*q;
749             for (int ei = 0; ei < num_eval_mode_in; ++ei) {
750               for (int ej = 0; ej < num_eval_mode_out; ++ej) {
751                 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] +=
752                   B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q];
753               }
754             }
755           }
756         }
757 
758         ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size,
759                                   elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);
760 
761         // put element matrix in coordinate data structure
762         for (int i = 0; i < elem_size; ++i) {
763           for (int j = 0; j < elem_size; ++j) {
764             vals[offset + count] = elem_mat[i*elem_size + j];
765             count++;
766           }
767         }
768       }
769     }
770   }
771   if (count != local_num_entries)
772     // LCOV_EXCL_START
773     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
774   // LCOV_EXCL_STOP
775   ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr);
776 
777   ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
778   CeedChk(ierr);
779   ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
780   ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
781   ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
782 
783   return CEED_ERROR_SUCCESS;
784 }
785 
786 /**
787   @brief Count number of entries for assembled CeedOperator
788 
789   @param[in] op            CeedOperator to assemble
790   @param[out] num_entries  Number of entries in assembled representation
791 
792   @return An error code: 0 - success, otherwise - failure
793 
794   @ref Utility
795 **/
796 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op,
797     CeedInt *num_entries) {
798   int ierr;
799   CeedElemRestriction rstr;
800   CeedInt num_elem, elem_size, num_comp;
801 
802   if (op->is_composite)
803     // LCOV_EXCL_START
804     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
805                      "Composite operator not supported");
806   // LCOV_EXCL_STOP
807   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr);
808   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
809   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
810   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
811   *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
812 
813   return CEED_ERROR_SUCCESS;
814 }
815 
816 /**
817   @brief Common code for creating a multigrid coarse operator and level
818            transfer operators for a CeedOperator
819 
820   @param[in] op_fine       Fine grid operator
821   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
822   @param[in] rstr_coarse   Coarse grid restriction
823   @param[in] basis_coarse  Coarse grid active vector basis
824   @param[in] basis_c_to_f  Basis for coarse to fine interpolation
825   @param[out] op_coarse    Coarse grid operator
826   @param[out] op_prolong   Coarse to fine operator
827   @param[out] op_restrict  Fine to coarse operator
828 
829   @return An error code: 0 - success, otherwise - failure
830 
831   @ref Developer
832 **/
static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
    CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
    CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
    CeedOperator *op_restrict) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for composite operator
  bool is_composite;
  ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
  if (is_composite)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
                     "Automatic multigrid setup for composite operators not supported");
  // LCOV_EXCL_STOP

  // Coarse Grid
  // The coarse operator reuses the fine operator's QFunctions; only the
  // active field's restriction/basis are swapped for the coarse versions
  ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
                            op_coarse); CeedChk(ierr);
  CeedElemRestriction rstr_fine = NULL;
  // -- Clone input fields
  for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
    if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      // The active input's restriction defines the fine-grid layout used
      // for the multiplicity vector and the transfer operators below
      rstr_fine = op_fine->input_fields[i]->elem_restr;
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
                                  op_fine->input_fields[i]->elem_restr,
                                  op_fine->input_fields[i]->basis,
                                  op_fine->input_fields[i]->vec); CeedChk(ierr);
    }
  }
  // -- Clone output fields
  for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
    if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
      CeedChk(ierr);
    } else {
      ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
                                  op_fine->output_fields[i]->elem_restr,
                                  op_fine->output_fields[i]->basis,
                                  op_fine->output_fields[i]->vec); CeedChk(ierr);
    }
  }

  // Multiplicity vector
  // Scatter p_mult_fine to an E-vector and gather back to accumulate the
  // per-node multiplicity, then invert pointwise so "scale" fields below
  // divide out the multiplicity
  CeedVector mult_vec, mult_e_vec;
  ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
  CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
                                  mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
  ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
                                  CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
  ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
  ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);

  // Restriction
  CeedInt num_comp;
  ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
  CeedQFunction qf_restrict;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
  CeedChk(ierr);
  // Context holds the component count; ownership of the allocation passes
  // to the context via CEED_OWN_POINTER
  CeedInt *num_comp_r_data;
  ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
  num_comp_r_data[0] = num_comp;
  CeedQFunctionContext ctx_r;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_r_data), num_comp_r_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
                                CEED_EVAL_INTERP); CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_restrict);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);

  // Prolongation
  CeedQFunction qf_prolong;
  ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
  CeedChk(ierr);
  // Separate context instance (same component count) for the prolongation
  CeedInt *num_comp_p_data;
  ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
  num_comp_p_data[0] = num_comp;
  CeedQFunctionContext ctx_p;
  ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
                                     sizeof(*num_comp_p_data), num_comp_p_data);
  CeedChk(ierr);
  ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
  CeedChk(ierr);
  ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);
  ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
  CeedChk(ierr);

  ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
                            CEED_QFUNCTION_NONE, op_prolong);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
                              CEED_VECTOR_ACTIVE); CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
                              CEED_BASIS_COLLOCATED, mult_vec);
  CeedChk(ierr);
  ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
                              CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
  CeedChk(ierr);

  // Cleanup
  // Note: the caller-provided basis_c_to_f reference is released here;
  // this function takes ownership of it
  ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
  ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
  ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}
970 
971 /**
972   @brief Build 1D mass matrix and Laplacian with perturbation
973 
974   @param[in] interp_1d    Interpolation matrix in one dimension
975   @param[in] grad_1d      Gradient matrix in one dimension
976   @param[in] q_weight_1d  Quadrature weights in one dimension
977   @param[in] P_1d         Number of basis nodes in one dimension
978   @param[in] Q_1d         Number of quadrature points in one dimension
979   @param[in] dim          Dimension of basis
980   @param[out] mass        Assembled mass matrix in one dimension
981   @param[out] laplace     Assembled perturbed Laplacian in one dimension
982 
983   @return An error code: 0 - success, otherwise - failure
984 
985   @ref Developer
986 **/
987 CeedPragmaOptimizeOff
988 static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
989                                 const CeedScalar *grad_1d,
990                                 const CeedScalar *q_weight_1d, CeedInt P_1d,
991                                 CeedInt Q_1d, CeedInt dim,
992                                 CeedScalar *mass, CeedScalar *laplace) {
993 
994   for (CeedInt i=0; i<P_1d; i++)
995     for (CeedInt j=0; j<P_1d; j++) {
996       CeedScalar sum = 0.0;
997       for (CeedInt k=0; k<Q_1d; k++)
998         sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
999       mass[i+j*P_1d] = sum;
1000     }
1001   // -- Laplacian
1002   for (CeedInt i=0; i<P_1d; i++)
1003     for (CeedInt j=0; j<P_1d; j++) {
1004       CeedScalar sum = 0.0;
1005       for (CeedInt k=0; k<Q_1d; k++)
1006         sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
1007       laplace[i+j*P_1d] = sum;
1008     }
1009   CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
1010   for (CeedInt i=0; i<P_1d; i++)
1011     laplace[i+P_1d*i] += perturbation;
1012   return CEED_ERROR_SUCCESS;
1013 }
1014 CeedPragmaOptimizeOn
1015 
1016 /// @}
1017 
1018 /// ----------------------------------------------------------------------------
1019 /// CeedOperator Public API
1020 /// ----------------------------------------------------------------------------
1021 /// @addtogroup CeedOperatorUser
1022 /// @{
1023 
1024 /**
1025   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1026 
1027   This returns a CeedVector containing a matrix at each quadrature point
1028     providing the action of the CeedQFunction associated with the CeedOperator.
1029     The vector 'assembled' is of shape
1030       [num_elements, num_input_fields, num_output_fields, num_quad_points]
1031     and contains column-major matrices representing the action of the
1032     CeedQFunction for a corresponding quadrature point on an element. Inputs and
1033     outputs are in the order provided by the user when adding CeedOperator fields.
1034     For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
1035     'v', provided in that order, would result in an assembled QFunction that
1036     consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
1037     on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1038 
1039   Note: Calling this function asserts that setup is complete
1040           and sets the CeedOperator as immutable.
1041 
1042   @param op              CeedOperator to assemble CeedQFunction
1043   @param[out] assembled  CeedVector to store assembled CeedQFunction at
1044                            quadrature points
1045   @param[out] rstr       CeedElemRestriction for CeedVector containing assembled
1046                            CeedQFunction
1047   @param request         Address of CeedRequest for non-blocking completion, else
1048                            @ref CEED_REQUEST_IMMEDIATE
1049 
1050   @return An error code: 0 - success, otherwise - failure
1051 
1052   @ref User
1053 **/
1054 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
1055                                         CeedElemRestriction *rstr,
1056                                         CeedRequest *request) {
1057   int ierr;
1058   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1059 
1060   // Backend version
1061   if (op->LinearAssembleQFunction) {
1062     ierr = op->LinearAssembleQFunction(op, assembled, rstr, request);
1063     CeedChk(ierr);
1064   } else {
1065     // Fallback to reference Ceed
1066     if (!op->op_fallback) {
1067       ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1068     }
1069     // Assemble
1070     ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled,
1071            rstr, request); CeedChk(ierr);
1072   }
1073   return CEED_ERROR_SUCCESS;
1074 }
1075 
1076 /**
  @brief Assemble CeedQFunction and store result internally. Return copied
1078            references of stored data to the caller. Caller is responsible for
1079            ownership and destruction of the copied references. See also
1080            @ref CeedOperatorLinearAssembleQFunction
1081 
1082   @param op              CeedOperator to assemble CeedQFunction
1083   @param assembled       CeedVector to store assembled CeedQFunction at
1084                            quadrature points
1085   @param rstr            CeedElemRestriction for CeedVector containing assembled
1086                            CeedQFunction
1087   @param request         Address of CeedRequest for non-blocking completion, else
1088                            @ref CEED_REQUEST_IMMEDIATE
1089 
1090   @return An error code: 0 - success, otherwise - failure
1091 
1092   @ref User
1093 **/
1094 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op,
1095     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1096   int ierr;
1097   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1098 
1099   // Backend version
1100   if (op->LinearAssembleQFunctionUpdate) {
1101     if (op->has_qf_assembled) {
1102       ierr = op->LinearAssembleQFunctionUpdate(op, op->qf_assembled,
1103              op->qf_assembled_rstr, request);
1104     } else {
1105       ierr = op->LinearAssembleQFunction(op, &op->qf_assembled,
1106                                          &op->qf_assembled_rstr, request);
1107     }
1108     CeedChk(ierr);
1109     op->has_qf_assembled = true;
1110     // Copy reference to internally held copy
1111     *assembled = NULL;
1112     *rstr = NULL;
1113     ierr = CeedVectorReferenceCopy(op->qf_assembled, assembled); CeedChk(ierr);
1114     ierr = CeedElemRestrictionReferenceCopy(op->qf_assembled_rstr, rstr);
1115   } else {
1116     // Fallback to reference Ceed
1117     if (!op->op_fallback) {
1118       ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1119     }
1120     // Assemble
1121     ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback,
1122            assembled, rstr, request); CeedChk(ierr);
1123   }
1124   CeedChk(ierr);
1125 
1126   return CEED_ERROR_SUCCESS;
1127 }
1128 
1129 /**
1130   @brief Assemble the diagonal of a square linear CeedOperator
1131 
1132   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1133 
1134   Note: Currently only non-composite CeedOperators with a single field and
1135           composite CeedOperators with single field sub-operators are supported.
1136 
1137   Note: Calling this function asserts that setup is complete
1138           and sets the CeedOperator as immutable.
1139 
1140   @param op              CeedOperator to assemble CeedQFunction
1141   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1142   @param request         Address of CeedRequest for non-blocking completion, else
1143                            @ref CEED_REQUEST_IMMEDIATE
1144 
1145   @return An error code: 0 - success, otherwise - failure
1146 
1147   @ref User
1148 **/
1149 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
1150                                        CeedRequest *request) {
1151   int ierr;
1152   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1153 
1154   // Use backend version, if available
1155   if (op->LinearAssembleDiagonal) {
1156     ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
1157     return CEED_ERROR_SUCCESS;
1158   } else if (op->LinearAssembleAddDiagonal) {
1159     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1160     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1161     return CEED_ERROR_SUCCESS;
1162   } else {
1163     // Check for valid fallback resource
1164     const char *resource, *fallback_resource;
1165     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1166     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1167     CeedChk(ierr);
1168     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1169       // Fallback to reference Ceed
1170       if (!op->op_fallback) {
1171         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1172       }
1173       // Assemble
1174       ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
1175       CeedChk(ierr);
1176       return CEED_ERROR_SUCCESS;
1177     }
1178   }
1179 
1180   // Default interface implementation
1181   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1182   ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
1183   CeedChk(ierr);
1184   return CEED_ERROR_SUCCESS;
1185 }
1186 
1187 /**
1188   @brief Assemble the diagonal of a square linear CeedOperator
1189 
1190   This sums into a CeedVector the diagonal of a linear CeedOperator.
1191 
1192   Note: Currently only non-composite CeedOperators with a single field and
1193           composite CeedOperators with single field sub-operators are supported.
1194 
1195   Note: Calling this function asserts that setup is complete
1196           and sets the CeedOperator as immutable.
1197 
1198   @param op              CeedOperator to assemble CeedQFunction
1199   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1200   @param request         Address of CeedRequest for non-blocking completion, else
1201                            @ref CEED_REQUEST_IMMEDIATE
1202 
1203   @return An error code: 0 - success, otherwise - failure
1204 
1205   @ref User
1206 **/
1207 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
1208     CeedRequest *request) {
1209   int ierr;
1210   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1211 
1212   // Use backend version, if available
1213   if (op->LinearAssembleAddDiagonal) {
1214     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1215     return CEED_ERROR_SUCCESS;
1216   } else {
1217     // Check for valid fallback resource
1218     const char *resource, *fallback_resource;
1219     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1220     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1221     CeedChk(ierr);
1222     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1223       // Fallback to reference Ceed
1224       if (!op->op_fallback) {
1225         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1226       }
1227       // Assemble
1228       ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled,
1229              request); CeedChk(ierr);
1230       return CEED_ERROR_SUCCESS;
1231     }
1232   }
1233 
1234   // Default interface implementation
1235   bool is_composite;
1236   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1237   if (is_composite) {
1238     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1239            false, assembled); CeedChk(ierr);
1240     return CEED_ERROR_SUCCESS;
1241   } else {
1242     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
1243     CeedChk(ierr);
1244     return CEED_ERROR_SUCCESS;
1245   }
1246 }
1247 
1248 /**
1249   @brief Assemble the point block diagonal of a square linear CeedOperator
1250 
1251   This overwrites a CeedVector with the point block diagonal of a linear
1252     CeedOperator.
1253 
1254   Note: Currently only non-composite CeedOperators with a single field and
1255           composite CeedOperators with single field sub-operators are supported.
1256 
1257   Note: Calling this function asserts that setup is complete
1258           and sets the CeedOperator as immutable.
1259 
1260   @param op              CeedOperator to assemble CeedQFunction
1261   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1262                            diagonal, provided in row-major form with an
1263                            @a num_comp * @a num_comp block at each node. The dimensions
1264                            of this vector are derived from the active vector
1265                            for the CeedOperator. The array has shape
1266                            [nodes, component out, component in].
1267   @param request         Address of CeedRequest for non-blocking completion, else
1268                            CEED_REQUEST_IMMEDIATE
1269 
1270   @return An error code: 0 - success, otherwise - failure
1271 
1272   @ref User
1273 **/
1274 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
1275     CeedVector assembled, CeedRequest *request) {
1276   int ierr;
1277   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1278 
1279   // Use backend version, if available
1280   if (op->LinearAssemblePointBlockDiagonal) {
1281     ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
1282     CeedChk(ierr);
1283     return CEED_ERROR_SUCCESS;
1284   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1285     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1286     ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
1287            request); CeedChk(ierr);
1288     return CEED_ERROR_SUCCESS;
1289   } else {
1290     // Check for valid fallback resource
1291     const char *resource, *fallback_resource;
1292     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1293     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1294     CeedChk(ierr);
1295     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1296       // Fallback to reference Ceed
1297       if (!op->op_fallback) {
1298         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1299       }
1300       // Assemble
1301       ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
1302              assembled, request); CeedChk(ierr);
1303       return CEED_ERROR_SUCCESS;
1304     }
1305   }
1306 
1307   // Default interface implementation
1308   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1309   ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
1310   CeedChk(ierr);
1311   return CEED_ERROR_SUCCESS;
1312 }
1313 
1314 /**
1315   @brief Assemble the point block diagonal of a square linear CeedOperator
1316 
1317   This sums into a CeedVector with the point block diagonal of a linear
1318     CeedOperator.
1319 
1320   Note: Currently only non-composite CeedOperators with a single field and
1321           composite CeedOperators with single field sub-operators are supported.
1322 
1323   Note: Calling this function asserts that setup is complete
1324           and sets the CeedOperator as immutable.
1325 
1326   @param op              CeedOperator to assemble CeedQFunction
1327   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1328                            diagonal, provided in row-major form with an
1329                            @a num_comp * @a num_comp block at each node. The dimensions
1330                            of this vector are derived from the active vector
1331                            for the CeedOperator. The array has shape
1332                            [nodes, component out, component in].
1333   @param request         Address of CeedRequest for non-blocking completion, else
1334                            CEED_REQUEST_IMMEDIATE
1335 
1336   @return An error code: 0 - success, otherwise - failure
1337 
1338   @ref User
1339 **/
1340 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
1341     CeedVector assembled, CeedRequest *request) {
1342   int ierr;
1343   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1344 
1345   // Use backend version, if available
1346   if (op->LinearAssembleAddPointBlockDiagonal) {
1347     ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
1348     CeedChk(ierr);
1349     return CEED_ERROR_SUCCESS;
1350   } else {
1351     // Check for valid fallback resource
1352     const char *resource, *fallback_resource;
1353     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1354     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1355     CeedChk(ierr);
1356     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1357       // Fallback to reference Ceed
1358       if (!op->op_fallback) {
1359         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1360       }
1361       // Assemble
1362       ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback,
1363              assembled, request); CeedChk(ierr);
1364       return CEED_ERROR_SUCCESS;
1365     }
1366   }
1367 
1368   // Default interface implemenation
1369   bool is_composite;
1370   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1371   if (is_composite) {
1372     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1373            true, assembled); CeedChk(ierr);
1374     return CEED_ERROR_SUCCESS;
1375   } else {
1376     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
1377     CeedChk(ierr);
1378     return CEED_ERROR_SUCCESS;
1379   }
1380 }
1381 
1382 /**
1383    @brief Fully assemble the nonzero pattern of a linear operator.
1384 
1385    Expected to be used in conjunction with CeedOperatorLinearAssemble()
1386 
1387    The assembly routines use coordinate format, with num_entries tuples of the
1388    form (i, j, value) which indicate that value should be added to the matrix
1389    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1390    This function returns the number of entries and their (i, j) locations,
1391    while CeedOperatorLinearAssemble() provides the values in the same
1392    ordering.
1393 
1394    This will generally be slow unless your operator is low-order.
1395 
1396   Note: Calling this function asserts that setup is complete
1397           and sets the CeedOperator as immutable.
1398 
1399    @param[in]  op           CeedOperator to assemble
1400    @param[out] num_entries  Number of entries in coordinate nonzero pattern
1401    @param[out] rows         Row number for each entry
1402    @param[out] cols         Column number for each entry
1403 
   @return An error code: 0 - success, otherwise - failure

   @ref User
1405 **/
1406 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, ptrdiff_t *num_entries,
1407                                        CeedInt **rows, CeedInt **cols) {
1408   int ierr;
1409   CeedInt num_suboperators, single_entries;
1410   CeedOperator *sub_operators;
1411   bool is_composite;
1412   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1413 
1414   // Use backend version, if available
1415   if (op->LinearAssembleSymbolic) {
1416     ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
1417     return CEED_ERROR_SUCCESS;
1418   } else {
1419     // Check for valid fallback resource
1420     const char *resource, *fallback_resource;
1421     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1422     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1423     CeedChk(ierr);
1424     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1425       // Fallback to reference Ceed
1426       if (!op->op_fallback) {
1427         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1428       }
1429       // Assemble
1430       ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
1431              cols); CeedChk(ierr);
1432       return CEED_ERROR_SUCCESS;
1433     }
1434   }
1435 
1436   // Default interface implementation
1437 
1438   // count entries and allocate rows, cols arrays
1439   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1440   *num_entries = 0;
1441   if (is_composite) {
1442     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1443     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1444     for (int k = 0; k < num_suboperators; ++k) {
1445       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1446              &single_entries); CeedChk(ierr);
1447       *num_entries += single_entries;
1448     }
1449   } else {
1450     ierr = CeedSingleOperatorAssemblyCountEntries(op,
1451            &single_entries); CeedChk(ierr);
1452     *num_entries += single_entries;
1453   }
1454   ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
1455   ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);
1456 
1457   // assemble nonzero locations
1458   CeedInt offset = 0;
1459   if (is_composite) {
1460     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1461     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1462     for (int k = 0; k < num_suboperators; ++k) {
1463       ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
1464              *cols); CeedChk(ierr);
1465       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1466              &single_entries);
1467       CeedChk(ierr);
1468       offset += single_entries;
1469     }
1470   } else {
1471     ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
1472     CeedChk(ierr);
1473   }
1474 
1475   return CEED_ERROR_SUCCESS;
1476 }
1477 
1478 /**
1479    @brief Fully assemble the nonzero entries of a linear operator.
1480 
1481    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()
1482 
1483    The assembly routines use coordinate format, with num_entries tuples of the
1484    form (i, j, value) which indicate that value should be added to the matrix
1485    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1486    This function returns the values of the nonzero entries to be added, their
1487    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1488 
1489    This will generally be slow unless your operator is low-order.
1490 
1491   Note: Calling this function asserts that setup is complete
1492           and sets the CeedOperator as immutable.
1493 
   @param[in]  op      CeedOperator to assemble
   @param[out] values  Values to assemble into matrix

   @return An error code: 0 - success, otherwise - failure

   @ref User
1498 **/
1499 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1500   int ierr;
1501   CeedInt num_suboperators, single_entries = 0;
1502   CeedOperator *sub_operators;
1503   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1504 
1505   // Use backend version, if available
1506   if (op->LinearAssemble) {
1507     ierr = op->LinearAssemble(op, values); CeedChk(ierr);
1508     return CEED_ERROR_SUCCESS;
1509   } else {
1510     // Check for valid fallback resource
1511     const char *resource, *fallback_resource;
1512     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1513     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1514     CeedChk(ierr);
1515     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1516       // Fallback to reference Ceed
1517       if (!op->op_fallback) {
1518         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1519       }
1520       // Assemble
1521       ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr);
1522       return CEED_ERROR_SUCCESS;
1523     }
1524   }
1525 
1526   // Default interface implementation
1527   bool is_composite;
1528   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1529 
1530   CeedInt offset = 0;
1531   if (is_composite) {
1532     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1533     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1534     for (int k = 0; k < num_suboperators; ++k) {
1535       ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
1536       CeedChk(ierr);
1537       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1538              &single_entries);
1539       CeedChk(ierr);
1540       offset += single_entries;
1541     }
1542   } else {
1543     ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
1544   }
1545 
1546   return CEED_ERROR_SUCCESS;
1547 }
1548 
1549 /**
1550   @brief Create a multigrid coarse operator and level transfer operators
1551            for a CeedOperator, creating the prolongation basis from the
1552            fine and coarse grid interpolation
1553 
1554   Note: Calling this function asserts that setup is complete
1555           and sets the CeedOperator as immutable.
1556 
1557   @param[in] op_fine       Fine grid operator
1558   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
1559   @param[in] rstr_coarse   Coarse grid restriction
1560   @param[in] basis_coarse  Coarse grid active vector basis
1561   @param[out] op_coarse    Coarse grid operator
1562   @param[out] op_prolong   Coarse to fine operator
1563   @param[out] op_restrict  Fine to coarse operator
1564 
1565   @return An error code: 0 - success, otherwise - failure
1566 
1567   @ref User
1568 **/
int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
                                     CeedVector p_mult_fine,
                                     CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
                                     CeedOperator *op_coarse, CeedOperator *op_prolong,
                                     CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  // Both bases must use the same number of quadrature points
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  // For tensor bases the prolongation is built from the 1D interpolation
  // matrices; for non-tensor bases from the full interpolation matrices.
  // Mixed tensor/non-tensor pairs are not supported.
  CeedInt P_f, P_c, Q = Q_f;
  bool is_tensor_f, is_tensor_c;
  ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
  ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
  CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
  if (is_tensor_f && is_tensor_c) {
    ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
    ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
  } else if (!is_tensor_f && !is_tensor_c) {
    ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
  } else {
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MINOR,
                     "Bases must both be tensor or non-tensor");
    // LCOV_EXCL_STOP
  }

  // Work arrays: interp_f and interp_c are Q x P_f and Q x P_c copies of the
  // basis interpolation matrices; interp_c_to_f receives the P_f x P_c
  // coarse-to-fine map; tau holds the Householder scalars from QR
  ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
  ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
  ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
  ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
  if (is_tensor_f) {
    memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]);
    memcpy(interp_c, basis_coarse->interp_1d,
           Q*P_c*sizeof basis_coarse->interp_1d[0]);
  } else {
    memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]);
    memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]);
  }

  // Solve the least-squares problem interp_f * interp_c_to_f = interp_c via
  // QR, so interpolating coarse then mapping to fine reproduces the coarse
  // interpolation at the quadrature points
  // -- QR Factorization, interp_f = Q R
  ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);

  // -- Apply Qtranspose, interp_c = Qtranspose interp_c
  ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
                               Q, P_c, P_f, P_c, 1); CeedChk(ierr);

  // -- Apply Rinv, interp_c_to_f = Rinv interp_c
  // Back-substitution against the upper-triangular R stored in the top
  // P_f x P_f block of interp_f (row-major), one column of interp_c_to_f
  // at a time, from the last row up
  for (CeedInt j=0; j<P_c; j++) { // Column j
    interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
    for (CeedInt i=P_f-2; i>=0; i--) { // Row i
      interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
      for (CeedInt k=i+1; k<P_f; k++)
        interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
      interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
    }
  }
  ierr = CeedFree(&tau); CeedChk(ierr);
  ierr = CeedFree(&interp_c); CeedChk(ierr);
  ierr = CeedFree(&interp_f); CeedChk(ierr);

  // Complete with interp_c_to_f versions of code
  // Hand off to the tensor or non-tensor level constructor, which builds the
  // prolongation basis and the coarse/prolong/restrict operators
  if (is_tensor_f) {
    ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  } else {
    ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  }

  // Cleanup
  ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}
1660 
1661 /**
1662   @brief Create a multigrid coarse operator and level transfer operators
1663            for a CeedOperator with a tensor basis for the active basis
1664 
1665   Note: Calling this function asserts that setup is complete
1666           and sets the CeedOperator as immutable.
1667 
1668   @param[in] op_fine        Fine grid operator
1669   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
1670   @param[in] rstr_coarse    Coarse grid restriction
1671   @param[in] basis_coarse   Coarse grid active vector basis
1672   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
1673   @param[out] op_coarse     Coarse grid operator
1674   @param[out] op_prolong    Coarse to fine operator
1675   @param[out] op_restrict   Fine to coarse operator
1676 
1677   @return An error code: 0 - success, otherwise - failure
1678 
1679   @ref User
1680 **/
1681 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
1682     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1683     const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
1684     CeedOperator *op_prolong, CeedOperator *op_restrict) {
1685   int ierr;
1686   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
1687   Ceed ceed;
1688   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
1689 
1690   // Check for compatible quadrature spaces
1691   CeedBasis basis_fine;
1692   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
1693   CeedInt Q_f, Q_c;
1694   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
1695   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
1696   if (Q_f != Q_c)
1697     // LCOV_EXCL_START
1698     return CeedError(ceed, CEED_ERROR_DIMENSION,
1699                      "Bases must have compatible quadrature spaces");
1700   // LCOV_EXCL_STOP
1701 
1702   // Coarse to fine basis
1703   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
1704   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
1705   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
1706   ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr);
1707   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
1708   CeedChk(ierr);
1709   P_1d_c = dim == 1 ? num_nodes_c :
1710            dim == 2 ? sqrt(num_nodes_c) :
1711            cbrt(num_nodes_c);
1712   CeedScalar *q_ref, *q_weight, *grad;
1713   ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr);
1714   ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr);
1715   ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr);
1716   CeedBasis basis_c_to_f;
1717   ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f,
1718                                  interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
1719   CeedChk(ierr);
1720   ierr = CeedFree(&q_ref); CeedChk(ierr);
1721   ierr = CeedFree(&q_weight); CeedChk(ierr);
1722   ierr = CeedFree(&grad); CeedChk(ierr);
1723 
1724   // Core code
1725   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
1726                                           basis_coarse, basis_c_to_f, op_coarse,
1727                                           op_prolong, op_restrict);
1728   CeedChk(ierr);
1729   return CEED_ERROR_SUCCESS;
1730 }
1731 
1732 /**
1733   @brief Create a multigrid coarse operator and level transfer operators
1734            for a CeedOperator with a non-tensor basis for the active vector
1735 
1736   Note: Calling this function asserts that setup is complete
1737           and sets the CeedOperator as immutable.
1738 
1739   @param[in] op_fine        Fine grid operator
1740   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
1741   @param[in] rstr_coarse    Coarse grid restriction
1742   @param[in] basis_coarse   Coarse grid active vector basis
1743   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
1744   @param[out] op_coarse     Coarse grid operator
1745   @param[out] op_prolong    Coarse to fine operator
1746   @param[out] op_restrict   Fine to coarse operator
1747 
1748   @return An error code: 0 - success, otherwise - failure
1749 
1750   @ref User
1751 **/
1752 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine,
1753                                        CeedVector p_mult_fine,
1754                                        CeedElemRestriction rstr_coarse,
1755                                        CeedBasis basis_coarse,
1756                                        const CeedScalar *interp_c_to_f,
1757                                        CeedOperator *op_coarse,
1758                                        CeedOperator *op_prolong,
1759                                        CeedOperator *op_restrict) {
1760   int ierr;
1761   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
1762   Ceed ceed;
1763   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
1764 
1765   // Check for compatible quadrature spaces
1766   CeedBasis basis_fine;
1767   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
1768   CeedInt Q_f, Q_c;
1769   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
1770   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
1771   if (Q_f != Q_c)
1772     // LCOV_EXCL_START
1773     return CeedError(ceed, CEED_ERROR_DIMENSION,
1774                      "Bases must have compatible quadrature spaces");
1775   // LCOV_EXCL_STOP
1776 
1777   // Coarse to fine basis
1778   CeedElemTopology topo;
1779   ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr);
1780   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
1781   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
1782   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
1783   ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr);
1784   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
1785   CeedChk(ierr);
1786   CeedScalar *q_ref, *q_weight, *grad;
1787   ierr = CeedCalloc(num_nodes_f*dim, &q_ref); CeedChk(ierr);
1788   ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr);
1789   ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr);
1790   CeedBasis basis_c_to_f;
1791   ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f,
1792                            interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
1793   CeedChk(ierr);
1794   ierr = CeedFree(&q_ref); CeedChk(ierr);
1795   ierr = CeedFree(&q_weight); CeedChk(ierr);
1796   ierr = CeedFree(&grad); CeedChk(ierr);
1797 
1798   // Core code
1799   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
1800                                           basis_coarse, basis_c_to_f, op_coarse,
1801                                           op_prolong, op_restrict);
1802   CeedChk(ierr);
1803   return CEED_ERROR_SUCCESS;
1804 }
1805 
1806 /**
1807   @brief Build a FDM based approximate inverse for each element for a
1808            CeedOperator
1809 
1810   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization
1811     Method based approximate inverse. This function obtains the simultaneous
1812     diagonalization for the 1D mass and Laplacian operators,
1813       M = V^T V, K = V^T S V.
1814     The assembled QFunction is used to modify the eigenvalues from simultaneous
1815     diagonalization and obtain an approximate inverse of the form
1816       V^T S^hat V. The CeedOperator must be linear and non-composite. The
1817     associated CeedQFunction must therefore also be linear.
1818 
1819   Note: Calling this function asserts that setup is complete
1820           and sets the CeedOperator as immutable.
1821 
1822   @param op            CeedOperator to create element inverses
1823   @param[out] fdm_inv  CeedOperator to apply the action of a FDM based inverse
1824                          for each element
1825   @param request       Address of CeedRequest for non-blocking completion, else
1826                          @ref CEED_REQUEST_IMMEDIATE
1827 
1828   @return An error code: 0 - success, otherwise - failure
1829 
1830   @ref Backend
1831 **/
1832 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv,
1833                                         CeedRequest *request) {
1834   int ierr;
1835   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1836 
1837   // Use backend version, if available
1838   if (op->CreateFDMElementInverse) {
1839     ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr);
1840     return CEED_ERROR_SUCCESS;
1841   } else {
1842     // Check for valid fallback resource
1843     const char *resource, *fallback_resource;
1844     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1845     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1846     CeedChk(ierr);
1847     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1848       // Fallback to reference Ceed
1849       if (!op->op_fallback) {
1850         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1851       }
1852       // Assemble
1853       ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request);
1854       CeedChk(ierr);
1855       return CEED_ERROR_SUCCESS;
1856     }
1857   }
1858 
1859   // Interface implementation
1860   Ceed ceed, ceed_parent;
1861   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
1862   ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr);
1863   ceed_parent = ceed_parent ? ceed_parent : ceed;
1864   CeedQFunction qf;
1865   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
1866 
1867   // Determine active input basis
1868   bool interp = false, grad = false;
1869   CeedBasis basis = NULL;
1870   CeedElemRestriction rstr = NULL;
1871   CeedOperatorField *op_fields;
1872   CeedQFunctionField *qf_fields;
1873   CeedInt num_input_fields;
1874   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL);
1875   CeedChk(ierr);
1876   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
1877   for (CeedInt i=0; i<num_input_fields; i++) {
1878     CeedVector vec;
1879     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
1880     if (vec == CEED_VECTOR_ACTIVE) {
1881       CeedEvalMode eval_mode;
1882       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
1883       interp = interp || eval_mode == CEED_EVAL_INTERP;
1884       grad = grad || eval_mode == CEED_EVAL_GRAD;
1885       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr);
1886       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
1887     }
1888   }
1889   if (!basis)
1890     // LCOV_EXCL_START
1891     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
1892   // LCOV_EXCL_STOP
1893   CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1,
1894                                                 l_size = 1;
1895   ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr);
1896   ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr);
1897   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr);
1898   ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr);
1899   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
1900   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
1901   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
1902   ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr);
1903 
1904   // Build and diagonalize 1D Mass and Laplacian
1905   bool tensor_basis;
1906   ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr);
1907   if (!tensor_basis)
1908     // LCOV_EXCL_START
1909     return CeedError(ceed, CEED_ERROR_BACKEND,
1910                      "FDMElementInverse only supported for tensor "
1911                      "bases");
1912   // LCOV_EXCL_STOP
1913   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
1914   ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr);
1915   ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr);
1916   ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr);
1917   ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr);
1918   ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr);
1919   // -- Build matrices
1920   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
1921   ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr);
1922   ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr);
1923   ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr);
1924   ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim,
1925                               mass, laplace); CeedChk(ierr);
1926 
1927   // -- Diagonalize
1928   ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d);
1929   CeedChk(ierr);
1930   ierr = CeedFree(&mass); CeedChk(ierr);
1931   ierr = CeedFree(&laplace); CeedChk(ierr);
1932   for (CeedInt i=0; i<P_1d; i++)
1933     for (CeedInt j=0; j<P_1d; j++)
1934       fdm_interp[i+j*P_1d] = x[j+i*P_1d];
1935   ierr = CeedFree(&x); CeedChk(ierr);
1936 
1937   // Assemble QFunction
1938   CeedVector assembled;
1939   CeedElemRestriction rstr_qf;
1940   ierr =  CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled,
1941           &rstr_qf, request); CeedChk(ierr);
1942   CeedInt layout[3];
1943   ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr);
1944   ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr);
1945   CeedScalar max_norm = 0;
1946   ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr);
1947 
1948   // Calculate element averages
1949   CeedInt num_modes = (interp?1:0) + (grad?dim:0);
1950   CeedScalar *elem_avg;
1951   const CeedScalar *assembled_array, *q_weight_array;
1952   CeedVector q_weight;
1953   ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr);
1954   ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT,
1955                         CEED_VECTOR_NONE, q_weight); CeedChk(ierr);
1956   ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array);
1957   CeedChk(ierr);
1958   ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array);
1959   CeedChk(ierr);
1960   ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr);
1961   const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
1962   for (CeedInt e=0; e<num_elem; e++) {
1963     CeedInt count = 0;
1964     for (CeedInt q=0; q<num_qpts; q++)
1965       for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++)
1966         if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) >
1967             qf_value_bound) {
1968           elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] /
1969                          q_weight_array[q];
1970           count++;
1971         }
1972     if (count) {
1973       elem_avg[e] /= count;
1974     } else {
1975       elem_avg[e] = 1.0;
1976     }
1977   }
1978   ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr);
1979   ierr = CeedVectorDestroy(&assembled); CeedChk(ierr);
1980   ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr);
1981   ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr);
1982 
1983   // Build FDM diagonal
1984   CeedVector q_data;
1985   CeedScalar *q_data_array, *fdm_diagonal;
1986   ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr);
1987   const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON;
1988   for (CeedInt c=0; c<num_comp; c++)
1989     for (CeedInt n=0; n<elem_size; n++) {
1990       if (interp)
1991         fdm_diagonal[c*elem_size + n] = 1.0;
1992       if (grad)
1993         for (CeedInt d=0; d<dim; d++) {
1994           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
1995           fdm_diagonal[c*elem_size + n] += lambda[i];
1996         }
1997       if (fabs(fdm_diagonal[c*elem_size + n]) < fdm_diagonal_bound)
1998         fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound;
1999     }
2000   ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data);
2001   CeedChk(ierr);
2002   ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr);
2003   ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array);
2004   CeedChk(ierr);
2005   for (CeedInt e=0; e<num_elem; e++)
2006     for (CeedInt c=0; c<num_comp; c++)
2007       for (CeedInt n=0; n<elem_size; n++)
2008         q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] *
2009             fdm_diagonal[c*elem_size + n]);
2010   ierr = CeedFree(&elem_avg); CeedChk(ierr);
2011   ierr = CeedFree(&fdm_diagonal); CeedChk(ierr);
2012   ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr);
2013 
2014   // Setup FDM operator
2015   // -- Basis
2016   CeedBasis fdm_basis;
2017   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2018   ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr);
2019   ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr);
2020   ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr);
2021   ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d,
2022                                  fdm_interp, grad_dummy, q_ref_dummy,
2023                                  q_weight_dummy, &fdm_basis); CeedChk(ierr);
2024   ierr = CeedFree(&fdm_interp); CeedChk(ierr);
2025   ierr = CeedFree(&grad_dummy); CeedChk(ierr);
2026   ierr = CeedFree(&q_ref_dummy); CeedChk(ierr);
2027   ierr = CeedFree(&q_weight_dummy); CeedChk(ierr);
2028   ierr = CeedFree(&lambda); CeedChk(ierr);
2029 
2030   // -- Restriction
2031   CeedElemRestriction rstr_qd_i;
2032   CeedInt strides[3] = {1, elem_size, elem_size*num_comp};
2033   ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size,
2034                                           num_comp, num_elem*num_comp*elem_size,
2035                                           strides, &rstr_qd_i); CeedChk(ierr);
2036   // -- QFunction
2037   CeedQFunction qf_fdm;
2038   ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm);
2039   CeedChk(ierr);
2040   ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP);
2041   CeedChk(ierr);
2042   ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE);
2043   CeedChk(ierr);
2044   ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP);
2045   CeedChk(ierr);
2046   // -- QFunction context
2047   CeedInt *num_comp_data;
2048   ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr);
2049   num_comp_data[0] = num_comp;
2050   CeedQFunctionContext ctx_fdm;
2051   ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr);
2052   ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER,
2053                                      sizeof(*num_comp_data), num_comp_data);
2054   CeedChk(ierr);
2055   ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr);
2056   ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr);
2057   // -- Operator
2058   ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv);
2059   CeedChk(ierr);
2060   CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
2061   CeedChk(ierr);
2062   CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED,
2063                        q_data); CeedChk(ierr);
2064   CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
2065   CeedChk(ierr);
2066 
2067   // Cleanup
2068   ierr = CeedVectorDestroy(&q_data); CeedChk(ierr);
2069   ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr);
2070   ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr);
2071   ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr);
2072 
2073   return CEED_ERROR_SUCCESS;
2074 }
2075 
2076 /// @}
2077