xref: /libCEED/interface/ceed-preconditioning.c (revision d3677ae8c8463ed424a8c5269c03811212785cd9)
1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3 // reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <ceed-impl.h>
20 #include <math.h>
21 #include <stdbool.h>
22 #include <stdio.h>
23 #include <string.h>
24 
25 /// @file
26 /// Implementation of CeedOperator preconditioning interfaces
27 
28 /// ----------------------------------------------------------------------------
29 /// CeedOperator Library Internal Preconditioning Functions
30 /// ----------------------------------------------------------------------------
31 /// @addtogroup CeedOperatorDeveloper
32 /// @{
33 
34 /**
35   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced
36          CeedOperator functionality
37 
38   @param op  CeedOperator to create fallback for
39 
40   @return An error code: 0 - success, otherwise - failure
41 
42   @ref Developer
43 **/
44 int CeedOperatorCreateFallback(CeedOperator op) {
45   int ierr;
46 
47   // Fallback Ceed
48   const char *resource, *fallback_resource;
49   ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
50   ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
51   CeedChk(ierr);
52   if (!strcmp(resource, fallback_resource))
53     // LCOV_EXCL_START
54     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
55                      "Backend %s cannot create an operator"
56                      "fallback to resource %s", resource, fallback_resource);
57   // LCOV_EXCL_STOP
58 
59   // Fallback Ceed
60   Ceed ceed_ref;
61   if (!op->ceed->op_fallback_ceed) {
62     ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr);
63     ceed_ref->op_fallback_parent = op->ceed;
64     ceed_ref->Error = op->ceed->Error;
65     op->ceed->op_fallback_ceed = ceed_ref;
66   }
67   ceed_ref = op->ceed->op_fallback_ceed;
68 
69   // Clone Op
70   CeedOperator op_ref;
71   ierr = CeedCalloc(1, &op_ref); CeedChk(ierr);
72   memcpy(op_ref, op, sizeof(*op_ref));
73   op_ref->data = NULL;
74   op_ref->is_interface_setup = false;
75   op_ref->is_backend_setup = false;
76   op_ref->ceed = ceed_ref;
77   ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr);
78   op->op_fallback = op_ref;
79 
80   // Clone QF
81   CeedQFunction qf_ref;
82   ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr);
83   memcpy(qf_ref, (op->qf), sizeof(*qf_ref));
84   qf_ref->data = NULL;
85   qf_ref->ceed = ceed_ref;
86   ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr);
87   op_ref->qf = qf_ref;
88   op->qf_fallback = qf_ref;
89   return CEED_ERROR_SUCCESS;
90 }
91 
92 /**
93   @brief Select correct basis matrix pointer based on CeedEvalMode
94 
95   @param[in] eval_mode   Current basis evaluation mode
96   @param[in] identity    Pointer to identity matrix
97   @param[in] interp      Pointer to interpolation matrix
98   @param[in] grad        Pointer to gradient matrix
99   @param[out] basis_ptr  Basis pointer to set
100 
101   @return none
102 
103   @ref Developer
104 **/
105 static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode,
106     const CeedScalar *identity, const CeedScalar *interp,
107     const CeedScalar *grad, const CeedScalar **basis_ptr) {
108   switch (eval_mode) {
109   case CEED_EVAL_NONE:
110     *basis_ptr = identity;
111     break;
112   case CEED_EVAL_INTERP:
113     *basis_ptr = interp;
114     break;
115   case CEED_EVAL_GRAD:
116     *basis_ptr = grad;
117     break;
118   case CEED_EVAL_WEIGHT:
119   case CEED_EVAL_DIV:
120   case CEED_EVAL_CURL:
121     break; // Caught by QF Assembly
122   }
123 }
124 
125 /**
126   @brief Create point block restriction for active operator field
127 
128   @param[in] rstr              Original CeedElemRestriction for active field
129   @param[out] pointblock_rstr  Address of the variable where the newly created
130                                  CeedElemRestriction will be stored
131 
132   @return An error code: 0 - success, otherwise - failure
133 
134   @ref Developer
135 **/
136 static int CeedOperatorCreateActivePointBlockRestriction(
137   CeedElemRestriction rstr,
138   CeedElemRestriction *pointblock_rstr) {
139   int ierr;
140   Ceed ceed;
141   ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr);
142   const CeedInt *offsets;
143   ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets);
144   CeedChk(ierr);
145 
146   // Expand offsets
147   CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1,
148                                                       *pointblock_offsets;
149   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
150   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
151   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
152   ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr);
153   CeedInt shift = num_comp;
154   if (comp_stride != 1)
155     shift *= num_comp;
156   ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets);
157   CeedChk(ierr);
158   for (CeedInt i = 0; i < num_elem*elem_size; i++) {
159     pointblock_offsets[i] = offsets[i]*shift;
160     if (pointblock_offsets[i] > max)
161       max = pointblock_offsets[i];
162   }
163 
164   // Create new restriction
165   ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp,
166                                    1, max + num_comp*num_comp, CEED_MEM_HOST,
167                                    CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr);
168   CeedChk(ierr);
169 
170   // Cleanup
171   ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr);
172 
173   return CEED_ERROR_SUCCESS;
174 }
175 
176 /**
177   @brief Core logic for assembling operator diagonal or point block diagonal
178 
179   @param[in] op             CeedOperator to assemble point block diagonal
180   @param[in] request        Address of CeedRequest for non-blocking completion, else
181                               CEED_REQUEST_IMMEDIATE
182   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
183   @param[out] assembled     CeedVector to store assembled diagonal
184 
185   @return An error code: 0 - success, otherwise - failure
186 
187   @ref Developer
188 **/
static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op,
    CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
  int ierr;
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);

  // Assemble QFunction at quadrature points (D in the B^T D B decomposition)
  CeedQFunction qf;
  ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
  CeedInt num_input_fields, num_output_fields;
  ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
  CeedChk(ierr);
  CeedVector assembled_qf;
  CeedElemRestriction rstr;
  ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op,  &assembled_qf,
         &rstr, request); CeedChk(ierr);
  // E-vector layout strides for indexing into assembled_qf:
  // [0] node, [1] component, [2] element
  CeedInt layout[3];
  ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr);
  ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
  // Max norm sets the threshold below which QF values are skipped as
  // numerically negligible
  CeedScalar max_norm = 0;
  ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr);

  // Determine active input basis; expand the eval mode list so GRAD
  // contributes one entry per dimension
  CeedOperatorField *op_fields;
  CeedQFunctionField *qf_fields;
  ierr = CeedOperatorGetFields(op, NULL, &op_fields, NULL, NULL); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
  CeedInt num_eval_mode_in = 0, num_comp, dim = 1;
  CeedEvalMode *eval_mode_in = NULL;
  CeedBasis basis_in = NULL;
  CeedElemRestriction rstr_in = NULL;
  for (CeedInt i=0; i<num_input_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr);
      ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr);
      ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      // All active inputs must share one element restriction
      if (rstr_in && rstr_in != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_in = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
        eval_mode_in[num_eval_mode_in] = eval_mode;
        num_eval_mode_in += 1;
        break;
      case CEED_EVAL_GRAD:
        // One eval mode entry per spatial dimension
        ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_in[num_eval_mode_in+d] = eval_mode;
        num_eval_mode_in += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Determine active output basis, mirroring the input pass above
  ierr = CeedOperatorGetFields(op, NULL, NULL, NULL, &op_fields); CeedChk(ierr);
  ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
  CeedInt num_eval_mode_out = 0;
  CeedEvalMode *eval_mode_out = NULL;
  CeedBasis basis_out = NULL;
  CeedElemRestriction rstr_out = NULL;
  for (CeedInt i=0; i<num_output_fields; i++) {
    CeedVector vec;
    ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
    if (vec == CEED_VECTOR_ACTIVE) {
      CeedElemRestriction rstr;
      ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr);
      ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
      // All active outputs must share one element restriction
      if (rstr_out && rstr_out != rstr)
        // LCOV_EXCL_START
        return CeedError(ceed, CEED_ERROR_BACKEND,
                         "Multi-field non-composite operator diagonal assembly not supported");
      // LCOV_EXCL_STOP
      rstr_out = rstr;
      CeedEvalMode eval_mode;
      ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
      switch (eval_mode) {
      case CEED_EVAL_NONE:
      case CEED_EVAL_INTERP:
        ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
        eval_mode_out[num_eval_mode_out] = eval_mode;
        num_eval_mode_out += 1;
        break;
      case CEED_EVAL_GRAD:
        // One eval mode entry per spatial dimension
        ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
        for (CeedInt d=0; d<dim; d++)
          eval_mode_out[num_eval_mode_out+d] = eval_mode;
        num_eval_mode_out += dim;
        break;
      case CEED_EVAL_WEIGHT:
      case CEED_EVAL_DIV:
      case CEED_EVAL_CURL:
        break; // Caught by QF Assembly
      }
    }
  }

  // Assemble point block diagonal restriction, if needed
  CeedElemRestriction diag_rstr = rstr_out;
  if (is_pointblock) {
    ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr);
    CeedChk(ierr);
  }

  // Create element diagonal work vector (E-vector layout of diag_rstr)
  CeedVector elem_diag;
  ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
  CeedChk(ierr);

  // Assemble element operator diagonals
  CeedScalar *elem_diag_array;
  const CeedScalar *assembled_qf_array;
  ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr);
  ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
  CeedChk(ierr);
  ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
  CeedChk(ierr);
  CeedInt num_elem, num_nodes, num_qpts;
  ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr);
  ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
  // Basis matrices
  const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
  CeedScalar *identity = NULL;
  // CEED_EVAL_NONE uses an identity "basis"; build it only when some
  // eval mode actually needs it
  bool evalNone = false;
  for (CeedInt i=0; i<num_eval_mode_in; i++)
    evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE);
  for (CeedInt i=0; i<num_eval_mode_out; i++)
    evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE);
  if (evalNone) {
    ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr);
    for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++)
      identity[i*num_nodes+i] = 1.0;
  }
  ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
  ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
  ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr);
  // Compute the diagonal of B^T D B
  // QF values at or below this bound are treated as zero and skipped
  const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
  // Each element
  for (CeedInt e=0; e<num_elem; e++) {
    // d_out/d_in track which GRAD dimension slab of the basis matrix to use
    CeedInt d_out = -1;
    // Each basis eval mode pair
    for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
      const CeedScalar *bt = NULL;
      if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
        d_out += 1;
      CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out,
                                  &grad_out[d_out*num_qpts*num_nodes], &bt);
      CeedInt d_in = -1;
      for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
        const CeedScalar *b = NULL;
        if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
          d_in += 1;
        CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in,
                                    &grad_in[d_in*num_qpts*num_nodes], &b);
        // Each component
        for (CeedInt c_out=0; c_out<num_comp; c_out++)
          // Each qpoint/node pair
          for (CeedInt q=0; q<num_qpts; q++)
            if (is_pointblock) {
              // Point Block Diagonal: accumulate full num_comp x num_comp block
              for (CeedInt c_in=0; c_in<num_comp; c_in++) {
                const CeedScalar qf_value =
                  assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)*
                                                     num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
                if (fabs(qf_value) > qf_value_bound)
                  for (CeedInt n=0; n<num_nodes; n++)
                    elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
                      bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
              }
            } else {
              // Diagonal Only: c_in == c_out, scalar entry per node
              const CeedScalar qf_value =
                assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)*
                                                   num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
              if (fabs(qf_value) > qf_value_bound)
                for (CeedInt n=0; n<num_nodes; n++)
                  elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
                    bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
            }
      }
    }
  }
  ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr);
  ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
  CeedChk(ierr);

  // Scatter-add element diagonals into the assembled L-vector
  ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag,
                                  assembled, request); CeedChk(ierr);

  // Cleanup; diag_rstr was created here only in the point block case
  if (is_pointblock) {
    ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr);
  }
  ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
  ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
  ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
  ierr = CeedFree(&identity); CeedChk(ierr);

  return CEED_ERROR_SUCCESS;
}
409 
410 /**
411   @brief Core logic for assembling composite operator diagonal
412 
413   @param[in] op             CeedOperator to assemble point block diagonal
414   @param[in] request        Address of CeedRequest for non-blocking completion, else
415                             CEED_REQUEST_IMMEDIATE
416   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
417   @param[out] assembled     CeedVector to store assembled diagonal
418 
419   @return An error code: 0 - success, otherwise - failure
420 
421   @ref Developer
422 **/
423 static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(
424   CeedOperator op, CeedRequest *request, const bool is_pointblock,
425   CeedVector assembled) {
426   int ierr;
427   CeedInt num_sub;
428   CeedOperator *suboperators;
429   ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr);
430   ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr);
431   for (CeedInt i = 0; i < num_sub; i++) {
432     ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request,
433            is_pointblock, assembled); CeedChk(ierr);
434   }
435   return CEED_ERROR_SUCCESS;
436 }
437 
438 /**
439   @brief Build nonzero pattern for non-composite operator
440 
441   Users should generally use CeedOperatorLinearAssembleSymbolic()
442 
443   @param[in] op      CeedOperator to assemble nonzero pattern
444   @param[in] offset  Offset for number of entries
445   @param[out] rows   Row number for each entry
446   @param[out] cols   Column number for each entry
447 
448   @return An error code: 0 - success, otherwise - failure
449 
450   @ref Developer
451 **/
452 static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset,
453     CeedInt *rows, CeedInt *cols) {
454   int ierr;
455   Ceed ceed = op->ceed;
456   if (op->is_composite)
457     // LCOV_EXCL_START
458     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
459                      "Composite operator not supported");
460   // LCOV_EXCL_STOP
461 
462   CeedElemRestriction rstr_in;
463   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr);
464   CeedInt num_elem, elem_size, num_nodes, num_comp;
465   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
466   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
467   ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr);
468   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
469   CeedInt layout_er[3];
470   ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr);
471 
472   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
473 
474   // Determine elem_dof relation
475   CeedVector index_vec;
476   ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr);
477   CeedScalar *array;
478   ierr = CeedVectorGetArrayWrite(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr);
479   for (CeedInt i = 0; i < num_nodes; ++i) {
480     array[i] = i;
481   }
482   ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr);
483   CeedVector elem_dof;
484   ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof);
485   CeedChk(ierr);
486   ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr);
487   CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec,
488                            elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
489   const CeedScalar *elem_dof_a;
490   ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a);
491   CeedChk(ierr);
492   ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr);
493 
494   // Determine i, j locations for element matrices
495   CeedInt count = 0;
496   for (int e = 0; e < num_elem; ++e) {
497     for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
498       for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
499         for (int i = 0; i < elem_size; ++i) {
500           for (int j = 0; j < elem_size; ++j) {
501             const CeedInt elem_dof_index_row = (i)*layout_er[0] +
502                                                (comp_out)*layout_er[1] + e*layout_er[2];
503             const CeedInt elem_dof_index_col = (j)*layout_er[0] +
504                                                (comp_in)*layout_er[1] + e*layout_er[2];
505 
506             const CeedInt row = elem_dof_a[elem_dof_index_row];
507             const CeedInt col = elem_dof_a[elem_dof_index_col];
508 
509             rows[offset + count] = row;
510             cols[offset + count] = col;
511             count++;
512           }
513         }
514       }
515     }
516   }
517   if (count != local_num_entries)
518     // LCOV_EXCL_START
519     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
520   // LCOV_EXCL_STOP
521   ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr);
522   ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr);
523 
524   return CEED_ERROR_SUCCESS;
525 }
526 
527 /**
528   @brief Assemble nonzero entries for non-composite operator
529 
530   Users should generally use CeedOperatorLinearAssemble()
531 
532   @param[in] op       CeedOperator to assemble
  @param[in] offset   Offset for number of entries
534   @param[out] values  Values to assemble into matrix
535 
536   @return An error code: 0 - success, otherwise - failure
537 
538   @ref Developer
539 **/
540 static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
541                                       CeedVector values) {
542   int ierr;
543   Ceed ceed = op->ceed;
544   if (op->is_composite)
545     // LCOV_EXCL_START
546     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
547                      "Composite operator not supported");
548   // LCOV_EXCL_STOP
549 
550   // Assemble QFunction
551   CeedQFunction qf;
552   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
553   CeedVector assembled_qf;
554   CeedElemRestriction rstr_q;
555   ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(
556            op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
557 
558   CeedInt qf_length;
559   ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr);
560 
561   CeedInt num_input_fields, num_output_fields;
562   CeedOperatorField *input_fields;
563   CeedOperatorField *output_fields;
564   ierr = CeedOperatorGetFields(op, &num_input_fields, &input_fields,
565                                &num_output_fields, &output_fields); CeedChk(ierr);
566 
567   // Determine active input basis
568   CeedQFunctionField *qf_fields;
569   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
570   CeedInt num_eval_mode_in = 0, dim = 1;
571   CeedEvalMode *eval_mode_in = NULL;
572   CeedBasis basis_in = NULL;
573   CeedElemRestriction rstr_in = NULL;
574   for (CeedInt i=0; i<num_input_fields; i++) {
575     CeedVector vec;
576     ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr);
577     if (vec == CEED_VECTOR_ACTIVE) {
578       ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in);
579       CeedChk(ierr);
580       ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
581       ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in);
582       CeedChk(ierr);
583       CeedEvalMode eval_mode;
584       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
585       CeedChk(ierr);
586       switch (eval_mode) {
587       case CEED_EVAL_NONE:
588       case CEED_EVAL_INTERP:
589         ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
590         eval_mode_in[num_eval_mode_in] = eval_mode;
591         num_eval_mode_in += 1;
592         break;
593       case CEED_EVAL_GRAD:
594         ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
595         for (CeedInt d=0; d<dim; d++) {
596           eval_mode_in[num_eval_mode_in+d] = eval_mode;
597         }
598         num_eval_mode_in += dim;
599         break;
600       case CEED_EVAL_WEIGHT:
601       case CEED_EVAL_DIV:
602       case CEED_EVAL_CURL:
603         break; // Caught by QF Assembly
604       }
605     }
606   }
607 
608   // Determine active output basis
609   ierr = CeedQFunctionGetFields(qf, NULL, NULL, NULL, &qf_fields); CeedChk(ierr);
610   CeedInt num_eval_mode_out = 0;
611   CeedEvalMode *eval_mode_out = NULL;
612   CeedBasis basis_out = NULL;
613   CeedElemRestriction rstr_out = NULL;
614   for (CeedInt i=0; i<num_output_fields; i++) {
615     CeedVector vec;
616     ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr);
617     if (vec == CEED_VECTOR_ACTIVE) {
618       ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out);
619       CeedChk(ierr);
620       ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out);
621       CeedChk(ierr);
622       CeedEvalMode eval_mode;
623       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
624       CeedChk(ierr);
625       switch (eval_mode) {
626       case CEED_EVAL_NONE:
627       case CEED_EVAL_INTERP:
628         ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
629         eval_mode_out[num_eval_mode_out] = eval_mode;
630         num_eval_mode_out += 1;
631         break;
632       case CEED_EVAL_GRAD:
633         ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
634         for (CeedInt d=0; d<dim; d++) {
635           eval_mode_out[num_eval_mode_out+d] = eval_mode;
636         }
637         num_eval_mode_out += dim;
638         break;
639       case CEED_EVAL_WEIGHT:
640       case CEED_EVAL_DIV:
641       case CEED_EVAL_CURL:
642         break; // Caught by QF Assembly
643       }
644     }
645   }
646 
647   if (num_eval_mode_in == 0 || num_eval_mode_out == 0)
648     // LCOV_EXCL_START
649     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
650                      "Cannot assemble operator with out inputs/outputs");
651   // LCOV_EXCL_STOP
652 
653   CeedInt num_elem, elem_size, num_qpts, num_comp;
654   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
655   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
656   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
657   ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
658 
659   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
660 
661   // loop over elements and put in data structure
662   const CeedScalar *interp_in, *grad_in;
663   ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
664   ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
665 
666   const CeedScalar *assembled_qf_array;
667   ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
668   CeedChk(ierr);
669 
670   CeedInt layout_qf[3];
671   ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr);
672   ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr);
673 
674   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
675   CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size];
676   CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size];
677   CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in *
678                                      num_qpts]; // logically 3-tensor
679   CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in];
680   CeedScalar elem_mat[elem_size * elem_size];
681   int count = 0;
682   CeedScalar *vals;
683   ierr = CeedVectorGetArrayWrite(values, CEED_MEM_HOST, &vals); CeedChk(ierr);
684   for (int e = 0; e < num_elem; ++e) {
685     for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
686       for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
687         for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) {
688           B_mat_in[ell] = 0.0;
689         }
690         for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) {
691           B_mat_out[ell] = 0.0;
692         }
693         // Store block-diagonal D matrix as collection of small dense blocks
694         for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) {
695           D_mat[ell] = 0.0;
696         }
697         // form element matrix itself (for each block component)
698         for (int ell = 0; ell < elem_size*elem_size; ++ell) {
699           elem_mat[ell] = 0.0;
700         }
701         for (int q = 0; q < num_qpts; ++q) {
702           for (int n = 0; n < elem_size; ++n) {
703             CeedInt d_in = -1;
704             for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) {
705               const int qq = num_eval_mode_in*q;
706               if (eval_mode_in[e_in] == CEED_EVAL_INTERP) {
707                 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n];
708               } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) {
709                 d_in += 1;
710                 B_mat_in[(qq+e_in)*elem_size + n] +=
711                   grad_in[(d_in*num_qpts+q) * elem_size + n];
712               } else {
713                 // LCOV_EXCL_START
714                 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
715                 // LCOV_EXCL_STOP
716               }
717             }
718             CeedInt d_out = -1;
719             for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) {
720               const int qq = num_eval_mode_out*q;
721               if (eval_mode_out[e_out] == CEED_EVAL_INTERP) {
722                 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n];
723               } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) {
724                 d_out += 1;
725                 B_mat_out[(qq+e_out)*elem_size + n] +=
726                   grad_in[(d_out*num_qpts+q) * elem_size + n];
727               } else {
728                 // LCOV_EXCL_START
729                 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
730                 // LCOV_EXCL_STOP
731               }
732             }
733           }
734           for (int ei = 0; ei < num_eval_mode_out; ++ei) {
735             for (int ej = 0; ej < num_eval_mode_in; ++ej) {
736               const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp
737                                           +comp_out;
738               const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
739                                 e*layout_qf[2];
740               D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index];
741             }
742           }
743         }
744         // Compute B^T*D
745         for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) {
746           BTD[ell] = 0.0;
747         }
748         for (int j = 0; j<elem_size; ++j) {
749           for (int q = 0; q<num_qpts; ++q) {
750             int qq = num_eval_mode_out*q;
751             for (int ei = 0; ei < num_eval_mode_in; ++ei) {
752               for (int ej = 0; ej < num_eval_mode_out; ++ej) {
753                 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] +=
754                   B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q];
755               }
756             }
757           }
758         }
759 
760         ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size,
761                                   elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);
762 
763         // put element matrix in coordinate data structure
764         for (int i = 0; i < elem_size; ++i) {
765           for (int j = 0; j < elem_size; ++j) {
766             vals[offset + count] = elem_mat[i*elem_size + j];
767             count++;
768           }
769         }
770       }
771     }
772   }
773   if (count != local_num_entries)
774     // LCOV_EXCL_START
775     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
776   // LCOV_EXCL_STOP
777   ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr);
778 
779   ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
780   CeedChk(ierr);
781   ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
782   ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
783   ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
784 
785   return CEED_ERROR_SUCCESS;
786 }
787 
788 /**
789   @brief Count number of entries for assembled CeedOperator
790 
791   @param[in] op            CeedOperator to assemble
792   @param[out] num_entries  Number of entries in assembled representation
793 
794   @return An error code: 0 - success, otherwise - failure
795 
796   @ref Utility
797 **/
798 static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op,
799     CeedInt *num_entries) {
800   int ierr;
801   CeedElemRestriction rstr;
802   CeedInt num_elem, elem_size, num_comp;
803 
804   if (op->is_composite)
805     // LCOV_EXCL_START
806     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
807                      "Composite operator not supported");
808   // LCOV_EXCL_STOP
809   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr);
810   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
811   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
812   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
813   *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
814 
815   return CEED_ERROR_SUCCESS;
816 }
817 
818 /**
819   @brief Common code for creating a multigrid coarse operator and level
820            transfer operators for a CeedOperator
821 
822   @param[in] op_fine       Fine grid operator
823   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
824   @param[in] rstr_coarse   Coarse grid restriction
825   @param[in] basis_coarse  Coarse grid active vector basis
826   @param[in] basis_c_to_f  Basis for coarse to fine interpolation
827   @param[out] op_coarse    Coarse grid operator
828   @param[out] op_prolong   Coarse to fine operator
829   @param[out] op_restrict  Fine to coarse operator
830 
831   @return An error code: 0 - success, otherwise - failure
832 
833   @ref Developer
834 **/
835 static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
836     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
837     CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
838     CeedOperator *op_restrict) {
839   int ierr;
840   Ceed ceed;
841   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
842 
843   // Check for composite operator
844   bool is_composite;
845   ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
846   if (is_composite)
847     // LCOV_EXCL_START
848     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
849                      "Automatic multigrid setup for composite operators not supported");
850   // LCOV_EXCL_STOP
851 
852   // Coarse Grid
853   ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
854                             op_coarse); CeedChk(ierr);
855   CeedElemRestriction rstr_fine = NULL;
856   // -- Clone input fields
857   for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
858     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
859       rstr_fine = op_fine->input_fields[i]->elem_restr;
860       ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
861                                   rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
862       CeedChk(ierr);
863     } else {
864       ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
865                                   op_fine->input_fields[i]->elem_restr,
866                                   op_fine->input_fields[i]->basis,
867                                   op_fine->input_fields[i]->vec); CeedChk(ierr);
868     }
869   }
870   // -- Clone output fields
871   for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
872     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
873       ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
874                                   rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
875       CeedChk(ierr);
876     } else {
877       ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
878                                   op_fine->output_fields[i]->elem_restr,
879                                   op_fine->output_fields[i]->basis,
880                                   op_fine->output_fields[i]->vec); CeedChk(ierr);
881     }
882   }
883 
884   // Multiplicity vector
885   CeedVector mult_vec, mult_e_vec;
886   ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
887   CeedChk(ierr);
888   ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
889   ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
890                                   mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
891   ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
892   ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
893                                   CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
894   ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
895   ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);
896 
897   // Restriction
898   CeedInt num_comp;
899   ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
900   CeedQFunction qf_restrict;
901   ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
902   CeedChk(ierr);
903   CeedInt *num_comp_r_data;
904   ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
905   num_comp_r_data[0] = num_comp;
906   CeedQFunctionContext ctx_r;
907   ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
908   ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
909                                      sizeof(*num_comp_r_data), num_comp_r_data);
910   CeedChk(ierr);
911   ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
912   ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
913   ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
914   CeedChk(ierr);
915   ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
916   CeedChk(ierr);
917   ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
918                                 CEED_EVAL_INTERP); CeedChk(ierr);
919 
920   ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
921                             CEED_QFUNCTION_NONE, op_restrict);
922   CeedChk(ierr);
923   ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
924                               CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
925   CeedChk(ierr);
926   ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
927                               CEED_BASIS_COLLOCATED, mult_vec);
928   CeedChk(ierr);
929   ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
930                               CEED_VECTOR_ACTIVE); CeedChk(ierr);
931 
932   // Prolongation
933   CeedQFunction qf_prolong;
934   ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
935   CeedChk(ierr);
936   CeedInt *num_comp_p_data;
937   ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
938   num_comp_p_data[0] = num_comp;
939   CeedQFunctionContext ctx_p;
940   ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
941   ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
942                                      sizeof(*num_comp_p_data), num_comp_p_data);
943   CeedChk(ierr);
944   ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
945   ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
946   ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
947   CeedChk(ierr);
948   ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
949   CeedChk(ierr);
950   ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
951   CeedChk(ierr);
952 
953   ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
954                             CEED_QFUNCTION_NONE, op_prolong);
955   CeedChk(ierr);
956   ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
957                               CEED_VECTOR_ACTIVE); CeedChk(ierr);
958   ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
959                               CEED_BASIS_COLLOCATED, mult_vec);
960   CeedChk(ierr);
961   ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
962                               CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
963   CeedChk(ierr);
964 
965   // Cleanup
966   ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
967   ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
968   ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
969   ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
970   return CEED_ERROR_SUCCESS;
971 }
972 
973 /**
974   @brief Build 1D mass matrix and Laplacian with perturbation
975 
976   @param[in] interp_1d    Interpolation matrix in one dimension
977   @param[in] grad_1d      Gradient matrix in one dimension
978   @param[in] q_weight_1d  Quadrature weights in one dimension
979   @param[in] P_1d         Number of basis nodes in one dimension
980   @param[in] Q_1d         Number of quadrature points in one dimension
981   @param[in] dim          Dimension of basis
982   @param[out] mass        Assembled mass matrix in one dimension
983   @param[out] laplace     Assembled perturbed Laplacian in one dimension
984 
985   @return An error code: 0 - success, otherwise - failure
986 
987   @ref Developer
988 **/
989 CeedPragmaOptimizeOff
990 static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
991                                 const CeedScalar *grad_1d,
992                                 const CeedScalar *q_weight_1d, CeedInt P_1d,
993                                 CeedInt Q_1d, CeedInt dim,
994                                 CeedScalar *mass, CeedScalar *laplace) {
995 
996   for (CeedInt i=0; i<P_1d; i++)
997     for (CeedInt j=0; j<P_1d; j++) {
998       CeedScalar sum = 0.0;
999       for (CeedInt k=0; k<Q_1d; k++)
1000         sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
1001       mass[i+j*P_1d] = sum;
1002     }
1003   // -- Laplacian
1004   for (CeedInt i=0; i<P_1d; i++)
1005     for (CeedInt j=0; j<P_1d; j++) {
1006       CeedScalar sum = 0.0;
1007       for (CeedInt k=0; k<Q_1d; k++)
1008         sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
1009       laplace[i+j*P_1d] = sum;
1010     }
1011   CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
1012   for (CeedInt i=0; i<P_1d; i++)
1013     laplace[i+P_1d*i] += perturbation;
1014   return CEED_ERROR_SUCCESS;
1015 }
1016 CeedPragmaOptimizeOn
1017 
1018 /// @}
1019 
1020 /// ----------------------------------------------------------------------------
1021 /// CeedOperator Public API
1022 /// ----------------------------------------------------------------------------
1023 /// @addtogroup CeedOperatorUser
1024 /// @{
1025 
1026 /**
1027   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1028 
1029   This returns a CeedVector containing a matrix at each quadrature point
1030     providing the action of the CeedQFunction associated with the CeedOperator.
1031     The vector 'assembled' is of shape
1032       [num_elements, num_input_fields, num_output_fields, num_quad_points]
1033     and contains column-major matrices representing the action of the
1034     CeedQFunction for a corresponding quadrature point on an element. Inputs and
1035     outputs are in the order provided by the user when adding CeedOperator fields.
1036     For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
1037     'v', provided in that order, would result in an assembled QFunction that
1038     consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
1039     on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1040 
1041   Note: Calling this function asserts that setup is complete
1042           and sets the CeedOperator as immutable.
1043 
1044   @param op              CeedOperator to assemble CeedQFunction
1045   @param[out] assembled  CeedVector to store assembled CeedQFunction at
1046                            quadrature points
1047   @param[out] rstr       CeedElemRestriction for CeedVector containing assembled
1048                            CeedQFunction
1049   @param request         Address of CeedRequest for non-blocking completion, else
1050                            @ref CEED_REQUEST_IMMEDIATE
1051 
1052   @return An error code: 0 - success, otherwise - failure
1053 
1054   @ref User
1055 **/
1056 int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
1057                                         CeedElemRestriction *rstr,
1058                                         CeedRequest *request) {
1059   int ierr;
1060   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1061 
1062   // Backend version
1063   if (op->LinearAssembleQFunction) {
1064     ierr = op->LinearAssembleQFunction(op, assembled, rstr, request);
1065     CeedChk(ierr);
1066   } else {
1067     // Fallback to reference Ceed
1068     if (!op->op_fallback) {
1069       ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1070     }
1071     // Assemble
1072     ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled,
1073            rstr, request); CeedChk(ierr);
1074   }
1075   return CEED_ERROR_SUCCESS;
1076 }
1077 
1078 /**
1079   @brief Assemble CeedQFunction and store result internall. Return copied
1080            references of stored data to the caller. Caller is responsible for
1081            ownership and destruction of the copied references. See also
1082            @ref CeedOperatorLinearAssembleQFunction
1083 
1084   @param op              CeedOperator to assemble CeedQFunction
1085   @param assembled       CeedVector to store assembled CeedQFunction at
1086                            quadrature points
1087   @param rstr            CeedElemRestriction for CeedVector containing assembled
1088                            CeedQFunction
1089   @param request         Address of CeedRequest for non-blocking completion, else
1090                            @ref CEED_REQUEST_IMMEDIATE
1091 
1092   @return An error code: 0 - success, otherwise - failure
1093 
1094   @ref User
1095 **/
1096 int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op,
1097     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
1098   int ierr;
1099   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1100 
1101   // Backend version
1102   if (op->LinearAssembleQFunctionUpdate) {
1103     if (op->has_qf_assembled) {
1104       ierr = op->LinearAssembleQFunctionUpdate(op, op->qf_assembled,
1105              op->qf_assembled_rstr, request);
1106     } else {
1107       ierr = op->LinearAssembleQFunction(op, &op->qf_assembled,
1108                                          &op->qf_assembled_rstr, request);
1109     }
1110     CeedChk(ierr);
1111     op->has_qf_assembled = true;
1112     // Copy reference to internally held copy
1113     *assembled = NULL;
1114     *rstr = NULL;
1115     ierr = CeedVectorReferenceCopy(op->qf_assembled, assembled); CeedChk(ierr);
1116     ierr = CeedElemRestrictionReferenceCopy(op->qf_assembled_rstr, rstr);
1117   } else {
1118     // Fallback to reference Ceed
1119     if (!op->op_fallback) {
1120       ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1121     }
1122     // Assemble
1123     ierr = CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op->op_fallback,
1124            assembled, rstr, request); CeedChk(ierr);
1125   }
1126   CeedChk(ierr);
1127 
1128   return CEED_ERROR_SUCCESS;
1129 }
1130 
1131 /**
1132   @brief Assemble the diagonal of a square linear CeedOperator
1133 
1134   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1135 
1136   Note: Currently only non-composite CeedOperators with a single field and
1137           composite CeedOperators with single field sub-operators are supported.
1138 
1139   Note: Calling this function asserts that setup is complete
1140           and sets the CeedOperator as immutable.
1141 
1142   @param op              CeedOperator to assemble CeedQFunction
1143   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1144   @param request         Address of CeedRequest for non-blocking completion, else
1145                            @ref CEED_REQUEST_IMMEDIATE
1146 
1147   @return An error code: 0 - success, otherwise - failure
1148 
1149   @ref User
1150 **/
1151 int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
1152                                        CeedRequest *request) {
1153   int ierr;
1154   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1155 
1156   // Use backend version, if available
1157   if (op->LinearAssembleDiagonal) {
1158     ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
1159     return CEED_ERROR_SUCCESS;
1160   } else if (op->LinearAssembleAddDiagonal) {
1161     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1162     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1163     return CEED_ERROR_SUCCESS;
1164   } else {
1165     // Check for valid fallback resource
1166     const char *resource, *fallback_resource;
1167     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1168     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1169     CeedChk(ierr);
1170     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1171       // Fallback to reference Ceed
1172       if (!op->op_fallback) {
1173         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1174       }
1175       // Assemble
1176       ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
1177       CeedChk(ierr);
1178       return CEED_ERROR_SUCCESS;
1179     }
1180   }
1181 
1182   // Default interface implementation
1183   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1184   ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
1185   CeedChk(ierr);
1186   return CEED_ERROR_SUCCESS;
1187 }
1188 
1189 /**
1190   @brief Assemble the diagonal of a square linear CeedOperator
1191 
1192   This sums into a CeedVector the diagonal of a linear CeedOperator.
1193 
1194   Note: Currently only non-composite CeedOperators with a single field and
1195           composite CeedOperators with single field sub-operators are supported.
1196 
1197   Note: Calling this function asserts that setup is complete
1198           and sets the CeedOperator as immutable.
1199 
1200   @param op              CeedOperator to assemble CeedQFunction
1201   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1202   @param request         Address of CeedRequest for non-blocking completion, else
1203                            @ref CEED_REQUEST_IMMEDIATE
1204 
1205   @return An error code: 0 - success, otherwise - failure
1206 
1207   @ref User
1208 **/
1209 int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
1210     CeedRequest *request) {
1211   int ierr;
1212   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1213 
1214   // Use backend version, if available
1215   if (op->LinearAssembleAddDiagonal) {
1216     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1217     return CEED_ERROR_SUCCESS;
1218   } else {
1219     // Check for valid fallback resource
1220     const char *resource, *fallback_resource;
1221     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1222     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1223     CeedChk(ierr);
1224     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1225       // Fallback to reference Ceed
1226       if (!op->op_fallback) {
1227         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1228       }
1229       // Assemble
1230       ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled,
1231              request); CeedChk(ierr);
1232       return CEED_ERROR_SUCCESS;
1233     }
1234   }
1235 
1236   // Default interface implementation
1237   bool is_composite;
1238   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1239   if (is_composite) {
1240     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1241            false, assembled); CeedChk(ierr);
1242     return CEED_ERROR_SUCCESS;
1243   } else {
1244     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
1245     CeedChk(ierr);
1246     return CEED_ERROR_SUCCESS;
1247   }
1248 }
1249 
1250 /**
1251   @brief Assemble the point block diagonal of a square linear CeedOperator
1252 
1253   This overwrites a CeedVector with the point block diagonal of a linear
1254     CeedOperator.
1255 
1256   Note: Currently only non-composite CeedOperators with a single field and
1257           composite CeedOperators with single field sub-operators are supported.
1258 
1259   Note: Calling this function asserts that setup is complete
1260           and sets the CeedOperator as immutable.
1261 
1262   @param op              CeedOperator to assemble CeedQFunction
1263   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1264                            diagonal, provided in row-major form with an
1265                            @a num_comp * @a num_comp block at each node. The dimensions
1266                            of this vector are derived from the active vector
1267                            for the CeedOperator. The array has shape
1268                            [nodes, component out, component in].
1269   @param request         Address of CeedRequest for non-blocking completion, else
1270                            CEED_REQUEST_IMMEDIATE
1271 
1272   @return An error code: 0 - success, otherwise - failure
1273 
1274   @ref User
1275 **/
1276 int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
1277     CeedVector assembled, CeedRequest *request) {
1278   int ierr;
1279   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1280 
1281   // Use backend version, if available
1282   if (op->LinearAssemblePointBlockDiagonal) {
1283     ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
1284     CeedChk(ierr);
1285     return CEED_ERROR_SUCCESS;
1286   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1287     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1288     ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
1289            request); CeedChk(ierr);
1290     return CEED_ERROR_SUCCESS;
1291   } else {
1292     // Check for valid fallback resource
1293     const char *resource, *fallback_resource;
1294     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1295     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1296     CeedChk(ierr);
1297     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1298       // Fallback to reference Ceed
1299       if (!op->op_fallback) {
1300         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1301       }
1302       // Assemble
1303       ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
1304              assembled, request); CeedChk(ierr);
1305       return CEED_ERROR_SUCCESS;
1306     }
1307   }
1308 
1309   // Default interface implementation
1310   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1311   ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
1312   CeedChk(ierr);
1313   return CEED_ERROR_SUCCESS;
1314 }
1315 
1316 /**
1317   @brief Assemble the point block diagonal of a square linear CeedOperator
1318 
1319   This sums into a CeedVector with the point block diagonal of a linear
1320     CeedOperator.
1321 
1322   Note: Currently only non-composite CeedOperators with a single field and
1323           composite CeedOperators with single field sub-operators are supported.
1324 
1325   Note: Calling this function asserts that setup is complete
1326           and sets the CeedOperator as immutable.
1327 
1328   @param op              CeedOperator to assemble CeedQFunction
1329   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1330                            diagonal, provided in row-major form with an
1331                            @a num_comp * @a num_comp block at each node. The dimensions
1332                            of this vector are derived from the active vector
1333                            for the CeedOperator. The array has shape
1334                            [nodes, component out, component in].
1335   @param request         Address of CeedRequest for non-blocking completion, else
1336                            CEED_REQUEST_IMMEDIATE
1337 
1338   @return An error code: 0 - success, otherwise - failure
1339 
1340   @ref User
1341 **/
1342 int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
1343     CeedVector assembled, CeedRequest *request) {
1344   int ierr;
1345   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1346 
1347   // Use backend version, if available
1348   if (op->LinearAssembleAddPointBlockDiagonal) {
1349     ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
1350     CeedChk(ierr);
1351     return CEED_ERROR_SUCCESS;
1352   } else {
1353     // Check for valid fallback resource
1354     const char *resource, *fallback_resource;
1355     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1356     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1357     CeedChk(ierr);
1358     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1359       // Fallback to reference Ceed
1360       if (!op->op_fallback) {
1361         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1362       }
1363       // Assemble
1364       ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback,
1365              assembled, request); CeedChk(ierr);
1366       return CEED_ERROR_SUCCESS;
1367     }
1368   }
1369 
1370   // Default interface implemenation
1371   bool is_composite;
1372   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1373   if (is_composite) {
1374     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1375            true, assembled); CeedChk(ierr);
1376     return CEED_ERROR_SUCCESS;
1377   } else {
1378     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
1379     CeedChk(ierr);
1380     return CEED_ERROR_SUCCESS;
1381   }
1382 }
1383 
1384 /**
1385    @brief Fully assemble the nonzero pattern of a linear operator.
1386 
1387    Expected to be used in conjunction with CeedOperatorLinearAssemble()
1388 
1389    The assembly routines use coordinate format, with num_entries tuples of the
1390    form (i, j, value) which indicate that value should be added to the matrix
1391    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1392    This function returns the number of entries and their (i, j) locations,
1393    while CeedOperatorLinearAssemble() provides the values in the same
1394    ordering.
1395 
1396    This will generally be slow unless your operator is low-order.
1397 
1398   Note: Calling this function asserts that setup is complete
1399           and sets the CeedOperator as immutable.
1400 
1401    @param[in]  op           CeedOperator to assemble
1402    @param[out] num_entries  Number of entries in coordinate nonzero pattern
1403    @param[out] rows         Row number for each entry
1404    @param[out] cols         Column number for each entry
1405 
1406    @ref User
1407 **/
1408 int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedInt *num_entries,
1409                                        CeedInt **rows, CeedInt **cols) {
1410   int ierr;
1411   CeedInt num_suboperators, single_entries;
1412   CeedOperator *sub_operators;
1413   bool is_composite;
1414   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1415 
1416   // Use backend version, if available
1417   if (op->LinearAssembleSymbolic) {
1418     ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
1419     return CEED_ERROR_SUCCESS;
1420   } else {
1421     // Check for valid fallback resource
1422     const char *resource, *fallback_resource;
1423     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1424     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1425     CeedChk(ierr);
1426     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1427       // Fallback to reference Ceed
1428       if (!op->op_fallback) {
1429         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1430       }
1431       // Assemble
1432       ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
1433              cols); CeedChk(ierr);
1434       return CEED_ERROR_SUCCESS;
1435     }
1436   }
1437 
1438   // Default interface implementation
1439 
1440   // count entries and allocate rows, cols arrays
1441   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1442   *num_entries = 0;
1443   if (is_composite) {
1444     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1445     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1446     for (int k = 0; k < num_suboperators; ++k) {
1447       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1448              &single_entries); CeedChk(ierr);
1449       *num_entries += single_entries;
1450     }
1451   } else {
1452     ierr = CeedSingleOperatorAssemblyCountEntries(op,
1453            &single_entries); CeedChk(ierr);
1454     *num_entries += single_entries;
1455   }
1456   ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
1457   ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);
1458 
1459   // assemble nonzero locations
1460   CeedInt offset = 0;
1461   if (is_composite) {
1462     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1463     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1464     for (int k = 0; k < num_suboperators; ++k) {
1465       ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
1466              *cols); CeedChk(ierr);
1467       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1468              &single_entries);
1469       CeedChk(ierr);
1470       offset += single_entries;
1471     }
1472   } else {
1473     ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
1474     CeedChk(ierr);
1475   }
1476 
1477   return CEED_ERROR_SUCCESS;
1478 }
1479 
1480 /**
1481    @brief Fully assemble the nonzero entries of a linear operator.
1482 
1483    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()
1484 
1485    The assembly routines use coordinate format, with num_entries tuples of the
1486    form (i, j, value) which indicate that value should be added to the matrix
1487    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1488    This function returns the values of the nonzero entries to be added, their
1489    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1490 
1491    This will generally be slow unless your operator is low-order.
1492 
1493   Note: Calling this function asserts that setup is complete
1494           and sets the CeedOperator as immutable.
1495 
1496    @param[in]  op      CeedOperator to assemble
1497    @param[out] values  Values to assemble into matrix
1498 
1499    @ref User
1500 **/
1501 int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1502   int ierr;
1503   CeedInt num_suboperators, single_entries = 0;
1504   CeedOperator *sub_operators;
1505   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1506 
1507   // Use backend version, if available
1508   if (op->LinearAssemble) {
1509     ierr = op->LinearAssemble(op, values); CeedChk(ierr);
1510     return CEED_ERROR_SUCCESS;
1511   } else {
1512     // Check for valid fallback resource
1513     const char *resource, *fallback_resource;
1514     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1515     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1516     CeedChk(ierr);
1517     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1518       // Fallback to reference Ceed
1519       if (!op->op_fallback) {
1520         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1521       }
1522       // Assemble
1523       ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr);
1524       return CEED_ERROR_SUCCESS;
1525     }
1526   }
1527 
1528   // Default interface implementation
1529   bool is_composite;
1530   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1531 
1532   CeedInt offset = 0;
1533   if (is_composite) {
1534     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1535     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1536     for (int k = 0; k < num_suboperators; ++k) {
1537       ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
1538       CeedChk(ierr);
1539       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1540              &single_entries);
1541       CeedChk(ierr);
1542       offset += single_entries;
1543     }
1544   } else {
1545     ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
1546   }
1547 
1548   return CEED_ERROR_SUCCESS;
1549 }
1550 
1551 /**
1552   @brief Create a multigrid coarse operator and level transfer operators
1553            for a CeedOperator, creating the prolongation basis from the
1554            fine and coarse grid interpolation
1555 
1556   Note: Calling this function asserts that setup is complete
1557           and sets the CeedOperator as immutable.
1558 
1559   @param[in] op_fine       Fine grid operator
1560   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
1561   @param[in] rstr_coarse   Coarse grid restriction
1562   @param[in] basis_coarse  Coarse grid active vector basis
1563   @param[out] op_coarse    Coarse grid operator
1564   @param[out] op_prolong   Coarse to fine operator
1565   @param[out] op_restrict  Fine to coarse operator
1566 
1567   @return An error code: 0 - success, otherwise - failure
1568 
1569   @ref User
1570 **/
int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
                                     CeedVector p_mult_fine,
                                     CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
                                     CeedOperator *op_coarse, CeedOperator *op_prolong,
                                     CeedOperator *op_restrict) {
  int ierr;
  ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
  Ceed ceed;
  ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);

  // Check for compatible quadrature spaces
  CeedBasis basis_fine;
  ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
  CeedInt Q_f, Q_c;
  ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
  ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
  if (Q_f != Q_c)
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_DIMENSION,
                     "Bases must have compatible quadrature spaces");
  // LCOV_EXCL_STOP

  // Coarse to fine basis
  // For tensor bases the 1D node/quadrature sizes are used; for non-tensor
  //   bases the full node counts are used. Mixing the two is an error.
  CeedInt P_f, P_c, Q = Q_f;
  bool is_tensor_f, is_tensor_c;
  ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
  ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
  CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
  if (is_tensor_f && is_tensor_c) {
    ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
    ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
  } else if (!is_tensor_f && !is_tensor_c) {
    ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
    ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
  } else {
    // LCOV_EXCL_START
    return CeedError(ceed, CEED_ERROR_MINOR,
                     "Bases must both be tensor or non-tensor");
    // LCOV_EXCL_STOP
  }

  // Work buffers: copies of the fine/coarse interpolation matrices, the
  //   output prolongation matrix (zeroed), and QR Householder scalars.
  // NOTE(review): a CeedChk error return below this point leaks these
  //   buffers; this matches the error-handling convention used throughout
  //   this file (no cleanup on error paths).
  ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
  ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
  ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
  ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
  if (is_tensor_f) {
    memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]);
    memcpy(interp_c, basis_coarse->interp_1d,
           Q*P_c*sizeof basis_coarse->interp_1d[0]);
  } else {
    memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]);
    memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]);
  }

  // Least-squares solve of interp_f * interp_c_to_f = interp_c via QR:
  // -- QR Factorization, interp_f = Q R
  ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);

  // -- Apply Qtranspose, interp_c = Qtranspose interp_c
  ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
                               Q, P_c, P_f, P_c, 1); CeedChk(ierr);

  // -- Apply Rinv, interp_c_to_f = Rinv interp_c
  //    (back-substitution against the upper-triangular R stored in interp_f)
  for (CeedInt j=0; j<P_c; j++) { // Column j
    interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
    for (CeedInt i=P_f-2; i>=0; i--) { // Row i
      interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
      for (CeedInt k=i+1; k<P_f; k++)
        interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
      interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
    }
  }
  ierr = CeedFree(&tau); CeedChk(ierr);
  ierr = CeedFree(&interp_c); CeedChk(ierr);
  ierr = CeedFree(&interp_f); CeedChk(ierr);

  // Complete with interp_c_to_f versions of code
  if (is_tensor_f) {
    ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  } else {
    ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
           rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
    CeedChk(ierr);
  }

  // Cleanup
  ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
  return CEED_ERROR_SUCCESS;
}
1662 
1663 /**
1664   @brief Create a multigrid coarse operator and level transfer operators
1665            for a CeedOperator with a tensor basis for the active basis
1666 
1667   Note: Calling this function asserts that setup is complete
1668           and sets the CeedOperator as immutable.
1669 
1670   @param[in] op_fine        Fine grid operator
1671   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
1672   @param[in] rstr_coarse    Coarse grid restriction
1673   @param[in] basis_coarse   Coarse grid active vector basis
1674   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
1675   @param[out] op_coarse     Coarse grid operator
1676   @param[out] op_prolong    Coarse to fine operator
1677   @param[out] op_restrict   Fine to coarse operator
1678 
1679   @return An error code: 0 - success, otherwise - failure
1680 
1681   @ref User
1682 **/
1683 int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
1684     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1685     const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
1686     CeedOperator *op_prolong, CeedOperator *op_restrict) {
1687   int ierr;
1688   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
1689   Ceed ceed;
1690   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
1691 
1692   // Check for compatible quadrature spaces
1693   CeedBasis basis_fine;
1694   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
1695   CeedInt Q_f, Q_c;
1696   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
1697   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
1698   if (Q_f != Q_c)
1699     // LCOV_EXCL_START
1700     return CeedError(ceed, CEED_ERROR_DIMENSION,
1701                      "Bases must have compatible quadrature spaces");
1702   // LCOV_EXCL_STOP
1703 
1704   // Coarse to fine basis
1705   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
1706   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
1707   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
1708   ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr);
1709   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
1710   CeedChk(ierr);
1711   P_1d_c = dim == 1 ? num_nodes_c :
1712            dim == 2 ? sqrt(num_nodes_c) :
1713            cbrt(num_nodes_c);
1714   CeedScalar *q_ref, *q_weight, *grad;
1715   ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr);
1716   ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr);
1717   ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr);
1718   CeedBasis basis_c_to_f;
1719   ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f,
1720                                  interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
1721   CeedChk(ierr);
1722   ierr = CeedFree(&q_ref); CeedChk(ierr);
1723   ierr = CeedFree(&q_weight); CeedChk(ierr);
1724   ierr = CeedFree(&grad); CeedChk(ierr);
1725 
1726   // Core code
1727   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
1728                                           basis_coarse, basis_c_to_f, op_coarse,
1729                                           op_prolong, op_restrict);
1730   CeedChk(ierr);
1731   return CEED_ERROR_SUCCESS;
1732 }
1733 
1734 /**
1735   @brief Create a multigrid coarse operator and level transfer operators
           for a CeedOperator with a non-tensor basis for the active vector
             basis
1737 
1738   Note: Calling this function asserts that setup is complete
1739           and sets the CeedOperator as immutable.
1740 
1741   @param[in] op_fine        Fine grid operator
1742   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
1743   @param[in] rstr_coarse    Coarse grid restriction
1744   @param[in] basis_coarse   Coarse grid active vector basis
1745   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
1746   @param[out] op_coarse     Coarse grid operator
1747   @param[out] op_prolong    Coarse to fine operator
1748   @param[out] op_restrict   Fine to coarse operator
1749 
1750   @return An error code: 0 - success, otherwise - failure
1751 
1752   @ref User
1753 **/
1754 int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine,
1755                                        CeedVector p_mult_fine,
1756                                        CeedElemRestriction rstr_coarse,
1757                                        CeedBasis basis_coarse,
1758                                        const CeedScalar *interp_c_to_f,
1759                                        CeedOperator *op_coarse,
1760                                        CeedOperator *op_prolong,
1761                                        CeedOperator *op_restrict) {
1762   int ierr;
1763   ierr = CeedOperatorCheckReady(op_fine); CeedChk(ierr);
1764   Ceed ceed;
1765   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
1766 
1767   // Check for compatible quadrature spaces
1768   CeedBasis basis_fine;
1769   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
1770   CeedInt Q_f, Q_c;
1771   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
1772   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
1773   if (Q_f != Q_c)
1774     // LCOV_EXCL_START
1775     return CeedError(ceed, CEED_ERROR_DIMENSION,
1776                      "Bases must have compatible quadrature spaces");
1777   // LCOV_EXCL_STOP
1778 
1779   // Coarse to fine basis
1780   CeedElemTopology topo;
1781   ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr);
1782   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
1783   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
1784   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
1785   ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr);
1786   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
1787   CeedChk(ierr);
1788   CeedScalar *q_ref, *q_weight, *grad;
1789   ierr = CeedCalloc(num_nodes_f, &q_ref); CeedChk(ierr);
1790   ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr);
1791   ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr);
1792   CeedBasis basis_c_to_f;
1793   ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f,
1794                            interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
1795   CeedChk(ierr);
1796   ierr = CeedFree(&q_ref); CeedChk(ierr);
1797   ierr = CeedFree(&q_weight); CeedChk(ierr);
1798   ierr = CeedFree(&grad); CeedChk(ierr);
1799 
1800   // Core code
1801   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
1802                                           basis_coarse, basis_c_to_f, op_coarse,
1803                                           op_prolong, op_restrict);
1804   CeedChk(ierr);
1805   return CEED_ERROR_SUCCESS;
1806 }
1807 
1808 /**
1809   @brief Build a FDM based approximate inverse for each element for a
1810            CeedOperator
1811 
1812   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization
1813     Method based approximate inverse. This function obtains the simultaneous
1814     diagonalization for the 1D mass and Laplacian operators,
1815       M = V^T V, K = V^T S V.
1816     The assembled QFunction is used to modify the eigenvalues from simultaneous
1817     diagonalization and obtain an approximate inverse of the form
1818       V^T S^hat V. The CeedOperator must be linear and non-composite. The
1819     associated CeedQFunction must therefore also be linear.
1820 
1821   Note: Calling this function asserts that setup is complete
1822           and sets the CeedOperator as immutable.
1823 
1824   @param op            CeedOperator to create element inverses
1825   @param[out] fdm_inv  CeedOperator to apply the action of a FDM based inverse
1826                          for each element
1827   @param request       Address of CeedRequest for non-blocking completion, else
1828                          @ref CEED_REQUEST_IMMEDIATE
1829 
1830   @return An error code: 0 - success, otherwise - failure
1831 
1832   @ref Backend
1833 **/
1834 int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv,
1835                                         CeedRequest *request) {
1836   int ierr;
1837   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1838 
1839   // Use backend version, if available
1840   if (op->CreateFDMElementInverse) {
1841     ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr);
1842     return CEED_ERROR_SUCCESS;
1843   } else {
1844     // Check for valid fallback resource
1845     const char *resource, *fallback_resource;
1846     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1847     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1848     CeedChk(ierr);
1849     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1850       // Fallback to reference Ceed
1851       if (!op->op_fallback) {
1852         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1853       }
1854       // Assemble
1855       ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request);
1856       CeedChk(ierr);
1857       return CEED_ERROR_SUCCESS;
1858     }
1859   }
1860 
1861   // Interface implementation
1862   Ceed ceed, ceed_parent;
1863   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
1864   ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr);
1865   ceed_parent = ceed_parent ? ceed_parent : ceed;
1866   CeedQFunction qf;
1867   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
1868 
1869   // Determine active input basis
1870   bool interp = false, grad = false;
1871   CeedBasis basis = NULL;
1872   CeedElemRestriction rstr = NULL;
1873   CeedOperatorField *op_fields;
1874   CeedQFunctionField *qf_fields;
1875   CeedInt num_input_fields;
1876   ierr = CeedOperatorGetFields(op, &num_input_fields, &op_fields, NULL, NULL);
1877   CeedChk(ierr);
1878   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields, NULL, NULL); CeedChk(ierr);
1879   for (CeedInt i=0; i<num_input_fields; i++) {
1880     CeedVector vec;
1881     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
1882     if (vec == CEED_VECTOR_ACTIVE) {
1883       CeedEvalMode eval_mode;
1884       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
1885       interp = interp || eval_mode == CEED_EVAL_INTERP;
1886       grad = grad || eval_mode == CEED_EVAL_GRAD;
1887       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr);
1888       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
1889     }
1890   }
1891   if (!basis)
1892     // LCOV_EXCL_START
1893     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
1894   // LCOV_EXCL_STOP
1895   CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1,
1896                                                 l_size = 1;
1897   ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr);
1898   ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr);
1899   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr);
1900   ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr);
1901   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
1902   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
1903   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
1904   ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr);
1905 
1906   // Build and diagonalize 1D Mass and Laplacian
1907   bool tensor_basis;
1908   ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr);
1909   if (!tensor_basis)
1910     // LCOV_EXCL_START
1911     return CeedError(ceed, CEED_ERROR_BACKEND,
1912                      "FDMElementInverse only supported for tensor "
1913                      "bases");
1914   // LCOV_EXCL_STOP
1915   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
1916   ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr);
1917   ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr);
1918   ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr);
1919   ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr);
1920   ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr);
1921   // -- Build matrices
1922   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
1923   ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr);
1924   ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr);
1925   ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr);
1926   ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim,
1927                               mass, laplace); CeedChk(ierr);
1928 
1929   // -- Diagonalize
1930   ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d);
1931   CeedChk(ierr);
1932   ierr = CeedFree(&mass); CeedChk(ierr);
1933   ierr = CeedFree(&laplace); CeedChk(ierr);
1934   for (CeedInt i=0; i<P_1d; i++)
1935     for (CeedInt j=0; j<P_1d; j++)
1936       fdm_interp[i+j*P_1d] = x[j+i*P_1d];
1937   ierr = CeedFree(&x); CeedChk(ierr);
1938 
1939   // Assemble QFunction
1940   CeedVector assembled;
1941   CeedElemRestriction rstr_qf;
1942   ierr =  CeedOperatorLinearAssembleQFunctionBuildOrUpdate(op, &assembled,
1943           &rstr_qf, request); CeedChk(ierr);
1944   CeedInt layout[3];
1945   ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr);
1946   ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr);
1947   CeedScalar max_norm = 0;
1948   ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr);
1949 
1950   // Calculate element averages
1951   CeedInt num_modes = (interp?1:0) + (grad?dim:0);
1952   CeedScalar *elem_avg;
1953   const CeedScalar *assembled_array, *q_weight_array;
1954   CeedVector q_weight;
1955   ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr);
1956   ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT,
1957                         CEED_VECTOR_NONE, q_weight); CeedChk(ierr);
1958   ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array);
1959   CeedChk(ierr);
1960   ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array);
1961   CeedChk(ierr);
1962   ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr);
1963   const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
1964   for (CeedInt e=0; e<num_elem; e++) {
1965     CeedInt count = 0;
1966     for (CeedInt q=0; q<num_qpts; q++)
1967       for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++)
1968         if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) >
1969             qf_value_bound) {
1970           elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] /
1971                          q_weight_array[q];
1972           count++;
1973         }
1974     if (count) {
1975       elem_avg[e] /= count;
1976     } else {
1977       elem_avg[e] = 1.0;
1978     }
1979   }
1980   ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr);
1981   ierr = CeedVectorDestroy(&assembled); CeedChk(ierr);
1982   ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr);
1983   ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr);
1984 
1985   // Build FDM diagonal
1986   CeedVector q_data;
1987   CeedScalar *q_data_array, *fdm_diagonal;
1988   ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr);
1989   const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON;
1990   for (CeedInt c=0; c<num_comp; c++)
1991     for (CeedInt n=0; n<elem_size; n++) {
1992       if (interp)
1993         fdm_diagonal[c*elem_size + n] = 1.0;
1994       if (grad)
1995         for (CeedInt d=0; d<dim; d++) {
1996           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
1997           fdm_diagonal[c*elem_size + n] += lambda[i];
1998         }
1999       if (fabs(fdm_diagonal[c*elem_size + n]) < fdm_diagonal_bound)
2000         fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound;
2001     }
2002   ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data);
2003   CeedChk(ierr);
2004   ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr);
2005   ierr = CeedVectorGetArrayWrite(q_data, CEED_MEM_HOST, &q_data_array);
2006   CeedChk(ierr);
2007   for (CeedInt e=0; e<num_elem; e++)
2008     for (CeedInt c=0; c<num_comp; c++)
2009       for (CeedInt n=0; n<elem_size; n++)
2010         q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] *
2011             fdm_diagonal[c*elem_size + n]);
2012   ierr = CeedFree(&elem_avg); CeedChk(ierr);
2013   ierr = CeedFree(&fdm_diagonal); CeedChk(ierr);
2014   ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr);
2015 
2016   // Setup FDM operator
2017   // -- Basis
2018   CeedBasis fdm_basis;
2019   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
2020   ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr);
2021   ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr);
2022   ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr);
2023   ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d,
2024                                  fdm_interp, grad_dummy, q_ref_dummy,
2025                                  q_weight_dummy, &fdm_basis); CeedChk(ierr);
2026   ierr = CeedFree(&fdm_interp); CeedChk(ierr);
2027   ierr = CeedFree(&grad_dummy); CeedChk(ierr);
2028   ierr = CeedFree(&q_ref_dummy); CeedChk(ierr);
2029   ierr = CeedFree(&q_weight_dummy); CeedChk(ierr);
2030   ierr = CeedFree(&lambda); CeedChk(ierr);
2031 
2032   // -- Restriction
2033   CeedElemRestriction rstr_qd_i;
2034   CeedInt strides[3] = {1, elem_size, elem_size*num_comp};
2035   ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size,
2036                                           num_comp, num_elem*num_comp*elem_size,
2037                                           strides, &rstr_qd_i); CeedChk(ierr);
2038   // -- QFunction
2039   CeedQFunction qf_fdm;
2040   ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm);
2041   CeedChk(ierr);
2042   ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP);
2043   CeedChk(ierr);
2044   ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE);
2045   CeedChk(ierr);
2046   ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP);
2047   CeedChk(ierr);
2048   // -- QFunction context
2049   CeedInt *num_comp_data;
2050   ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr);
2051   num_comp_data[0] = num_comp;
2052   CeedQFunctionContext ctx_fdm;
2053   ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr);
2054   ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER,
2055                                      sizeof(*num_comp_data), num_comp_data);
2056   CeedChk(ierr);
2057   ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr);
2058   ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr);
2059   // -- Operator
2060   ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv);
2061   CeedChk(ierr);
2062   CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
2063   CeedChk(ierr);
2064   CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED,
2065                        q_data); CeedChk(ierr);
2066   CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
2067   CeedChk(ierr);
2068 
2069   // Cleanup
2070   ierr = CeedVectorDestroy(&q_data); CeedChk(ierr);
2071   ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr);
2072   ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr);
2073   ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr);
2074 
2075   return CEED_ERROR_SUCCESS;
2076 }
2077 
2078 /// @}
2079