xref: /libCEED/rust/libceed-sys/c-src/interface/ceed-preconditioning.c (revision eaf62fff1deb4c7d4a34a324240d6e3aac43fc8b)
1*eaf62fffSJeremy L Thompson // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2*eaf62fffSJeremy L Thompson // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3*eaf62fffSJeremy L Thompson // reserved. See files LICENSE and NOTICE for details.
4*eaf62fffSJeremy L Thompson //
5*eaf62fffSJeremy L Thompson // This file is part of CEED, a collection of benchmarks, miniapps, software
6*eaf62fffSJeremy L Thompson // libraries and APIs for efficient high-order finite element and spectral
7*eaf62fffSJeremy L Thompson // element discretizations for exascale applications. For more information and
8*eaf62fffSJeremy L Thompson // source code availability see http://github.com/ceed.
9*eaf62fffSJeremy L Thompson //
10*eaf62fffSJeremy L Thompson // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11*eaf62fffSJeremy L Thompson // a collaborative effort of two U.S. Department of Energy organizations (Office
12*eaf62fffSJeremy L Thompson // of Science and the National Nuclear Security Administration) responsible for
13*eaf62fffSJeremy L Thompson // the planning and preparation of a capable exascale ecosystem, including
14*eaf62fffSJeremy L Thompson // software, applications, hardware, advanced system engineering and early
15*eaf62fffSJeremy L Thompson // testbed platforms, in support of the nation's exascale computing imperative.
16*eaf62fffSJeremy L Thompson 
17*eaf62fffSJeremy L Thompson #include <ceed/ceed.h>
18*eaf62fffSJeremy L Thompson #include <ceed/backend.h>
19*eaf62fffSJeremy L Thompson #include <ceed-impl.h>
20*eaf62fffSJeremy L Thompson #include <math.h>
21*eaf62fffSJeremy L Thompson #include <stdbool.h>
22*eaf62fffSJeremy L Thompson #include <stdio.h>
23*eaf62fffSJeremy L Thompson #include <string.h>
24*eaf62fffSJeremy L Thompson 
25*eaf62fffSJeremy L Thompson /// @file
26*eaf62fffSJeremy L Thompson /// Implementation of CeedOperator preconditioning interfaces
27*eaf62fffSJeremy L Thompson 
28*eaf62fffSJeremy L Thompson /// ----------------------------------------------------------------------------
29*eaf62fffSJeremy L Thompson /// CeedOperator Library Internal Preconditioning Functions
30*eaf62fffSJeremy L Thompson /// ----------------------------------------------------------------------------
31*eaf62fffSJeremy L Thompson /// @addtogroup CeedOperatorDeveloper
32*eaf62fffSJeremy L Thompson /// @{
33*eaf62fffSJeremy L Thompson 
34*eaf62fffSJeremy L Thompson /**
35*eaf62fffSJeremy L Thompson   @brief Duplicate a CeedOperator with a reference Ceed to fallback for advanced
36*eaf62fffSJeremy L Thompson          CeedOperator functionality
37*eaf62fffSJeremy L Thompson 
38*eaf62fffSJeremy L Thompson   @param op  CeedOperator to create fallback for
39*eaf62fffSJeremy L Thompson 
40*eaf62fffSJeremy L Thompson   @return An error code: 0 - success, otherwise - failure
41*eaf62fffSJeremy L Thompson 
42*eaf62fffSJeremy L Thompson   @ref Developer
43*eaf62fffSJeremy L Thompson **/
44*eaf62fffSJeremy L Thompson int CeedOperatorCreateFallback(CeedOperator op) {
45*eaf62fffSJeremy L Thompson   int ierr;
46*eaf62fffSJeremy L Thompson 
47*eaf62fffSJeremy L Thompson   // Fallback Ceed
48*eaf62fffSJeremy L Thompson   const char *resource, *fallback_resource;
49*eaf62fffSJeremy L Thompson   ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
50*eaf62fffSJeremy L Thompson   ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
51*eaf62fffSJeremy L Thompson   CeedChk(ierr);
52*eaf62fffSJeremy L Thompson   if (!strcmp(resource, fallback_resource))
53*eaf62fffSJeremy L Thompson     // LCOV_EXCL_START
54*eaf62fffSJeremy L Thompson     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
55*eaf62fffSJeremy L Thompson                      "Backend %s cannot create an operator"
56*eaf62fffSJeremy L Thompson                      "fallback to resource %s", resource, fallback_resource);
57*eaf62fffSJeremy L Thompson   // LCOV_EXCL_STOP
58*eaf62fffSJeremy L Thompson 
59*eaf62fffSJeremy L Thompson   // Fallback Ceed
60*eaf62fffSJeremy L Thompson   Ceed ceed_ref;
61*eaf62fffSJeremy L Thompson   if (!op->ceed->op_fallback_ceed) {
62*eaf62fffSJeremy L Thompson     ierr = CeedInit(fallback_resource, &ceed_ref); CeedChk(ierr);
63*eaf62fffSJeremy L Thompson     ceed_ref->op_fallback_parent = op->ceed;
64*eaf62fffSJeremy L Thompson     ceed_ref->Error = op->ceed->Error;
65*eaf62fffSJeremy L Thompson     op->ceed->op_fallback_ceed = ceed_ref;
66*eaf62fffSJeremy L Thompson   }
67*eaf62fffSJeremy L Thompson   ceed_ref = op->ceed->op_fallback_ceed;
68*eaf62fffSJeremy L Thompson 
69*eaf62fffSJeremy L Thompson   // Clone Op
70*eaf62fffSJeremy L Thompson   CeedOperator op_ref;
71*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(1, &op_ref); CeedChk(ierr);
72*eaf62fffSJeremy L Thompson   memcpy(op_ref, op, sizeof(*op_ref));
73*eaf62fffSJeremy L Thompson   op_ref->data = NULL;
74*eaf62fffSJeremy L Thompson   op_ref->interface_setup = false;
75*eaf62fffSJeremy L Thompson   op_ref->backend_setup = false;
76*eaf62fffSJeremy L Thompson   op_ref->ceed = ceed_ref;
77*eaf62fffSJeremy L Thompson   ierr = ceed_ref->OperatorCreate(op_ref); CeedChk(ierr);
78*eaf62fffSJeremy L Thompson   op->op_fallback = op_ref;
79*eaf62fffSJeremy L Thompson 
80*eaf62fffSJeremy L Thompson   // Clone QF
81*eaf62fffSJeremy L Thompson   CeedQFunction qf_ref;
82*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(1, &qf_ref); CeedChk(ierr);
83*eaf62fffSJeremy L Thompson   memcpy(qf_ref, (op->qf), sizeof(*qf_ref));
84*eaf62fffSJeremy L Thompson   qf_ref->data = NULL;
85*eaf62fffSJeremy L Thompson   qf_ref->ceed = ceed_ref;
86*eaf62fffSJeremy L Thompson   ierr = ceed_ref->QFunctionCreate(qf_ref); CeedChk(ierr);
87*eaf62fffSJeremy L Thompson   op_ref->qf = qf_ref;
88*eaf62fffSJeremy L Thompson   op->qf_fallback = qf_ref;
89*eaf62fffSJeremy L Thompson   return CEED_ERROR_SUCCESS;
90*eaf62fffSJeremy L Thompson }
91*eaf62fffSJeremy L Thompson 
92*eaf62fffSJeremy L Thompson /**
93*eaf62fffSJeremy L Thompson   @brief Select correct basis matrix pointer based on CeedEvalMode
94*eaf62fffSJeremy L Thompson 
95*eaf62fffSJeremy L Thompson   @param[in] eval_mode   Current basis evaluation mode
96*eaf62fffSJeremy L Thompson   @param[in] identity    Pointer to identity matrix
97*eaf62fffSJeremy L Thompson   @param[in] interp      Pointer to interpolation matrix
98*eaf62fffSJeremy L Thompson   @param[in] grad        Pointer to gradient matrix
99*eaf62fffSJeremy L Thompson   @param[out] basis_ptr  Basis pointer to set
100*eaf62fffSJeremy L Thompson 
101*eaf62fffSJeremy L Thompson   @return none
102*eaf62fffSJeremy L Thompson 
103*eaf62fffSJeremy L Thompson   @ref Developer
104*eaf62fffSJeremy L Thompson **/
105*eaf62fffSJeremy L Thompson static inline void CeedOperatorGetBasisPointer(CeedEvalMode eval_mode,
106*eaf62fffSJeremy L Thompson     const CeedScalar *identity, const CeedScalar *interp,
107*eaf62fffSJeremy L Thompson     const CeedScalar *grad, const CeedScalar **basis_ptr) {
108*eaf62fffSJeremy L Thompson   switch (eval_mode) {
109*eaf62fffSJeremy L Thompson   case CEED_EVAL_NONE:
110*eaf62fffSJeremy L Thompson     *basis_ptr = identity;
111*eaf62fffSJeremy L Thompson     break;
112*eaf62fffSJeremy L Thompson   case CEED_EVAL_INTERP:
113*eaf62fffSJeremy L Thompson     *basis_ptr = interp;
114*eaf62fffSJeremy L Thompson     break;
115*eaf62fffSJeremy L Thompson   case CEED_EVAL_GRAD:
116*eaf62fffSJeremy L Thompson     *basis_ptr = grad;
117*eaf62fffSJeremy L Thompson     break;
118*eaf62fffSJeremy L Thompson   case CEED_EVAL_WEIGHT:
119*eaf62fffSJeremy L Thompson   case CEED_EVAL_DIV:
120*eaf62fffSJeremy L Thompson   case CEED_EVAL_CURL:
121*eaf62fffSJeremy L Thompson     break; // Caught by QF Assembly
122*eaf62fffSJeremy L Thompson   }
123*eaf62fffSJeremy L Thompson }
124*eaf62fffSJeremy L Thompson 
125*eaf62fffSJeremy L Thompson /**
126*eaf62fffSJeremy L Thompson   @brief Create point block restriction for active operator field
127*eaf62fffSJeremy L Thompson 
128*eaf62fffSJeremy L Thompson   @param[in] rstr              Original CeedElemRestriction for active field
129*eaf62fffSJeremy L Thompson   @param[out] pointblock_rstr  Address of the variable where the newly created
130*eaf62fffSJeremy L Thompson                                  CeedElemRestriction will be stored
131*eaf62fffSJeremy L Thompson 
132*eaf62fffSJeremy L Thompson   @return An error code: 0 - success, otherwise - failure
133*eaf62fffSJeremy L Thompson 
134*eaf62fffSJeremy L Thompson   @ref Developer
135*eaf62fffSJeremy L Thompson **/
136*eaf62fffSJeremy L Thompson static int CeedOperatorCreateActivePointBlockRestriction(
137*eaf62fffSJeremy L Thompson   CeedElemRestriction rstr,
138*eaf62fffSJeremy L Thompson   CeedElemRestriction *pointblock_rstr) {
139*eaf62fffSJeremy L Thompson   int ierr;
140*eaf62fffSJeremy L Thompson   Ceed ceed;
141*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr);
142*eaf62fffSJeremy L Thompson   const CeedInt *offsets;
143*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetOffsets(rstr, CEED_MEM_HOST, &offsets);
144*eaf62fffSJeremy L Thompson   CeedChk(ierr);
145*eaf62fffSJeremy L Thompson 
146*eaf62fffSJeremy L Thompson   // Expand offsets
147*eaf62fffSJeremy L Thompson   CeedInt num_elem, num_comp, elem_size, comp_stride, max = 1,
148*eaf62fffSJeremy L Thompson                                                       *pointblock_offsets;
149*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
150*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
151*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
152*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetCompStride(rstr, &comp_stride); CeedChk(ierr);
153*eaf62fffSJeremy L Thompson   CeedInt shift = num_comp;
154*eaf62fffSJeremy L Thompson   if (comp_stride != 1)
155*eaf62fffSJeremy L Thompson     shift *= num_comp;
156*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(num_elem*elem_size, &pointblock_offsets);
157*eaf62fffSJeremy L Thompson   CeedChk(ierr);
158*eaf62fffSJeremy L Thompson   for (CeedInt i = 0; i < num_elem*elem_size; i++) {
159*eaf62fffSJeremy L Thompson     pointblock_offsets[i] = offsets[i]*shift;
160*eaf62fffSJeremy L Thompson     if (pointblock_offsets[i] > max)
161*eaf62fffSJeremy L Thompson       max = pointblock_offsets[i];
162*eaf62fffSJeremy L Thompson   }
163*eaf62fffSJeremy L Thompson 
164*eaf62fffSJeremy L Thompson   // Create new restriction
165*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionCreate(ceed, num_elem, elem_size, num_comp*num_comp,
166*eaf62fffSJeremy L Thompson                                    1, max + num_comp*num_comp, CEED_MEM_HOST,
167*eaf62fffSJeremy L Thompson                                    CEED_OWN_POINTER, pointblock_offsets, pointblock_rstr);
168*eaf62fffSJeremy L Thompson   CeedChk(ierr);
169*eaf62fffSJeremy L Thompson 
170*eaf62fffSJeremy L Thompson   // Cleanup
171*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionRestoreOffsets(rstr, &offsets); CeedChk(ierr);
172*eaf62fffSJeremy L Thompson 
173*eaf62fffSJeremy L Thompson   return CEED_ERROR_SUCCESS;
174*eaf62fffSJeremy L Thompson }
175*eaf62fffSJeremy L Thompson 
176*eaf62fffSJeremy L Thompson /**
177*eaf62fffSJeremy L Thompson   @brief Core logic for assembling operator diagonal or point block diagonal
178*eaf62fffSJeremy L Thompson 
179*eaf62fffSJeremy L Thompson   @param[in] op             CeedOperator to assemble point block diagonal
180*eaf62fffSJeremy L Thompson   @param[in] request        Address of CeedRequest for non-blocking completion, else
181*eaf62fffSJeremy L Thompson                               CEED_REQUEST_IMMEDIATE
182*eaf62fffSJeremy L Thompson   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
183*eaf62fffSJeremy L Thompson   @param[out] assembled     CeedVector to store assembled diagonal
184*eaf62fffSJeremy L Thompson 
185*eaf62fffSJeremy L Thompson   @return An error code: 0 - success, otherwise - failure
186*eaf62fffSJeremy L Thompson 
187*eaf62fffSJeremy L Thompson   @ref Developer
188*eaf62fffSJeremy L Thompson **/
189*eaf62fffSJeremy L Thompson static inline int CeedSingleOperatorAssembleAddDiagonal(CeedOperator op,
190*eaf62fffSJeremy L Thompson     CeedRequest *request, const bool is_pointblock, CeedVector assembled) {
191*eaf62fffSJeremy L Thompson   int ierr;
192*eaf62fffSJeremy L Thompson   Ceed ceed;
193*eaf62fffSJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
194*eaf62fffSJeremy L Thompson 
195*eaf62fffSJeremy L Thompson   // Assemble QFunction
196*eaf62fffSJeremy L Thompson   CeedQFunction qf;
197*eaf62fffSJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
198*eaf62fffSJeremy L Thompson   CeedInt num_input_fields, num_output_fields;
199*eaf62fffSJeremy L Thompson   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
200*eaf62fffSJeremy L Thompson   CeedChk(ierr);
201*eaf62fffSJeremy L Thompson   CeedVector assembled_qf;
202*eaf62fffSJeremy L Thompson   CeedElemRestriction rstr;
203*eaf62fffSJeremy L Thompson   ierr = CeedOperatorLinearAssembleQFunction(op,  &assembled_qf, &rstr, request);
204*eaf62fffSJeremy L Thompson   CeedChk(ierr);
205*eaf62fffSJeremy L Thompson   CeedInt layout[3];
206*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetELayout(rstr, &layout); CeedChk(ierr);
207*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionDestroy(&rstr); CeedChk(ierr);
208*eaf62fffSJeremy L Thompson   CeedScalar max_norm = 0;
209*eaf62fffSJeremy L Thompson   ierr = CeedVectorNorm(assembled_qf, CEED_NORM_MAX, &max_norm); CeedChk(ierr);
210*eaf62fffSJeremy L Thompson 
211*eaf62fffSJeremy L Thompson   // Determine active input basis
212*eaf62fffSJeremy L Thompson   CeedOperatorField *op_fields;
213*eaf62fffSJeremy L Thompson   CeedQFunctionField *qf_fields;
214*eaf62fffSJeremy L Thompson   ierr = CeedOperatorGetFields(op, &op_fields, NULL); CeedChk(ierr);
215*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, &qf_fields, NULL); CeedChk(ierr);
216*eaf62fffSJeremy L Thompson   CeedInt num_eval_mode_in = 0, num_comp, dim = 1;
217*eaf62fffSJeremy L Thompson   CeedEvalMode *eval_mode_in = NULL;
218*eaf62fffSJeremy L Thompson   CeedBasis basis_in = NULL;
219*eaf62fffSJeremy L Thompson   CeedElemRestriction rstr_in = NULL;
220*eaf62fffSJeremy L Thompson   for (CeedInt i=0; i<num_input_fields; i++) {
221*eaf62fffSJeremy L Thompson     CeedVector vec;
222*eaf62fffSJeremy L Thompson     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
223*eaf62fffSJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
224*eaf62fffSJeremy L Thompson       CeedElemRestriction rstr;
225*eaf62fffSJeremy L Thompson       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_in); CeedChk(ierr);
226*eaf62fffSJeremy L Thompson       ierr = CeedBasisGetNumComponents(basis_in, &num_comp); CeedChk(ierr);
227*eaf62fffSJeremy L Thompson       ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
228*eaf62fffSJeremy L Thompson       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
229*eaf62fffSJeremy L Thompson       if (rstr_in && rstr_in != rstr)
230*eaf62fffSJeremy L Thompson         // LCOV_EXCL_START
231*eaf62fffSJeremy L Thompson         return CeedError(ceed, CEED_ERROR_BACKEND,
232*eaf62fffSJeremy L Thompson                          "Multi-field non-composite operator diagonal assembly not supported");
233*eaf62fffSJeremy L Thompson       // LCOV_EXCL_STOP
234*eaf62fffSJeremy L Thompson       rstr_in = rstr;
235*eaf62fffSJeremy L Thompson       CeedEvalMode eval_mode;
236*eaf62fffSJeremy L Thompson       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
237*eaf62fffSJeremy L Thompson       switch (eval_mode) {
238*eaf62fffSJeremy L Thompson       case CEED_EVAL_NONE:
239*eaf62fffSJeremy L Thompson       case CEED_EVAL_INTERP:
240*eaf62fffSJeremy L Thompson         ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
241*eaf62fffSJeremy L Thompson         eval_mode_in[num_eval_mode_in] = eval_mode;
242*eaf62fffSJeremy L Thompson         num_eval_mode_in += 1;
243*eaf62fffSJeremy L Thompson         break;
244*eaf62fffSJeremy L Thompson       case CEED_EVAL_GRAD:
245*eaf62fffSJeremy L Thompson         ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
246*eaf62fffSJeremy L Thompson         for (CeedInt d=0; d<dim; d++)
247*eaf62fffSJeremy L Thompson           eval_mode_in[num_eval_mode_in+d] = eval_mode;
248*eaf62fffSJeremy L Thompson         num_eval_mode_in += dim;
249*eaf62fffSJeremy L Thompson         break;
250*eaf62fffSJeremy L Thompson       case CEED_EVAL_WEIGHT:
251*eaf62fffSJeremy L Thompson       case CEED_EVAL_DIV:
252*eaf62fffSJeremy L Thompson       case CEED_EVAL_CURL:
253*eaf62fffSJeremy L Thompson         break; // Caught by QF Assembly
254*eaf62fffSJeremy L Thompson       }
255*eaf62fffSJeremy L Thompson     }
256*eaf62fffSJeremy L Thompson   }
257*eaf62fffSJeremy L Thompson 
258*eaf62fffSJeremy L Thompson   // Determine active output basis
259*eaf62fffSJeremy L Thompson   ierr = CeedOperatorGetFields(op, NULL, &op_fields); CeedChk(ierr);
260*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields); CeedChk(ierr);
261*eaf62fffSJeremy L Thompson   CeedInt num_eval_mode_out = 0;
262*eaf62fffSJeremy L Thompson   CeedEvalMode *eval_mode_out = NULL;
263*eaf62fffSJeremy L Thompson   CeedBasis basis_out = NULL;
264*eaf62fffSJeremy L Thompson   CeedElemRestriction rstr_out = NULL;
265*eaf62fffSJeremy L Thompson   for (CeedInt i=0; i<num_output_fields; i++) {
266*eaf62fffSJeremy L Thompson     CeedVector vec;
267*eaf62fffSJeremy L Thompson     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
268*eaf62fffSJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
269*eaf62fffSJeremy L Thompson       CeedElemRestriction rstr;
270*eaf62fffSJeremy L Thompson       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis_out); CeedChk(ierr);
271*eaf62fffSJeremy L Thompson       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
272*eaf62fffSJeremy L Thompson       if (rstr_out && rstr_out != rstr)
273*eaf62fffSJeremy L Thompson         // LCOV_EXCL_START
274*eaf62fffSJeremy L Thompson         return CeedError(ceed, CEED_ERROR_BACKEND,
275*eaf62fffSJeremy L Thompson                          "Multi-field non-composite operator diagonal assembly not supported");
276*eaf62fffSJeremy L Thompson       // LCOV_EXCL_STOP
277*eaf62fffSJeremy L Thompson       rstr_out = rstr;
278*eaf62fffSJeremy L Thompson       CeedEvalMode eval_mode;
279*eaf62fffSJeremy L Thompson       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
280*eaf62fffSJeremy L Thompson       switch (eval_mode) {
281*eaf62fffSJeremy L Thompson       case CEED_EVAL_NONE:
282*eaf62fffSJeremy L Thompson       case CEED_EVAL_INTERP:
283*eaf62fffSJeremy L Thompson         ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
284*eaf62fffSJeremy L Thompson         eval_mode_out[num_eval_mode_out] = eval_mode;
285*eaf62fffSJeremy L Thompson         num_eval_mode_out += 1;
286*eaf62fffSJeremy L Thompson         break;
287*eaf62fffSJeremy L Thompson       case CEED_EVAL_GRAD:
288*eaf62fffSJeremy L Thompson         ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
289*eaf62fffSJeremy L Thompson         for (CeedInt d=0; d<dim; d++)
290*eaf62fffSJeremy L Thompson           eval_mode_out[num_eval_mode_out+d] = eval_mode;
291*eaf62fffSJeremy L Thompson         num_eval_mode_out += dim;
292*eaf62fffSJeremy L Thompson         break;
293*eaf62fffSJeremy L Thompson       case CEED_EVAL_WEIGHT:
294*eaf62fffSJeremy L Thompson       case CEED_EVAL_DIV:
295*eaf62fffSJeremy L Thompson       case CEED_EVAL_CURL:
296*eaf62fffSJeremy L Thompson         break; // Caught by QF Assembly
297*eaf62fffSJeremy L Thompson       }
298*eaf62fffSJeremy L Thompson     }
299*eaf62fffSJeremy L Thompson   }
300*eaf62fffSJeremy L Thompson 
301*eaf62fffSJeremy L Thompson   // Assemble point block diagonal restriction, if needed
302*eaf62fffSJeremy L Thompson   CeedElemRestriction diag_rstr = rstr_out;
303*eaf62fffSJeremy L Thompson   if (is_pointblock) {
304*eaf62fffSJeremy L Thompson     ierr = CeedOperatorCreateActivePointBlockRestriction(rstr_out, &diag_rstr);
305*eaf62fffSJeremy L Thompson     CeedChk(ierr);
306*eaf62fffSJeremy L Thompson   }
307*eaf62fffSJeremy L Thompson 
308*eaf62fffSJeremy L Thompson   // Create diagonal vector
309*eaf62fffSJeremy L Thompson   CeedVector elem_diag;
310*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionCreateVector(diag_rstr, NULL, &elem_diag);
311*eaf62fffSJeremy L Thompson   CeedChk(ierr);
312*eaf62fffSJeremy L Thompson 
313*eaf62fffSJeremy L Thompson   // Assemble element operator diagonals
314*eaf62fffSJeremy L Thompson   CeedScalar *elem_diag_array, *assembled_qf_array;
315*eaf62fffSJeremy L Thompson   ierr = CeedVectorSetValue(elem_diag, 0.0); CeedChk(ierr);
316*eaf62fffSJeremy L Thompson   ierr = CeedVectorGetArray(elem_diag, CEED_MEM_HOST, &elem_diag_array);
317*eaf62fffSJeremy L Thompson   CeedChk(ierr);
318*eaf62fffSJeremy L Thompson   ierr = CeedVectorGetArray(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
319*eaf62fffSJeremy L Thompson   CeedChk(ierr);
320*eaf62fffSJeremy L Thompson   CeedInt num_elem, num_nodes, num_qpts;
321*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetNumElements(diag_rstr, &num_elem); CeedChk(ierr);
322*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetNumNodes(basis_in, &num_nodes); CeedChk(ierr);
323*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
324*eaf62fffSJeremy L Thompson   // Basis matrices
325*eaf62fffSJeremy L Thompson   const CeedScalar *interp_in, *interp_out, *grad_in, *grad_out;
326*eaf62fffSJeremy L Thompson   CeedScalar *identity = NULL;
327*eaf62fffSJeremy L Thompson   bool evalNone = false;
328*eaf62fffSJeremy L Thompson   for (CeedInt i=0; i<num_eval_mode_in; i++)
329*eaf62fffSJeremy L Thompson     evalNone = evalNone || (eval_mode_in[i] == CEED_EVAL_NONE);
330*eaf62fffSJeremy L Thompson   for (CeedInt i=0; i<num_eval_mode_out; i++)
331*eaf62fffSJeremy L Thompson     evalNone = evalNone || (eval_mode_out[i] == CEED_EVAL_NONE);
332*eaf62fffSJeremy L Thompson   if (evalNone) {
333*eaf62fffSJeremy L Thompson     ierr = CeedCalloc(num_qpts*num_nodes, &identity); CeedChk(ierr);
334*eaf62fffSJeremy L Thompson     for (CeedInt i=0; i<(num_nodes<num_qpts?num_nodes:num_qpts); i++)
335*eaf62fffSJeremy L Thompson       identity[i*num_nodes+i] = 1.0;
336*eaf62fffSJeremy L Thompson   }
337*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
338*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetInterp(basis_out, &interp_out); CeedChk(ierr);
339*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
340*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetGrad(basis_out, &grad_out); CeedChk(ierr);
341*eaf62fffSJeremy L Thompson   // Compute the diagonal of B^T D B
342*eaf62fffSJeremy L Thompson   // Each element
343*eaf62fffSJeremy L Thompson   const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
344*eaf62fffSJeremy L Thompson   for (CeedInt e=0; e<num_elem; e++) {
345*eaf62fffSJeremy L Thompson     CeedInt d_out = -1;
346*eaf62fffSJeremy L Thompson     // Each basis eval mode pair
347*eaf62fffSJeremy L Thompson     for (CeedInt e_out=0; e_out<num_eval_mode_out; e_out++) {
348*eaf62fffSJeremy L Thompson       const CeedScalar *bt = NULL;
349*eaf62fffSJeremy L Thompson       if (eval_mode_out[e_out] == CEED_EVAL_GRAD)
350*eaf62fffSJeremy L Thompson         d_out += 1;
351*eaf62fffSJeremy L Thompson       CeedOperatorGetBasisPointer(eval_mode_out[e_out], identity, interp_out,
352*eaf62fffSJeremy L Thompson                                   &grad_out[d_out*num_qpts*num_nodes], &bt);
353*eaf62fffSJeremy L Thompson       CeedInt d_in = -1;
354*eaf62fffSJeremy L Thompson       for (CeedInt e_in=0; e_in<num_eval_mode_in; e_in++) {
355*eaf62fffSJeremy L Thompson         const CeedScalar *b = NULL;
356*eaf62fffSJeremy L Thompson         if (eval_mode_in[e_in] == CEED_EVAL_GRAD)
357*eaf62fffSJeremy L Thompson           d_in += 1;
358*eaf62fffSJeremy L Thompson         CeedOperatorGetBasisPointer(eval_mode_in[e_in], identity, interp_in,
359*eaf62fffSJeremy L Thompson                                     &grad_in[d_in*num_qpts*num_nodes], &b);
360*eaf62fffSJeremy L Thompson         // Each component
361*eaf62fffSJeremy L Thompson         for (CeedInt c_out=0; c_out<num_comp; c_out++)
362*eaf62fffSJeremy L Thompson           // Each qpoint/node pair
363*eaf62fffSJeremy L Thompson           for (CeedInt q=0; q<num_qpts; q++)
364*eaf62fffSJeremy L Thompson             if (is_pointblock) {
365*eaf62fffSJeremy L Thompson               // Point Block Diagonal
366*eaf62fffSJeremy L Thompson               for (CeedInt c_in=0; c_in<num_comp; c_in++) {
367*eaf62fffSJeremy L Thompson                 const CeedScalar qf_value =
368*eaf62fffSJeremy L Thompson                   assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_in)*
369*eaf62fffSJeremy L Thompson                                                      num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
370*eaf62fffSJeremy L Thompson                 if (fabs(qf_value) > qf_value_bound)
371*eaf62fffSJeremy L Thompson                   for (CeedInt n=0; n<num_nodes; n++)
372*eaf62fffSJeremy L Thompson                     elem_diag_array[((e*num_comp+c_out)*num_comp+c_in)*num_nodes+n] +=
373*eaf62fffSJeremy L Thompson                       bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
374*eaf62fffSJeremy L Thompson               }
375*eaf62fffSJeremy L Thompson             } else {
376*eaf62fffSJeremy L Thompson               // Diagonal Only
377*eaf62fffSJeremy L Thompson               const CeedScalar qf_value =
378*eaf62fffSJeremy L Thompson                 assembled_qf_array[q*layout[0] + (((e_in*num_comp+c_out)*
379*eaf62fffSJeremy L Thompson                                                    num_eval_mode_out+e_out)*num_comp+c_out)*layout[1] + e*layout[2]];
380*eaf62fffSJeremy L Thompson               if (fabs(qf_value) > qf_value_bound)
381*eaf62fffSJeremy L Thompson                 for (CeedInt n=0; n<num_nodes; n++)
382*eaf62fffSJeremy L Thompson                   elem_diag_array[(e*num_comp+c_out)*num_nodes+n] +=
383*eaf62fffSJeremy L Thompson                     bt[q*num_nodes+n] * qf_value * b[q*num_nodes+n];
384*eaf62fffSJeremy L Thompson             }
385*eaf62fffSJeremy L Thompson       }
386*eaf62fffSJeremy L Thompson     }
387*eaf62fffSJeremy L Thompson   }
388*eaf62fffSJeremy L Thompson   ierr = CeedVectorRestoreArray(elem_diag, &elem_diag_array); CeedChk(ierr);
389*eaf62fffSJeremy L Thompson   ierr = CeedVectorRestoreArray(assembled_qf, &assembled_qf_array); CeedChk(ierr);
390*eaf62fffSJeremy L Thompson 
391*eaf62fffSJeremy L Thompson   // Assemble local operator diagonal
392*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionApply(diag_rstr, CEED_TRANSPOSE, elem_diag,
393*eaf62fffSJeremy L Thompson                                   assembled, request); CeedChk(ierr);
394*eaf62fffSJeremy L Thompson 
395*eaf62fffSJeremy L Thompson   // Cleanup
396*eaf62fffSJeremy L Thompson   if (is_pointblock) {
397*eaf62fffSJeremy L Thompson     ierr = CeedElemRestrictionDestroy(&diag_rstr); CeedChk(ierr);
398*eaf62fffSJeremy L Thompson   }
399*eaf62fffSJeremy L Thompson   ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
400*eaf62fffSJeremy L Thompson   ierr = CeedVectorDestroy(&elem_diag); CeedChk(ierr);
401*eaf62fffSJeremy L Thompson   ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
402*eaf62fffSJeremy L Thompson   ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
403*eaf62fffSJeremy L Thompson   ierr = CeedFree(&identity); CeedChk(ierr);
404*eaf62fffSJeremy L Thompson 
405*eaf62fffSJeremy L Thompson   return CEED_ERROR_SUCCESS;
406*eaf62fffSJeremy L Thompson }
407*eaf62fffSJeremy L Thompson 
408*eaf62fffSJeremy L Thompson /**
409*eaf62fffSJeremy L Thompson   @brief Core logic for assembling composite operator diagonal
410*eaf62fffSJeremy L Thompson 
411*eaf62fffSJeremy L Thompson   @param[in] op             CeedOperator to assemble point block diagonal
412*eaf62fffSJeremy L Thompson   @param[in] request        Address of CeedRequest for non-blocking completion, else
413*eaf62fffSJeremy L Thompson                             CEED_REQUEST_IMMEDIATE
414*eaf62fffSJeremy L Thompson   @param[in] is_pointblock  Boolean flag to assemble diagonal or point block diagonal
415*eaf62fffSJeremy L Thompson   @param[out] assembled     CeedVector to store assembled diagonal
416*eaf62fffSJeremy L Thompson 
417*eaf62fffSJeremy L Thompson   @return An error code: 0 - success, otherwise - failure
418*eaf62fffSJeremy L Thompson 
419*eaf62fffSJeremy L Thompson   @ref Developer
420*eaf62fffSJeremy L Thompson **/
421*eaf62fffSJeremy L Thompson static inline int CeedCompositeOperatorLinearAssembleAddDiagonal(
422*eaf62fffSJeremy L Thompson   CeedOperator op, CeedRequest *request, const bool is_pointblock,
423*eaf62fffSJeremy L Thompson   CeedVector assembled) {
424*eaf62fffSJeremy L Thompson   int ierr;
425*eaf62fffSJeremy L Thompson   CeedInt num_sub;
426*eaf62fffSJeremy L Thompson   CeedOperator *suboperators;
427*eaf62fffSJeremy L Thompson   ierr = CeedOperatorGetNumSub(op, &num_sub); CeedChk(ierr);
428*eaf62fffSJeremy L Thompson   ierr = CeedOperatorGetSubList(op, &suboperators); CeedChk(ierr);
429*eaf62fffSJeremy L Thompson   for (CeedInt i = 0; i < num_sub; i++) {
430*eaf62fffSJeremy L Thompson     ierr = CeedSingleOperatorAssembleAddDiagonal(suboperators[i], request,
431*eaf62fffSJeremy L Thompson            is_pointblock, assembled); CeedChk(ierr);
432*eaf62fffSJeremy L Thompson   }
433*eaf62fffSJeremy L Thompson   return CEED_ERROR_SUCCESS;
434*eaf62fffSJeremy L Thompson }
435*eaf62fffSJeremy L Thompson 
436*eaf62fffSJeremy L Thompson /**
437*eaf62fffSJeremy L Thompson   @brief Build nonzero pattern for non-composite operator
438*eaf62fffSJeremy L Thompson 
439*eaf62fffSJeremy L Thompson   Users should generally use CeedOperatorLinearAssembleSymbolic()
440*eaf62fffSJeremy L Thompson 
441*eaf62fffSJeremy L Thompson   @param[in] op      CeedOperator to assemble nonzero pattern
442*eaf62fffSJeremy L Thompson   @param[in] offset  Offset for number of entries
443*eaf62fffSJeremy L Thompson   @param[out] rows   Row number for each entry
444*eaf62fffSJeremy L Thompson   @param[out] cols   Column number for each entry
445*eaf62fffSJeremy L Thompson 
446*eaf62fffSJeremy L Thompson   @return An error code: 0 - success, otherwise - failure
447*eaf62fffSJeremy L Thompson 
448*eaf62fffSJeremy L Thompson   @ref Developer
449*eaf62fffSJeremy L Thompson **/
450*eaf62fffSJeremy L Thompson static int CeedSingleOperatorAssembleSymbolic(CeedOperator op, CeedInt offset,
451*eaf62fffSJeremy L Thompson     CeedInt *rows, CeedInt *cols) {
452*eaf62fffSJeremy L Thompson   int ierr;
453*eaf62fffSJeremy L Thompson   Ceed ceed = op->ceed;
454*eaf62fffSJeremy L Thompson   if (op->composite)
455*eaf62fffSJeremy L Thompson     // LCOV_EXCL_START
456*eaf62fffSJeremy L Thompson     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
457*eaf62fffSJeremy L Thompson                      "Composite operator not supported");
458*eaf62fffSJeremy L Thompson   // LCOV_EXCL_STOP
459*eaf62fffSJeremy L Thompson 
460*eaf62fffSJeremy L Thompson   CeedElemRestriction rstr_in;
461*eaf62fffSJeremy L Thompson   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr_in); CeedChk(ierr);
462*eaf62fffSJeremy L Thompson   CeedInt num_elem, elem_size, num_nodes, num_comp;
463*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
464*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
465*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetLVectorSize(rstr_in, &num_nodes); CeedChk(ierr);
466*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
467*eaf62fffSJeremy L Thompson   CeedInt layout_er[3];
468*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetELayout(rstr_in, &layout_er); CeedChk(ierr);
469*eaf62fffSJeremy L Thompson 
470*eaf62fffSJeremy L Thompson   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
471*eaf62fffSJeremy L Thompson 
472*eaf62fffSJeremy L Thompson   // Determine elem_dof relation
473*eaf62fffSJeremy L Thompson   CeedVector index_vec;
474*eaf62fffSJeremy L Thompson   ierr = CeedVectorCreate(ceed, num_nodes, &index_vec); CeedChk(ierr);
475*eaf62fffSJeremy L Thompson   CeedScalar *array;
476*eaf62fffSJeremy L Thompson   ierr = CeedVectorGetArray(index_vec, CEED_MEM_HOST, &array); CeedChk(ierr);
477*eaf62fffSJeremy L Thompson   for (CeedInt i = 0; i < num_nodes; ++i) {
478*eaf62fffSJeremy L Thompson     array[i] = i;
479*eaf62fffSJeremy L Thompson   }
480*eaf62fffSJeremy L Thompson   ierr = CeedVectorRestoreArray(index_vec, &array); CeedChk(ierr);
481*eaf62fffSJeremy L Thompson   CeedVector elem_dof;
482*eaf62fffSJeremy L Thompson   ierr = CeedVectorCreate(ceed, num_elem * elem_size * num_comp, &elem_dof);
483*eaf62fffSJeremy L Thompson   CeedChk(ierr);
484*eaf62fffSJeremy L Thompson   ierr = CeedVectorSetValue(elem_dof, 0.0); CeedChk(ierr);
485*eaf62fffSJeremy L Thompson   CeedElemRestrictionApply(rstr_in, CEED_NOTRANSPOSE, index_vec,
486*eaf62fffSJeremy L Thompson                            elem_dof, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
487*eaf62fffSJeremy L Thompson   const CeedScalar *elem_dof_a;
488*eaf62fffSJeremy L Thompson   ierr = CeedVectorGetArrayRead(elem_dof, CEED_MEM_HOST, &elem_dof_a);
489*eaf62fffSJeremy L Thompson   CeedChk(ierr);
490*eaf62fffSJeremy L Thompson   ierr = CeedVectorDestroy(&index_vec); CeedChk(ierr);
491*eaf62fffSJeremy L Thompson 
492*eaf62fffSJeremy L Thompson   // Determine i, j locations for element matrices
493*eaf62fffSJeremy L Thompson   CeedInt count = 0;
494*eaf62fffSJeremy L Thompson   for (int e = 0; e < num_elem; ++e) {
495*eaf62fffSJeremy L Thompson     for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
496*eaf62fffSJeremy L Thompson       for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
497*eaf62fffSJeremy L Thompson         for (int i = 0; i < elem_size; ++i) {
498*eaf62fffSJeremy L Thompson           for (int j = 0; j < elem_size; ++j) {
499*eaf62fffSJeremy L Thompson             const CeedInt elem_dof_index_row = (i)*layout_er[0] +
500*eaf62fffSJeremy L Thompson                                                (comp_out)*layout_er[1] + e*layout_er[2];
501*eaf62fffSJeremy L Thompson             const CeedInt elem_dof_index_col = (j)*layout_er[0] +
502*eaf62fffSJeremy L Thompson                                                (comp_in)*layout_er[1] + e*layout_er[2];
503*eaf62fffSJeremy L Thompson 
504*eaf62fffSJeremy L Thompson             const CeedInt row = elem_dof_a[elem_dof_index_row];
505*eaf62fffSJeremy L Thompson             const CeedInt col = elem_dof_a[elem_dof_index_col];
506*eaf62fffSJeremy L Thompson 
507*eaf62fffSJeremy L Thompson             rows[offset + count] = row;
508*eaf62fffSJeremy L Thompson             cols[offset + count] = col;
509*eaf62fffSJeremy L Thompson             count++;
510*eaf62fffSJeremy L Thompson           }
511*eaf62fffSJeremy L Thompson         }
512*eaf62fffSJeremy L Thompson       }
513*eaf62fffSJeremy L Thompson     }
514*eaf62fffSJeremy L Thompson   }
515*eaf62fffSJeremy L Thompson   if (count != local_num_entries)
516*eaf62fffSJeremy L Thompson     // LCOV_EXCL_START
517*eaf62fffSJeremy L Thompson     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing assembled entries");
518*eaf62fffSJeremy L Thompson   // LCOV_EXCL_STOP
519*eaf62fffSJeremy L Thompson   ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr);
520*eaf62fffSJeremy L Thompson   ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr);
521*eaf62fffSJeremy L Thompson 
522*eaf62fffSJeremy L Thompson   return CEED_ERROR_SUCCESS;
523*eaf62fffSJeremy L Thompson }
524*eaf62fffSJeremy L Thompson 
525*eaf62fffSJeremy L Thompson /**
526*eaf62fffSJeremy L Thompson   @brief Assemble nonzero entries for non-composite operator
527*eaf62fffSJeremy L Thompson 
528*eaf62fffSJeremy L Thompson   Users should generally use CeedOperatorLinearAssemble()
529*eaf62fffSJeremy L Thompson 
530*eaf62fffSJeremy L Thompson   @param[in] op       CeedOperator to assemble
531*eaf62fffSJeremy L Thompson   @param[out] offset  Offest for number of entries
532*eaf62fffSJeremy L Thompson   @param[out] values  Values to assemble into matrix
533*eaf62fffSJeremy L Thompson 
534*eaf62fffSJeremy L Thompson   @return An error code: 0 - success, otherwise - failure
535*eaf62fffSJeremy L Thompson 
536*eaf62fffSJeremy L Thompson   @ref Developer
537*eaf62fffSJeremy L Thompson **/
538*eaf62fffSJeremy L Thompson static int CeedSingleOperatorAssemble(CeedOperator op, CeedInt offset,
539*eaf62fffSJeremy L Thompson                                       CeedVector values) {
540*eaf62fffSJeremy L Thompson   int ierr;
541*eaf62fffSJeremy L Thompson   Ceed ceed = op->ceed;
542*eaf62fffSJeremy L Thompson   if (op->composite)
543*eaf62fffSJeremy L Thompson     // LCOV_EXCL_START
544*eaf62fffSJeremy L Thompson     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
545*eaf62fffSJeremy L Thompson                      "Composite operator not supported");
546*eaf62fffSJeremy L Thompson   // LCOV_EXCL_STOP
547*eaf62fffSJeremy L Thompson 
548*eaf62fffSJeremy L Thompson   // Assemble QFunction
549*eaf62fffSJeremy L Thompson   CeedQFunction qf;
550*eaf62fffSJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
551*eaf62fffSJeremy L Thompson   CeedInt num_input_fields, num_output_fields;
552*eaf62fffSJeremy L Thompson   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
553*eaf62fffSJeremy L Thompson   CeedChk(ierr);
554*eaf62fffSJeremy L Thompson   CeedVector assembled_qf;
555*eaf62fffSJeremy L Thompson   CeedElemRestriction rstr_q;
556*eaf62fffSJeremy L Thompson   ierr = CeedOperatorLinearAssembleQFunction(
557*eaf62fffSJeremy L Thompson            op, &assembled_qf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
558*eaf62fffSJeremy L Thompson 
559*eaf62fffSJeremy L Thompson   CeedInt qf_length;
560*eaf62fffSJeremy L Thompson   ierr = CeedVectorGetLength(assembled_qf, &qf_length); CeedChk(ierr);
561*eaf62fffSJeremy L Thompson 
562*eaf62fffSJeremy L Thompson   CeedOperatorField *input_fields;
563*eaf62fffSJeremy L Thompson   CeedOperatorField *output_fields;
564*eaf62fffSJeremy L Thompson   ierr = CeedOperatorGetFields(op, &input_fields, &output_fields); CeedChk(ierr);
565*eaf62fffSJeremy L Thompson 
566*eaf62fffSJeremy L Thompson   // Determine active input basis
567*eaf62fffSJeremy L Thompson   CeedQFunctionField *qf_fields;
568*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, &qf_fields, NULL); CeedChk(ierr);
569*eaf62fffSJeremy L Thompson   CeedInt num_eval_mode_in = 0, dim = 1;
570*eaf62fffSJeremy L Thompson   CeedEvalMode *eval_mode_in = NULL;
571*eaf62fffSJeremy L Thompson   CeedBasis basis_in = NULL;
572*eaf62fffSJeremy L Thompson   CeedElemRestriction rstr_in = NULL;
573*eaf62fffSJeremy L Thompson   for (CeedInt i=0; i<num_input_fields; i++) {
574*eaf62fffSJeremy L Thompson     CeedVector vec;
575*eaf62fffSJeremy L Thompson     ierr = CeedOperatorFieldGetVector(input_fields[i], &vec); CeedChk(ierr);
576*eaf62fffSJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
577*eaf62fffSJeremy L Thompson       ierr = CeedOperatorFieldGetBasis(input_fields[i], &basis_in);
578*eaf62fffSJeremy L Thompson       CeedChk(ierr);
579*eaf62fffSJeremy L Thompson       ierr = CeedBasisGetDimension(basis_in, &dim); CeedChk(ierr);
580*eaf62fffSJeremy L Thompson       ierr = CeedOperatorFieldGetElemRestriction(input_fields[i], &rstr_in);
581*eaf62fffSJeremy L Thompson       CeedChk(ierr);
582*eaf62fffSJeremy L Thompson       CeedEvalMode eval_mode;
583*eaf62fffSJeremy L Thompson       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
584*eaf62fffSJeremy L Thompson       CeedChk(ierr);
585*eaf62fffSJeremy L Thompson       switch (eval_mode) {
586*eaf62fffSJeremy L Thompson       case CEED_EVAL_NONE:
587*eaf62fffSJeremy L Thompson       case CEED_EVAL_INTERP:
588*eaf62fffSJeremy L Thompson         ierr = CeedRealloc(num_eval_mode_in + 1, &eval_mode_in); CeedChk(ierr);
589*eaf62fffSJeremy L Thompson         eval_mode_in[num_eval_mode_in] = eval_mode;
590*eaf62fffSJeremy L Thompson         num_eval_mode_in += 1;
591*eaf62fffSJeremy L Thompson         break;
592*eaf62fffSJeremy L Thompson       case CEED_EVAL_GRAD:
593*eaf62fffSJeremy L Thompson         ierr = CeedRealloc(num_eval_mode_in + dim, &eval_mode_in); CeedChk(ierr);
594*eaf62fffSJeremy L Thompson         for (CeedInt d=0; d<dim; d++) {
595*eaf62fffSJeremy L Thompson           eval_mode_in[num_eval_mode_in+d] = eval_mode;
596*eaf62fffSJeremy L Thompson         }
597*eaf62fffSJeremy L Thompson         num_eval_mode_in += dim;
598*eaf62fffSJeremy L Thompson         break;
599*eaf62fffSJeremy L Thompson       case CEED_EVAL_WEIGHT:
600*eaf62fffSJeremy L Thompson       case CEED_EVAL_DIV:
601*eaf62fffSJeremy L Thompson       case CEED_EVAL_CURL:
602*eaf62fffSJeremy L Thompson         break; // Caught by QF Assembly
603*eaf62fffSJeremy L Thompson       }
604*eaf62fffSJeremy L Thompson     }
605*eaf62fffSJeremy L Thompson   }
606*eaf62fffSJeremy L Thompson 
607*eaf62fffSJeremy L Thompson   // Determine active output basis
608*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields); CeedChk(ierr);
609*eaf62fffSJeremy L Thompson   CeedInt num_eval_mode_out = 0;
610*eaf62fffSJeremy L Thompson   CeedEvalMode *eval_mode_out = NULL;
611*eaf62fffSJeremy L Thompson   CeedBasis basis_out = NULL;
612*eaf62fffSJeremy L Thompson   CeedElemRestriction rstr_out = NULL;
613*eaf62fffSJeremy L Thompson   for (CeedInt i=0; i<num_output_fields; i++) {
614*eaf62fffSJeremy L Thompson     CeedVector vec;
615*eaf62fffSJeremy L Thompson     ierr = CeedOperatorFieldGetVector(output_fields[i], &vec); CeedChk(ierr);
616*eaf62fffSJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
617*eaf62fffSJeremy L Thompson       ierr = CeedOperatorFieldGetBasis(output_fields[i], &basis_out);
618*eaf62fffSJeremy L Thompson       CeedChk(ierr);
619*eaf62fffSJeremy L Thompson       ierr = CeedOperatorFieldGetElemRestriction(output_fields[i], &rstr_out);
620*eaf62fffSJeremy L Thompson       CeedChk(ierr);
621*eaf62fffSJeremy L Thompson       CeedEvalMode eval_mode;
622*eaf62fffSJeremy L Thompson       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
623*eaf62fffSJeremy L Thompson       CeedChk(ierr);
624*eaf62fffSJeremy L Thompson       switch (eval_mode) {
625*eaf62fffSJeremy L Thompson       case CEED_EVAL_NONE:
626*eaf62fffSJeremy L Thompson       case CEED_EVAL_INTERP:
627*eaf62fffSJeremy L Thompson         ierr = CeedRealloc(num_eval_mode_out + 1, &eval_mode_out); CeedChk(ierr);
628*eaf62fffSJeremy L Thompson         eval_mode_out[num_eval_mode_out] = eval_mode;
629*eaf62fffSJeremy L Thompson         num_eval_mode_out += 1;
630*eaf62fffSJeremy L Thompson         break;
631*eaf62fffSJeremy L Thompson       case CEED_EVAL_GRAD:
632*eaf62fffSJeremy L Thompson         ierr = CeedRealloc(num_eval_mode_out + dim, &eval_mode_out); CeedChk(ierr);
633*eaf62fffSJeremy L Thompson         for (CeedInt d=0; d<dim; d++) {
634*eaf62fffSJeremy L Thompson           eval_mode_out[num_eval_mode_out+d] = eval_mode;
635*eaf62fffSJeremy L Thompson         }
636*eaf62fffSJeremy L Thompson         num_eval_mode_out += dim;
637*eaf62fffSJeremy L Thompson         break;
638*eaf62fffSJeremy L Thompson       case CEED_EVAL_WEIGHT:
639*eaf62fffSJeremy L Thompson       case CEED_EVAL_DIV:
640*eaf62fffSJeremy L Thompson       case CEED_EVAL_CURL:
641*eaf62fffSJeremy L Thompson         break; // Caught by QF Assembly
642*eaf62fffSJeremy L Thompson       }
643*eaf62fffSJeremy L Thompson     }
644*eaf62fffSJeremy L Thompson   }
645*eaf62fffSJeremy L Thompson 
646*eaf62fffSJeremy L Thompson   if (num_eval_mode_in == 0 || num_eval_mode_out == 0)
647*eaf62fffSJeremy L Thompson     // LCOV_EXCL_START
648*eaf62fffSJeremy L Thompson     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
649*eaf62fffSJeremy L Thompson                      "Cannot assemble operator with out inputs/outputs");
650*eaf62fffSJeremy L Thompson   // LCOV_EXCL_STOP
651*eaf62fffSJeremy L Thompson 
652*eaf62fffSJeremy L Thompson   CeedInt num_elem, elem_size, num_qpts, num_comp;
653*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetNumElements(rstr_in, &num_elem); CeedChk(ierr);
654*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetElementSize(rstr_in, &elem_size); CeedChk(ierr);
655*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetNumComponents(rstr_in, &num_comp); CeedChk(ierr);
656*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetNumQuadraturePoints(basis_in, &num_qpts); CeedChk(ierr);
657*eaf62fffSJeremy L Thompson 
658*eaf62fffSJeremy L Thompson   CeedInt local_num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
659*eaf62fffSJeremy L Thompson 
660*eaf62fffSJeremy L Thompson   // loop over elements and put in data structure
661*eaf62fffSJeremy L Thompson   const CeedScalar *interp_in, *grad_in;
662*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetInterp(basis_in, &interp_in); CeedChk(ierr);
663*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetGrad(basis_in, &grad_in); CeedChk(ierr);
664*eaf62fffSJeremy L Thompson 
665*eaf62fffSJeremy L Thompson   const CeedScalar *assembled_qf_array;
666*eaf62fffSJeremy L Thompson   ierr = CeedVectorGetArrayRead(assembled_qf, CEED_MEM_HOST, &assembled_qf_array);
667*eaf62fffSJeremy L Thompson   CeedChk(ierr);
668*eaf62fffSJeremy L Thompson 
669*eaf62fffSJeremy L Thompson   CeedInt layout_qf[3];
670*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetELayout(rstr_q, &layout_qf); CeedChk(ierr);
671*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr);
672*eaf62fffSJeremy L Thompson 
673*eaf62fffSJeremy L Thompson   // we store B_mat_in, B_mat_out, BTD, elem_mat in row-major order
674*eaf62fffSJeremy L Thompson   CeedScalar B_mat_in[(num_qpts * num_eval_mode_in) * elem_size];
675*eaf62fffSJeremy L Thompson   CeedScalar B_mat_out[(num_qpts * num_eval_mode_out) * elem_size];
676*eaf62fffSJeremy L Thompson   CeedScalar D_mat[num_eval_mode_out * num_eval_mode_in *
677*eaf62fffSJeremy L Thompson                                      num_qpts]; // logically 3-tensor
678*eaf62fffSJeremy L Thompson   CeedScalar BTD[elem_size * num_qpts*num_eval_mode_in];
679*eaf62fffSJeremy L Thompson   CeedScalar elem_mat[elem_size * elem_size];
680*eaf62fffSJeremy L Thompson   int count = 0;
681*eaf62fffSJeremy L Thompson   CeedScalar *vals;
682*eaf62fffSJeremy L Thompson   ierr = CeedVectorGetArray(values, CEED_MEM_HOST, &vals); CeedChk(ierr);
683*eaf62fffSJeremy L Thompson   for (int e = 0; e < num_elem; ++e) {
684*eaf62fffSJeremy L Thompson     for (int comp_in = 0; comp_in < num_comp; ++comp_in) {
685*eaf62fffSJeremy L Thompson       for (int comp_out = 0; comp_out < num_comp; ++comp_out) {
686*eaf62fffSJeremy L Thompson         for (int ell = 0; ell < (num_qpts * num_eval_mode_in) * elem_size; ++ell) {
687*eaf62fffSJeremy L Thompson           B_mat_in[ell] = 0.0;
688*eaf62fffSJeremy L Thompson         }
689*eaf62fffSJeremy L Thompson         for (int ell = 0; ell < (num_qpts * num_eval_mode_out) * elem_size; ++ell) {
690*eaf62fffSJeremy L Thompson           B_mat_out[ell] = 0.0;
691*eaf62fffSJeremy L Thompson         }
692*eaf62fffSJeremy L Thompson         // Store block-diagonal D matrix as collection of small dense blocks
693*eaf62fffSJeremy L Thompson         for (int ell = 0; ell < num_eval_mode_in*num_eval_mode_out*num_qpts; ++ell) {
694*eaf62fffSJeremy L Thompson           D_mat[ell] = 0.0;
695*eaf62fffSJeremy L Thompson         }
696*eaf62fffSJeremy L Thompson         // form element matrix itself (for each block component)
697*eaf62fffSJeremy L Thompson         for (int ell = 0; ell < elem_size*elem_size; ++ell) {
698*eaf62fffSJeremy L Thompson           elem_mat[ell] = 0.0;
699*eaf62fffSJeremy L Thompson         }
700*eaf62fffSJeremy L Thompson         for (int q = 0; q < num_qpts; ++q) {
701*eaf62fffSJeremy L Thompson           for (int n = 0; n < elem_size; ++n) {
702*eaf62fffSJeremy L Thompson             CeedInt d_in = -1;
703*eaf62fffSJeremy L Thompson             for (int e_in = 0; e_in < num_eval_mode_in; ++e_in) {
704*eaf62fffSJeremy L Thompson               const int qq = num_eval_mode_in*q;
705*eaf62fffSJeremy L Thompson               if (eval_mode_in[e_in] == CEED_EVAL_INTERP) {
706*eaf62fffSJeremy L Thompson                 B_mat_in[(qq+e_in)*elem_size + n] += interp_in[q * elem_size + n];
707*eaf62fffSJeremy L Thompson               } else if (eval_mode_in[e_in] == CEED_EVAL_GRAD) {
708*eaf62fffSJeremy L Thompson                 d_in += 1;
709*eaf62fffSJeremy L Thompson                 B_mat_in[(qq+e_in)*elem_size + n] +=
710*eaf62fffSJeremy L Thompson                   grad_in[(d_in*num_qpts+q) * elem_size + n];
711*eaf62fffSJeremy L Thompson               } else {
712*eaf62fffSJeremy L Thompson                 // LCOV_EXCL_START
713*eaf62fffSJeremy L Thompson                 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
714*eaf62fffSJeremy L Thompson                 // LCOV_EXCL_STOP
715*eaf62fffSJeremy L Thompson               }
716*eaf62fffSJeremy L Thompson             }
717*eaf62fffSJeremy L Thompson             CeedInt d_out = -1;
718*eaf62fffSJeremy L Thompson             for (int e_out = 0; e_out < num_eval_mode_out; ++e_out) {
719*eaf62fffSJeremy L Thompson               const int qq = num_eval_mode_out*q;
720*eaf62fffSJeremy L Thompson               if (eval_mode_out[e_out] == CEED_EVAL_INTERP) {
721*eaf62fffSJeremy L Thompson                 B_mat_out[(qq+e_out)*elem_size + n] += interp_in[q * elem_size + n];
722*eaf62fffSJeremy L Thompson               } else if (eval_mode_out[e_out] == CEED_EVAL_GRAD) {
723*eaf62fffSJeremy L Thompson                 d_out += 1;
724*eaf62fffSJeremy L Thompson                 B_mat_out[(qq+e_out)*elem_size + n] +=
725*eaf62fffSJeremy L Thompson                   grad_in[(d_out*num_qpts+q) * elem_size + n];
726*eaf62fffSJeremy L Thompson               } else {
727*eaf62fffSJeremy L Thompson                 // LCOV_EXCL_START
728*eaf62fffSJeremy L Thompson                 return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Not implemented!");
729*eaf62fffSJeremy L Thompson                 // LCOV_EXCL_STOP
730*eaf62fffSJeremy L Thompson               }
731*eaf62fffSJeremy L Thompson             }
732*eaf62fffSJeremy L Thompson           }
733*eaf62fffSJeremy L Thompson           for (int ei = 0; ei < num_eval_mode_out; ++ei) {
734*eaf62fffSJeremy L Thompson             for (int ej = 0; ej < num_eval_mode_in; ++ej) {
735*eaf62fffSJeremy L Thompson               const int eval_mode_index = ((ei*num_comp+comp_in)*num_eval_mode_in+ej)*num_comp
736*eaf62fffSJeremy L Thompson                                           +comp_out;
737*eaf62fffSJeremy L Thompson               const int index = q*layout_qf[0] + eval_mode_index*layout_qf[1] +
738*eaf62fffSJeremy L Thompson                                 e*layout_qf[2];
739*eaf62fffSJeremy L Thompson               D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q] += assembled_qf_array[index];
740*eaf62fffSJeremy L Thompson             }
741*eaf62fffSJeremy L Thompson           }
742*eaf62fffSJeremy L Thompson         }
743*eaf62fffSJeremy L Thompson         // Compute B^T*D
744*eaf62fffSJeremy L Thompson         for (int ell = 0; ell < elem_size*num_qpts*num_eval_mode_in; ++ell) {
745*eaf62fffSJeremy L Thompson           BTD[ell] = 0.0;
746*eaf62fffSJeremy L Thompson         }
747*eaf62fffSJeremy L Thompson         for (int j = 0; j<elem_size; ++j) {
748*eaf62fffSJeremy L Thompson           for (int q = 0; q<num_qpts; ++q) {
749*eaf62fffSJeremy L Thompson             int qq = num_eval_mode_out*q;
750*eaf62fffSJeremy L Thompson             for (int ei = 0; ei < num_eval_mode_in; ++ei) {
751*eaf62fffSJeremy L Thompson               for (int ej = 0; ej < num_eval_mode_out; ++ej) {
752*eaf62fffSJeremy L Thompson                 BTD[j*(num_qpts*num_eval_mode_in) + (qq+ei)] +=
753*eaf62fffSJeremy L Thompson                   B_mat_out[(qq+ej)*elem_size + j] * D_mat[(ei*num_eval_mode_in+ej)*num_qpts + q];
754*eaf62fffSJeremy L Thompson               }
755*eaf62fffSJeremy L Thompson             }
756*eaf62fffSJeremy L Thompson           }
757*eaf62fffSJeremy L Thompson         }
758*eaf62fffSJeremy L Thompson 
759*eaf62fffSJeremy L Thompson         ierr = CeedMatrixMultiply(ceed, BTD, B_mat_in, elem_mat, elem_size,
760*eaf62fffSJeremy L Thompson                                   elem_size, num_qpts*num_eval_mode_in); CeedChk(ierr);
761*eaf62fffSJeremy L Thompson 
762*eaf62fffSJeremy L Thompson         // put element matrix in coordinate data structure
763*eaf62fffSJeremy L Thompson         for (int i = 0; i < elem_size; ++i) {
764*eaf62fffSJeremy L Thompson           for (int j = 0; j < elem_size; ++j) {
765*eaf62fffSJeremy L Thompson             vals[offset + count] = elem_mat[i*elem_size + j];
766*eaf62fffSJeremy L Thompson             count++;
767*eaf62fffSJeremy L Thompson           }
768*eaf62fffSJeremy L Thompson         }
769*eaf62fffSJeremy L Thompson       }
770*eaf62fffSJeremy L Thompson     }
771*eaf62fffSJeremy L Thompson   }
772*eaf62fffSJeremy L Thompson   if (count != local_num_entries)
773*eaf62fffSJeremy L Thompson     // LCOV_EXCL_START
774*eaf62fffSJeremy L Thompson     return CeedError(ceed, CEED_ERROR_MAJOR, "Error computing entries");
775*eaf62fffSJeremy L Thompson   // LCOV_EXCL_STOP
776*eaf62fffSJeremy L Thompson   ierr = CeedVectorRestoreArray(values, &vals); CeedChk(ierr);
777*eaf62fffSJeremy L Thompson 
778*eaf62fffSJeremy L Thompson   ierr = CeedVectorRestoreArrayRead(assembled_qf, &assembled_qf_array);
779*eaf62fffSJeremy L Thompson   CeedChk(ierr);
780*eaf62fffSJeremy L Thompson   ierr = CeedVectorDestroy(&assembled_qf); CeedChk(ierr);
781*eaf62fffSJeremy L Thompson   ierr = CeedFree(&eval_mode_in); CeedChk(ierr);
782*eaf62fffSJeremy L Thompson   ierr = CeedFree(&eval_mode_out); CeedChk(ierr);
783*eaf62fffSJeremy L Thompson 
784*eaf62fffSJeremy L Thompson   return CEED_ERROR_SUCCESS;
785*eaf62fffSJeremy L Thompson }
786*eaf62fffSJeremy L Thompson 
787*eaf62fffSJeremy L Thompson /**
788*eaf62fffSJeremy L Thompson   @brief Count number of entries for assembled CeedOperator
789*eaf62fffSJeremy L Thompson 
790*eaf62fffSJeremy L Thompson   @param[in] op            CeedOperator to assemble
791*eaf62fffSJeremy L Thompson   @param[out] num_entries  Number of entries in assembled representation
792*eaf62fffSJeremy L Thompson 
793*eaf62fffSJeremy L Thompson   @return An error code: 0 - success, otherwise - failure
794*eaf62fffSJeremy L Thompson 
795*eaf62fffSJeremy L Thompson   @ref Utility
796*eaf62fffSJeremy L Thompson **/
797*eaf62fffSJeremy L Thompson static int CeedSingleOperatorAssemblyCountEntries(CeedOperator op,
798*eaf62fffSJeremy L Thompson     CeedInt *num_entries) {
799*eaf62fffSJeremy L Thompson   int ierr;
800*eaf62fffSJeremy L Thompson   CeedElemRestriction rstr;
801*eaf62fffSJeremy L Thompson   CeedInt num_elem, elem_size, num_comp;
802*eaf62fffSJeremy L Thompson 
803*eaf62fffSJeremy L Thompson   if (op->composite)
804*eaf62fffSJeremy L Thompson     // LCOV_EXCL_START
805*eaf62fffSJeremy L Thompson     return CeedError(op->ceed, CEED_ERROR_UNSUPPORTED,
806*eaf62fffSJeremy L Thompson                      "Composite operator not supported");
807*eaf62fffSJeremy L Thompson   // LCOV_EXCL_STOP
808*eaf62fffSJeremy L Thompson   ierr = CeedOperatorGetActiveElemRestriction(op, &rstr); CeedChk(ierr);
809*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
810*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetElementSize(rstr, &elem_size); CeedChk(ierr);
811*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetNumComponents(rstr, &num_comp); CeedChk(ierr);
812*eaf62fffSJeremy L Thompson   *num_entries = elem_size*num_comp * elem_size*num_comp * num_elem;
813*eaf62fffSJeremy L Thompson 
814*eaf62fffSJeremy L Thompson   return CEED_ERROR_SUCCESS;
815*eaf62fffSJeremy L Thompson }
816*eaf62fffSJeremy L Thompson 
817*eaf62fffSJeremy L Thompson /**
818*eaf62fffSJeremy L Thompson   @brief Common code for creating a multigrid coarse operator and level
819*eaf62fffSJeremy L Thompson            transfer operators for a CeedOperator
820*eaf62fffSJeremy L Thompson 
821*eaf62fffSJeremy L Thompson   @param[in] op_fine       Fine grid operator
822*eaf62fffSJeremy L Thompson   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
823*eaf62fffSJeremy L Thompson   @param[in] rstr_coarse   Coarse grid restriction
824*eaf62fffSJeremy L Thompson   @param[in] basis_coarse  Coarse grid active vector basis
825*eaf62fffSJeremy L Thompson   @param[in] basis_c_to_f  Basis for coarse to fine interpolation
826*eaf62fffSJeremy L Thompson   @param[out] op_coarse    Coarse grid operator
827*eaf62fffSJeremy L Thompson   @param[out] op_prolong   Coarse to fine operator
828*eaf62fffSJeremy L Thompson   @param[out] op_restrict  Fine to coarse operator
829*eaf62fffSJeremy L Thompson 
830*eaf62fffSJeremy L Thompson   @return An error code: 0 - success, otherwise - failure
831*eaf62fffSJeremy L Thompson 
832*eaf62fffSJeremy L Thompson   @ref Developer
833*eaf62fffSJeremy L Thompson **/
834*eaf62fffSJeremy L Thompson static int CeedSingleOperatorMultigridLevel(CeedOperator op_fine,
835*eaf62fffSJeremy L Thompson     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
836*eaf62fffSJeremy L Thompson     CeedBasis basis_c_to_f, CeedOperator *op_coarse, CeedOperator *op_prolong,
837*eaf62fffSJeremy L Thompson     CeedOperator *op_restrict) {
838*eaf62fffSJeremy L Thompson   int ierr;
839*eaf62fffSJeremy L Thompson   Ceed ceed;
840*eaf62fffSJeremy L Thompson   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
841*eaf62fffSJeremy L Thompson 
842*eaf62fffSJeremy L Thompson   // Check for composite operator
843*eaf62fffSJeremy L Thompson   bool is_composite;
844*eaf62fffSJeremy L Thompson   ierr = CeedOperatorIsComposite(op_fine, &is_composite); CeedChk(ierr);
845*eaf62fffSJeremy L Thompson   if (is_composite)
846*eaf62fffSJeremy L Thompson     // LCOV_EXCL_START
847*eaf62fffSJeremy L Thompson     return CeedError(ceed, CEED_ERROR_UNSUPPORTED,
848*eaf62fffSJeremy L Thompson                      "Automatic multigrid setup for composite operators not supported");
849*eaf62fffSJeremy L Thompson   // LCOV_EXCL_STOP
850*eaf62fffSJeremy L Thompson 
851*eaf62fffSJeremy L Thompson   // Coarse Grid
852*eaf62fffSJeremy L Thompson   ierr = CeedOperatorCreate(ceed, op_fine->qf, op_fine->dqf, op_fine->dqfT,
853*eaf62fffSJeremy L Thompson                             op_coarse); CeedChk(ierr);
854*eaf62fffSJeremy L Thompson   CeedElemRestriction rstr_fine = NULL;
855*eaf62fffSJeremy L Thompson   // -- Clone input fields
856*eaf62fffSJeremy L Thompson   for (int i = 0; i < op_fine->qf->num_input_fields; i++) {
857*eaf62fffSJeremy L Thompson     if (op_fine->input_fields[i]->vec == CEED_VECTOR_ACTIVE) {
858*eaf62fffSJeremy L Thompson       rstr_fine = op_fine->input_fields[i]->elem_restr;
859*eaf62fffSJeremy L Thompson       ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
860*eaf62fffSJeremy L Thompson                                   rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
861*eaf62fffSJeremy L Thompson       CeedChk(ierr);
862*eaf62fffSJeremy L Thompson     } else {
863*eaf62fffSJeremy L Thompson       ierr = CeedOperatorSetField(*op_coarse, op_fine->input_fields[i]->field_name,
864*eaf62fffSJeremy L Thompson                                   op_fine->input_fields[i]->elem_restr,
865*eaf62fffSJeremy L Thompson                                   op_fine->input_fields[i]->basis,
866*eaf62fffSJeremy L Thompson                                   op_fine->input_fields[i]->vec); CeedChk(ierr);
867*eaf62fffSJeremy L Thompson     }
868*eaf62fffSJeremy L Thompson   }
869*eaf62fffSJeremy L Thompson   // -- Clone output fields
870*eaf62fffSJeremy L Thompson   for (int i = 0; i < op_fine->qf->num_output_fields; i++) {
871*eaf62fffSJeremy L Thompson     if (op_fine->output_fields[i]->vec == CEED_VECTOR_ACTIVE) {
872*eaf62fffSJeremy L Thompson       ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
873*eaf62fffSJeremy L Thompson                                   rstr_coarse, basis_coarse, CEED_VECTOR_ACTIVE);
874*eaf62fffSJeremy L Thompson       CeedChk(ierr);
875*eaf62fffSJeremy L Thompson     } else {
876*eaf62fffSJeremy L Thompson       ierr = CeedOperatorSetField(*op_coarse, op_fine->output_fields[i]->field_name,
877*eaf62fffSJeremy L Thompson                                   op_fine->output_fields[i]->elem_restr,
878*eaf62fffSJeremy L Thompson                                   op_fine->output_fields[i]->basis,
879*eaf62fffSJeremy L Thompson                                   op_fine->output_fields[i]->vec); CeedChk(ierr);
880*eaf62fffSJeremy L Thompson     }
881*eaf62fffSJeremy L Thompson   }
882*eaf62fffSJeremy L Thompson 
883*eaf62fffSJeremy L Thompson   // Multiplicity vector
884*eaf62fffSJeremy L Thompson   CeedVector mult_vec, mult_e_vec;
885*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionCreateVector(rstr_fine, &mult_vec, &mult_e_vec);
886*eaf62fffSJeremy L Thompson   CeedChk(ierr);
887*eaf62fffSJeremy L Thompson   ierr = CeedVectorSetValue(mult_e_vec, 0.0); CeedChk(ierr);
888*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionApply(rstr_fine, CEED_NOTRANSPOSE, p_mult_fine,
889*eaf62fffSJeremy L Thompson                                   mult_e_vec, CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
890*eaf62fffSJeremy L Thompson   ierr = CeedVectorSetValue(mult_vec, 0.0); CeedChk(ierr);
891*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionApply(rstr_fine, CEED_TRANSPOSE, mult_e_vec, mult_vec,
892*eaf62fffSJeremy L Thompson                                   CEED_REQUEST_IMMEDIATE); CeedChk(ierr);
893*eaf62fffSJeremy L Thompson   ierr = CeedVectorDestroy(&mult_e_vec); CeedChk(ierr);
894*eaf62fffSJeremy L Thompson   ierr = CeedVectorReciprocal(mult_vec); CeedChk(ierr);
895*eaf62fffSJeremy L Thompson 
896*eaf62fffSJeremy L Thompson   // Restriction
897*eaf62fffSJeremy L Thompson   CeedInt num_comp;
898*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetNumComponents(basis_coarse, &num_comp); CeedChk(ierr);
899*eaf62fffSJeremy L Thompson   CeedQFunction qf_restrict;
900*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_restrict);
901*eaf62fffSJeremy L Thompson   CeedChk(ierr);
902*eaf62fffSJeremy L Thompson   CeedInt *num_comp_r_data;
903*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(1, &num_comp_r_data); CeedChk(ierr);
904*eaf62fffSJeremy L Thompson   num_comp_r_data[0] = num_comp;
905*eaf62fffSJeremy L Thompson   CeedQFunctionContext ctx_r;
906*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionContextCreate(ceed, &ctx_r); CeedChk(ierr);
907*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionContextSetData(ctx_r, CEED_MEM_HOST, CEED_OWN_POINTER,
908*eaf62fffSJeremy L Thompson                                      sizeof(*num_comp_r_data), num_comp_r_data);
909*eaf62fffSJeremy L Thompson   CeedChk(ierr);
910*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionSetContext(qf_restrict, ctx_r); CeedChk(ierr);
911*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionContextDestroy(&ctx_r); CeedChk(ierr);
912*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionAddInput(qf_restrict, "input", num_comp, CEED_EVAL_NONE);
913*eaf62fffSJeremy L Thompson   CeedChk(ierr);
914*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionAddInput(qf_restrict, "scale", num_comp, CEED_EVAL_NONE);
915*eaf62fffSJeremy L Thompson   CeedChk(ierr);
916*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionAddOutput(qf_restrict, "output", num_comp,
917*eaf62fffSJeremy L Thompson                                 CEED_EVAL_INTERP); CeedChk(ierr);
918*eaf62fffSJeremy L Thompson 
919*eaf62fffSJeremy L Thompson   ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE,
920*eaf62fffSJeremy L Thompson                             CEED_QFUNCTION_NONE, op_restrict);
921*eaf62fffSJeremy L Thompson   CeedChk(ierr);
922*eaf62fffSJeremy L Thompson   ierr = CeedOperatorSetField(*op_restrict, "input", rstr_fine,
923*eaf62fffSJeremy L Thompson                               CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
924*eaf62fffSJeremy L Thompson   CeedChk(ierr);
925*eaf62fffSJeremy L Thompson   ierr = CeedOperatorSetField(*op_restrict, "scale", rstr_fine,
926*eaf62fffSJeremy L Thompson                               CEED_BASIS_COLLOCATED, mult_vec);
927*eaf62fffSJeremy L Thompson   CeedChk(ierr);
928*eaf62fffSJeremy L Thompson   ierr = CeedOperatorSetField(*op_restrict, "output", rstr_coarse, basis_c_to_f,
929*eaf62fffSJeremy L Thompson                               CEED_VECTOR_ACTIVE); CeedChk(ierr);
930*eaf62fffSJeremy L Thompson 
931*eaf62fffSJeremy L Thompson   // Prolongation
932*eaf62fffSJeremy L Thompson   CeedQFunction qf_prolong;
933*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionCreateInteriorByName(ceed, "Scale", &qf_prolong);
934*eaf62fffSJeremy L Thompson   CeedChk(ierr);
935*eaf62fffSJeremy L Thompson   CeedInt *num_comp_p_data;
936*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(1, &num_comp_p_data); CeedChk(ierr);
937*eaf62fffSJeremy L Thompson   num_comp_p_data[0] = num_comp;
938*eaf62fffSJeremy L Thompson   CeedQFunctionContext ctx_p;
939*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionContextCreate(ceed, &ctx_p); CeedChk(ierr);
940*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionContextSetData(ctx_p, CEED_MEM_HOST, CEED_OWN_POINTER,
941*eaf62fffSJeremy L Thompson                                      sizeof(*num_comp_p_data), num_comp_p_data);
942*eaf62fffSJeremy L Thompson   CeedChk(ierr);
943*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionSetContext(qf_prolong, ctx_p); CeedChk(ierr);
944*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionContextDestroy(&ctx_p); CeedChk(ierr);
945*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionAddInput(qf_prolong, "input", num_comp, CEED_EVAL_INTERP);
946*eaf62fffSJeremy L Thompson   CeedChk(ierr);
947*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionAddInput(qf_prolong, "scale", num_comp, CEED_EVAL_NONE);
948*eaf62fffSJeremy L Thompson   CeedChk(ierr);
949*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionAddOutput(qf_prolong, "output", num_comp, CEED_EVAL_NONE);
950*eaf62fffSJeremy L Thompson   CeedChk(ierr);
951*eaf62fffSJeremy L Thompson 
952*eaf62fffSJeremy L Thompson   ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE,
953*eaf62fffSJeremy L Thompson                             CEED_QFUNCTION_NONE, op_prolong);
954*eaf62fffSJeremy L Thompson   CeedChk(ierr);
955*eaf62fffSJeremy L Thompson   ierr = CeedOperatorSetField(*op_prolong, "input", rstr_coarse, basis_c_to_f,
956*eaf62fffSJeremy L Thompson                               CEED_VECTOR_ACTIVE); CeedChk(ierr);
957*eaf62fffSJeremy L Thompson   ierr = CeedOperatorSetField(*op_prolong, "scale", rstr_fine,
958*eaf62fffSJeremy L Thompson                               CEED_BASIS_COLLOCATED, mult_vec);
959*eaf62fffSJeremy L Thompson   CeedChk(ierr);
960*eaf62fffSJeremy L Thompson   ierr = CeedOperatorSetField(*op_prolong, "output", rstr_fine,
961*eaf62fffSJeremy L Thompson                               CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
962*eaf62fffSJeremy L Thompson   CeedChk(ierr);
963*eaf62fffSJeremy L Thompson 
964*eaf62fffSJeremy L Thompson   // Cleanup
965*eaf62fffSJeremy L Thompson   ierr = CeedVectorDestroy(&mult_vec); CeedChk(ierr);
966*eaf62fffSJeremy L Thompson   ierr = CeedBasisDestroy(&basis_c_to_f); CeedChk(ierr);
967*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr);
968*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr);
969*eaf62fffSJeremy L Thompson   return CEED_ERROR_SUCCESS;
970*eaf62fffSJeremy L Thompson }
971*eaf62fffSJeremy L Thompson 
972*eaf62fffSJeremy L Thompson /**
973*eaf62fffSJeremy L Thompson   @brief Build 1D mass matrix and Laplacian with perturbation
974*eaf62fffSJeremy L Thompson 
975*eaf62fffSJeremy L Thompson   @param[in] interp_1d    Interpolation matrix in one dimension
976*eaf62fffSJeremy L Thompson   @param[in] grad_1d      Gradient matrix in one dimension
977*eaf62fffSJeremy L Thompson   @param[in] q_weight_1d  Quadrature weights in one dimension
978*eaf62fffSJeremy L Thompson   @param[in] P_1d         Number of basis nodes in one dimension
979*eaf62fffSJeremy L Thompson   @param[in] Q_1d         Number of quadrature points in one dimension
980*eaf62fffSJeremy L Thompson   @param[in] dim          Dimension of basis
981*eaf62fffSJeremy L Thompson   @param[out] mass        Assembled mass matrix in one dimension
982*eaf62fffSJeremy L Thompson   @param[out] laplace     Assembled perturbed Laplacian in one dimension
983*eaf62fffSJeremy L Thompson 
984*eaf62fffSJeremy L Thompson   @return An error code: 0 - success, otherwise - failure
985*eaf62fffSJeremy L Thompson 
986*eaf62fffSJeremy L Thompson   @ref Developer
987*eaf62fffSJeremy L Thompson **/
988*eaf62fffSJeremy L Thompson CeedPragmaOptimizeOff
989*eaf62fffSJeremy L Thompson static int CeedBuildMassLaplace(const CeedScalar *interp_1d,
990*eaf62fffSJeremy L Thompson                                 const CeedScalar *grad_1d,
991*eaf62fffSJeremy L Thompson                                 const CeedScalar *q_weight_1d, CeedInt P_1d,
992*eaf62fffSJeremy L Thompson                                 CeedInt Q_1d, CeedInt dim,
993*eaf62fffSJeremy L Thompson                                 CeedScalar *mass, CeedScalar *laplace) {
994*eaf62fffSJeremy L Thompson 
995*eaf62fffSJeremy L Thompson   for (CeedInt i=0; i<P_1d; i++)
996*eaf62fffSJeremy L Thompson     for (CeedInt j=0; j<P_1d; j++) {
997*eaf62fffSJeremy L Thompson       CeedScalar sum = 0.0;
998*eaf62fffSJeremy L Thompson       for (CeedInt k=0; k<Q_1d; k++)
999*eaf62fffSJeremy L Thompson         sum += interp_1d[k*P_1d+i]*q_weight_1d[k]*interp_1d[k*P_1d+j];
1000*eaf62fffSJeremy L Thompson       mass[i+j*P_1d] = sum;
1001*eaf62fffSJeremy L Thompson     }
1002*eaf62fffSJeremy L Thompson   // -- Laplacian
1003*eaf62fffSJeremy L Thompson   for (CeedInt i=0; i<P_1d; i++)
1004*eaf62fffSJeremy L Thompson     for (CeedInt j=0; j<P_1d; j++) {
1005*eaf62fffSJeremy L Thompson       CeedScalar sum = 0.0;
1006*eaf62fffSJeremy L Thompson       for (CeedInt k=0; k<Q_1d; k++)
1007*eaf62fffSJeremy L Thompson         sum += grad_1d[k*P_1d+i]*q_weight_1d[k]*grad_1d[k*P_1d+j];
1008*eaf62fffSJeremy L Thompson       laplace[i+j*P_1d] = sum;
1009*eaf62fffSJeremy L Thompson     }
1010*eaf62fffSJeremy L Thompson   CeedScalar perturbation = dim>2 ? 1e-6 : 1e-4;
1011*eaf62fffSJeremy L Thompson   for (CeedInt i=0; i<P_1d; i++)
1012*eaf62fffSJeremy L Thompson     laplace[i+P_1d*i] += perturbation;
1013*eaf62fffSJeremy L Thompson   return CEED_ERROR_SUCCESS;
1014*eaf62fffSJeremy L Thompson }
1015*eaf62fffSJeremy L Thompson CeedPragmaOptimizeOn
1016*eaf62fffSJeremy L Thompson 
1017*eaf62fffSJeremy L Thompson /// @}
1018*eaf62fffSJeremy L Thompson 
1019*eaf62fffSJeremy L Thompson /// ----------------------------------------------------------------------------
1020*eaf62fffSJeremy L Thompson /// CeedOperator Public API
1021*eaf62fffSJeremy L Thompson /// ----------------------------------------------------------------------------
1022*eaf62fffSJeremy L Thompson /// @addtogroup CeedOperatorUser
1023*eaf62fffSJeremy L Thompson /// @{
1024*eaf62fffSJeremy L Thompson 
1025*eaf62fffSJeremy L Thompson /**
1026*eaf62fffSJeremy L Thompson   @brief Assemble a linear CeedQFunction associated with a CeedOperator
1027*eaf62fffSJeremy L Thompson 
1028*eaf62fffSJeremy L Thompson   This returns a CeedVector containing a matrix at each quadrature point
1029*eaf62fffSJeremy L Thompson     providing the action of the CeedQFunction associated with the CeedOperator.
1030*eaf62fffSJeremy L Thompson     The vector 'assembled' is of shape
1031*eaf62fffSJeremy L Thompson       [num_elements, num_input_fields, num_output_fields, num_quad_points]
1032*eaf62fffSJeremy L Thompson     and contains column-major matrices representing the action of the
1033*eaf62fffSJeremy L Thompson     CeedQFunction for a corresponding quadrature point on an element. Inputs and
1034*eaf62fffSJeremy L Thompson     outputs are in the order provided by the user when adding CeedOperator fields.
1035*eaf62fffSJeremy L Thompson     For example, a CeedQFunction with inputs 'u' and 'gradu' and outputs 'gradv' and
1036*eaf62fffSJeremy L Thompson     'v', provided in that order, would result in an assembled QFunction that
1037*eaf62fffSJeremy L Thompson     consists of (1 + dim) x (dim + 1) matrices at each quadrature point acting
1038*eaf62fffSJeremy L Thompson     on the input [u, du_0, du_1] and producing the output [dv_0, dv_1, v].
1039*eaf62fffSJeremy L Thompson 
1040*eaf62fffSJeremy L Thompson   @param op              CeedOperator to assemble CeedQFunction
1041*eaf62fffSJeremy L Thompson   @param[out] assembled  CeedVector to store assembled CeedQFunction at
1042*eaf62fffSJeremy L Thompson                            quadrature points
1043*eaf62fffSJeremy L Thompson   @param[out] rstr       CeedElemRestriction for CeedVector containing assembled
1044*eaf62fffSJeremy L Thompson                            CeedQFunction
1045*eaf62fffSJeremy L Thompson   @param request         Address of CeedRequest for non-blocking completion, else
1046*eaf62fffSJeremy L Thompson                            @ref CEED_REQUEST_IMMEDIATE
1047*eaf62fffSJeremy L Thompson 
1048*eaf62fffSJeremy L Thompson   @return An error code: 0 - success, otherwise - failure
1049*eaf62fffSJeremy L Thompson 
1050*eaf62fffSJeremy L Thompson   @ref User
1051*eaf62fffSJeremy L Thompson **/
1052*eaf62fffSJeremy L Thompson int CeedOperatorLinearAssembleQFunction(CeedOperator op, CeedVector *assembled,
1053*eaf62fffSJeremy L Thompson                                         CeedElemRestriction *rstr,
1054*eaf62fffSJeremy L Thompson                                         CeedRequest *request) {
1055*eaf62fffSJeremy L Thompson   int ierr;
1056*eaf62fffSJeremy L Thompson   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1057*eaf62fffSJeremy L Thompson 
1058*eaf62fffSJeremy L Thompson   // Backend version
1059*eaf62fffSJeremy L Thompson   if (op->LinearAssembleQFunction) {
1060*eaf62fffSJeremy L Thompson     ierr = op->LinearAssembleQFunction(op, assembled, rstr, request); CeedChk(ierr);
1061*eaf62fffSJeremy L Thompson     return CEED_ERROR_SUCCESS;
1062*eaf62fffSJeremy L Thompson   } else {
1063*eaf62fffSJeremy L Thompson     // Fallback to reference Ceed
1064*eaf62fffSJeremy L Thompson     if (!op->op_fallback) {
1065*eaf62fffSJeremy L Thompson       ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1066*eaf62fffSJeremy L Thompson     }
1067*eaf62fffSJeremy L Thompson     // Assemble
1068*eaf62fffSJeremy L Thompson     ierr = CeedOperatorLinearAssembleQFunction(op->op_fallback, assembled,
1069*eaf62fffSJeremy L Thompson            rstr, request); CeedChk(ierr);
1070*eaf62fffSJeremy L Thompson     return CEED_ERROR_SUCCESS;
1071*eaf62fffSJeremy L Thompson   }
1072*eaf62fffSJeremy L Thompson }
1073*eaf62fffSJeremy L Thompson 
1074*eaf62fffSJeremy L Thompson /**
1075*eaf62fffSJeremy L Thompson   @brief Assemble the diagonal of a square linear CeedOperator
1076*eaf62fffSJeremy L Thompson 
1077*eaf62fffSJeremy L Thompson   This overwrites a CeedVector with the diagonal of a linear CeedOperator.
1078*eaf62fffSJeremy L Thompson 
1079*eaf62fffSJeremy L Thompson   Note: Currently only non-composite CeedOperators with a single field and
1080*eaf62fffSJeremy L Thompson           composite CeedOperators with single field sub-operators are supported.
1081*eaf62fffSJeremy L Thompson 
1082*eaf62fffSJeremy L Thompson   @param op              CeedOperator to assemble CeedQFunction
1083*eaf62fffSJeremy L Thompson   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1084*eaf62fffSJeremy L Thompson   @param request         Address of CeedRequest for non-blocking completion, else
1085*eaf62fffSJeremy L Thompson                            @ref CEED_REQUEST_IMMEDIATE
1086*eaf62fffSJeremy L Thompson 
1087*eaf62fffSJeremy L Thompson   @return An error code: 0 - success, otherwise - failure
1088*eaf62fffSJeremy L Thompson 
1089*eaf62fffSJeremy L Thompson   @ref User
1090*eaf62fffSJeremy L Thompson **/
1091*eaf62fffSJeremy L Thompson int CeedOperatorLinearAssembleDiagonal(CeedOperator op, CeedVector assembled,
1092*eaf62fffSJeremy L Thompson                                        CeedRequest *request) {
1093*eaf62fffSJeremy L Thompson   int ierr;
1094*eaf62fffSJeremy L Thompson   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1095*eaf62fffSJeremy L Thompson 
1096*eaf62fffSJeremy L Thompson   // Use backend version, if available
1097*eaf62fffSJeremy L Thompson   if (op->LinearAssembleDiagonal) {
1098*eaf62fffSJeremy L Thompson     ierr = op->LinearAssembleDiagonal(op, assembled, request); CeedChk(ierr);
1099*eaf62fffSJeremy L Thompson     return CEED_ERROR_SUCCESS;
1100*eaf62fffSJeremy L Thompson   } else if (op->LinearAssembleAddDiagonal) {
1101*eaf62fffSJeremy L Thompson     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1102*eaf62fffSJeremy L Thompson     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1103*eaf62fffSJeremy L Thompson     return CEED_ERROR_SUCCESS;
1104*eaf62fffSJeremy L Thompson   } else {
1105*eaf62fffSJeremy L Thompson     // Check for valid fallback resource
1106*eaf62fffSJeremy L Thompson     const char *resource, *fallback_resource;
1107*eaf62fffSJeremy L Thompson     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1108*eaf62fffSJeremy L Thompson     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1109*eaf62fffSJeremy L Thompson     CeedChk(ierr);
1110*eaf62fffSJeremy L Thompson     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1111*eaf62fffSJeremy L Thompson       // Fallback to reference Ceed
1112*eaf62fffSJeremy L Thompson       if (!op->op_fallback) {
1113*eaf62fffSJeremy L Thompson         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1114*eaf62fffSJeremy L Thompson       }
1115*eaf62fffSJeremy L Thompson       // Assemble
1116*eaf62fffSJeremy L Thompson       ierr = CeedOperatorLinearAssembleDiagonal(op->op_fallback, assembled, request);
1117*eaf62fffSJeremy L Thompson       CeedChk(ierr);
1118*eaf62fffSJeremy L Thompson       return CEED_ERROR_SUCCESS;
1119*eaf62fffSJeremy L Thompson     }
1120*eaf62fffSJeremy L Thompson   }
1121*eaf62fffSJeremy L Thompson 
1122*eaf62fffSJeremy L Thompson   // Default interface implementation
1123*eaf62fffSJeremy L Thompson   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1124*eaf62fffSJeremy L Thompson   ierr = CeedOperatorLinearAssembleAddDiagonal(op, assembled, request);
1125*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1126*eaf62fffSJeremy L Thompson   return CEED_ERROR_SUCCESS;
1127*eaf62fffSJeremy L Thompson }
1128*eaf62fffSJeremy L Thompson 
1129*eaf62fffSJeremy L Thompson /**
1130*eaf62fffSJeremy L Thompson   @brief Assemble the diagonal of a square linear CeedOperator
1131*eaf62fffSJeremy L Thompson 
1132*eaf62fffSJeremy L Thompson   This sums into a CeedVector the diagonal of a linear CeedOperator.
1133*eaf62fffSJeremy L Thompson 
1134*eaf62fffSJeremy L Thompson   Note: Currently only non-composite CeedOperators with a single field and
1135*eaf62fffSJeremy L Thompson           composite CeedOperators with single field sub-operators are supported.
1136*eaf62fffSJeremy L Thompson 
1137*eaf62fffSJeremy L Thompson   @param op              CeedOperator to assemble CeedQFunction
1138*eaf62fffSJeremy L Thompson   @param[out] assembled  CeedVector to store assembled CeedOperator diagonal
1139*eaf62fffSJeremy L Thompson   @param request         Address of CeedRequest for non-blocking completion, else
1140*eaf62fffSJeremy L Thompson                            @ref CEED_REQUEST_IMMEDIATE
1141*eaf62fffSJeremy L Thompson 
1142*eaf62fffSJeremy L Thompson   @return An error code: 0 - success, otherwise - failure
1143*eaf62fffSJeremy L Thompson 
1144*eaf62fffSJeremy L Thompson   @ref User
1145*eaf62fffSJeremy L Thompson **/
1146*eaf62fffSJeremy L Thompson int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, CeedVector assembled,
1147*eaf62fffSJeremy L Thompson     CeedRequest *request) {
1148*eaf62fffSJeremy L Thompson   int ierr;
1149*eaf62fffSJeremy L Thompson   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1150*eaf62fffSJeremy L Thompson 
1151*eaf62fffSJeremy L Thompson   // Use backend version, if available
1152*eaf62fffSJeremy L Thompson   if (op->LinearAssembleAddDiagonal) {
1153*eaf62fffSJeremy L Thompson     ierr = op->LinearAssembleAddDiagonal(op, assembled, request); CeedChk(ierr);
1154*eaf62fffSJeremy L Thompson     return CEED_ERROR_SUCCESS;
1155*eaf62fffSJeremy L Thompson   } else {
1156*eaf62fffSJeremy L Thompson     // Check for valid fallback resource
1157*eaf62fffSJeremy L Thompson     const char *resource, *fallback_resource;
1158*eaf62fffSJeremy L Thompson     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1159*eaf62fffSJeremy L Thompson     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1160*eaf62fffSJeremy L Thompson     CeedChk(ierr);
1161*eaf62fffSJeremy L Thompson     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1162*eaf62fffSJeremy L Thompson       // Fallback to reference Ceed
1163*eaf62fffSJeremy L Thompson       if (!op->op_fallback) {
1164*eaf62fffSJeremy L Thompson         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1165*eaf62fffSJeremy L Thompson       }
1166*eaf62fffSJeremy L Thompson       // Assemble
1167*eaf62fffSJeremy L Thompson       ierr = CeedOperatorLinearAssembleAddDiagonal(op->op_fallback, assembled,
1168*eaf62fffSJeremy L Thompson              request); CeedChk(ierr);
1169*eaf62fffSJeremy L Thompson       return CEED_ERROR_SUCCESS;
1170*eaf62fffSJeremy L Thompson     }
1171*eaf62fffSJeremy L Thompson   }
1172*eaf62fffSJeremy L Thompson 
1173*eaf62fffSJeremy L Thompson   // Default interface implementation
1174*eaf62fffSJeremy L Thompson   bool is_composite;
1175*eaf62fffSJeremy L Thompson   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1176*eaf62fffSJeremy L Thompson   if (is_composite) {
1177*eaf62fffSJeremy L Thompson     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1178*eaf62fffSJeremy L Thompson            false, assembled); CeedChk(ierr);
1179*eaf62fffSJeremy L Thompson     return CEED_ERROR_SUCCESS;
1180*eaf62fffSJeremy L Thompson   } else {
1181*eaf62fffSJeremy L Thompson     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, false, assembled);
1182*eaf62fffSJeremy L Thompson     CeedChk(ierr);
1183*eaf62fffSJeremy L Thompson     return CEED_ERROR_SUCCESS;
1184*eaf62fffSJeremy L Thompson   }
1185*eaf62fffSJeremy L Thompson }
1186*eaf62fffSJeremy L Thompson 
1187*eaf62fffSJeremy L Thompson /**
1188*eaf62fffSJeremy L Thompson   @brief Assemble the point block diagonal of a square linear CeedOperator
1189*eaf62fffSJeremy L Thompson 
1190*eaf62fffSJeremy L Thompson   This overwrites a CeedVector with the point block diagonal of a linear
1191*eaf62fffSJeremy L Thompson     CeedOperator.
1192*eaf62fffSJeremy L Thompson 
1193*eaf62fffSJeremy L Thompson   Note: Currently only non-composite CeedOperators with a single field and
1194*eaf62fffSJeremy L Thompson           composite CeedOperators with single field sub-operators are supported.
1195*eaf62fffSJeremy L Thompson 
1196*eaf62fffSJeremy L Thompson   @param op              CeedOperator to assemble CeedQFunction
1197*eaf62fffSJeremy L Thompson   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1198*eaf62fffSJeremy L Thompson                            diagonal, provided in row-major form with an
1199*eaf62fffSJeremy L Thompson                            @a num_comp * @a num_comp block at each node. The dimensions
1200*eaf62fffSJeremy L Thompson                            of this vector are derived from the active vector
1201*eaf62fffSJeremy L Thompson                            for the CeedOperator. The array has shape
1202*eaf62fffSJeremy L Thompson                            [nodes, component out, component in].
1203*eaf62fffSJeremy L Thompson   @param request         Address of CeedRequest for non-blocking completion, else
1204*eaf62fffSJeremy L Thompson                            CEED_REQUEST_IMMEDIATE
1205*eaf62fffSJeremy L Thompson 
1206*eaf62fffSJeremy L Thompson   @return An error code: 0 - success, otherwise - failure
1207*eaf62fffSJeremy L Thompson 
1208*eaf62fffSJeremy L Thompson   @ref User
1209*eaf62fffSJeremy L Thompson **/
1210*eaf62fffSJeremy L Thompson int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op,
1211*eaf62fffSJeremy L Thompson     CeedVector assembled, CeedRequest *request) {
1212*eaf62fffSJeremy L Thompson   int ierr;
1213*eaf62fffSJeremy L Thompson   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1214*eaf62fffSJeremy L Thompson 
1215*eaf62fffSJeremy L Thompson   // Use backend version, if available
1216*eaf62fffSJeremy L Thompson   if (op->LinearAssemblePointBlockDiagonal) {
1217*eaf62fffSJeremy L Thompson     ierr = op->LinearAssemblePointBlockDiagonal(op, assembled, request);
1218*eaf62fffSJeremy L Thompson     CeedChk(ierr);
1219*eaf62fffSJeremy L Thompson     return CEED_ERROR_SUCCESS;
1220*eaf62fffSJeremy L Thompson   } else if (op->LinearAssembleAddPointBlockDiagonal) {
1221*eaf62fffSJeremy L Thompson     ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1222*eaf62fffSJeremy L Thompson     ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled,
1223*eaf62fffSJeremy L Thompson            request); CeedChk(ierr);
1224*eaf62fffSJeremy L Thompson     return CEED_ERROR_SUCCESS;
1225*eaf62fffSJeremy L Thompson   } else {
1226*eaf62fffSJeremy L Thompson     // Check for valid fallback resource
1227*eaf62fffSJeremy L Thompson     const char *resource, *fallback_resource;
1228*eaf62fffSJeremy L Thompson     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1229*eaf62fffSJeremy L Thompson     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1230*eaf62fffSJeremy L Thompson     CeedChk(ierr);
1231*eaf62fffSJeremy L Thompson     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1232*eaf62fffSJeremy L Thompson       // Fallback to reference Ceed
1233*eaf62fffSJeremy L Thompson       if (!op->op_fallback) {
1234*eaf62fffSJeremy L Thompson         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1235*eaf62fffSJeremy L Thompson       }
1236*eaf62fffSJeremy L Thompson       // Assemble
1237*eaf62fffSJeremy L Thompson       ierr = CeedOperatorLinearAssemblePointBlockDiagonal(op->op_fallback,
1238*eaf62fffSJeremy L Thompson              assembled, request); CeedChk(ierr);
1239*eaf62fffSJeremy L Thompson       return CEED_ERROR_SUCCESS;
1240*eaf62fffSJeremy L Thompson     }
1241*eaf62fffSJeremy L Thompson   }
1242*eaf62fffSJeremy L Thompson 
1243*eaf62fffSJeremy L Thompson   // Default interface implementation
1244*eaf62fffSJeremy L Thompson   ierr = CeedVectorSetValue(assembled, 0.0); CeedChk(ierr);
1245*eaf62fffSJeremy L Thompson   ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op, assembled, request);
1246*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1247*eaf62fffSJeremy L Thompson   return CEED_ERROR_SUCCESS;
1248*eaf62fffSJeremy L Thompson }
1249*eaf62fffSJeremy L Thompson 
1250*eaf62fffSJeremy L Thompson /**
1251*eaf62fffSJeremy L Thompson   @brief Assemble the point block diagonal of a square linear CeedOperator
1252*eaf62fffSJeremy L Thompson 
1253*eaf62fffSJeremy L Thompson   This sums into a CeedVector with the point block diagonal of a linear
1254*eaf62fffSJeremy L Thompson     CeedOperator.
1255*eaf62fffSJeremy L Thompson 
1256*eaf62fffSJeremy L Thompson   Note: Currently only non-composite CeedOperators with a single field and
1257*eaf62fffSJeremy L Thompson           composite CeedOperators with single field sub-operators are supported.
1258*eaf62fffSJeremy L Thompson 
1259*eaf62fffSJeremy L Thompson   @param op              CeedOperator to assemble CeedQFunction
1260*eaf62fffSJeremy L Thompson   @param[out] assembled  CeedVector to store assembled CeedOperator point block
1261*eaf62fffSJeremy L Thompson                            diagonal, provided in row-major form with an
1262*eaf62fffSJeremy L Thompson                            @a num_comp * @a num_comp block at each node. The dimensions
1263*eaf62fffSJeremy L Thompson                            of this vector are derived from the active vector
1264*eaf62fffSJeremy L Thompson                            for the CeedOperator. The array has shape
1265*eaf62fffSJeremy L Thompson                            [nodes, component out, component in].
1266*eaf62fffSJeremy L Thompson   @param request         Address of CeedRequest for non-blocking completion, else
1267*eaf62fffSJeremy L Thompson                            CEED_REQUEST_IMMEDIATE
1268*eaf62fffSJeremy L Thompson 
1269*eaf62fffSJeremy L Thompson   @return An error code: 0 - success, otherwise - failure
1270*eaf62fffSJeremy L Thompson 
1271*eaf62fffSJeremy L Thompson   @ref User
1272*eaf62fffSJeremy L Thompson **/
1273*eaf62fffSJeremy L Thompson int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op,
1274*eaf62fffSJeremy L Thompson     CeedVector assembled, CeedRequest *request) {
1275*eaf62fffSJeremy L Thompson   int ierr;
1276*eaf62fffSJeremy L Thompson   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1277*eaf62fffSJeremy L Thompson 
1278*eaf62fffSJeremy L Thompson   // Use backend version, if available
1279*eaf62fffSJeremy L Thompson   if (op->LinearAssembleAddPointBlockDiagonal) {
1280*eaf62fffSJeremy L Thompson     ierr = op->LinearAssembleAddPointBlockDiagonal(op, assembled, request);
1281*eaf62fffSJeremy L Thompson     CeedChk(ierr);
1282*eaf62fffSJeremy L Thompson     return CEED_ERROR_SUCCESS;
1283*eaf62fffSJeremy L Thompson   } else {
1284*eaf62fffSJeremy L Thompson     // Check for valid fallback resource
1285*eaf62fffSJeremy L Thompson     const char *resource, *fallback_resource;
1286*eaf62fffSJeremy L Thompson     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1287*eaf62fffSJeremy L Thompson     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1288*eaf62fffSJeremy L Thompson     CeedChk(ierr);
1289*eaf62fffSJeremy L Thompson     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1290*eaf62fffSJeremy L Thompson       // Fallback to reference Ceed
1291*eaf62fffSJeremy L Thompson       if (!op->op_fallback) {
1292*eaf62fffSJeremy L Thompson         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1293*eaf62fffSJeremy L Thompson       }
1294*eaf62fffSJeremy L Thompson       // Assemble
1295*eaf62fffSJeremy L Thompson       ierr = CeedOperatorLinearAssembleAddPointBlockDiagonal(op->op_fallback,
1296*eaf62fffSJeremy L Thompson              assembled, request); CeedChk(ierr);
1297*eaf62fffSJeremy L Thompson       return CEED_ERROR_SUCCESS;
1298*eaf62fffSJeremy L Thompson     }
1299*eaf62fffSJeremy L Thompson   }
1300*eaf62fffSJeremy L Thompson 
1301*eaf62fffSJeremy L Thompson   // Default interface implemenation
1302*eaf62fffSJeremy L Thompson   bool is_composite;
1303*eaf62fffSJeremy L Thompson   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1304*eaf62fffSJeremy L Thompson   if (is_composite) {
1305*eaf62fffSJeremy L Thompson     ierr = CeedCompositeOperatorLinearAssembleAddDiagonal(op, request,
1306*eaf62fffSJeremy L Thompson            true, assembled); CeedChk(ierr);
1307*eaf62fffSJeremy L Thompson     return CEED_ERROR_SUCCESS;
1308*eaf62fffSJeremy L Thompson   } else {
1309*eaf62fffSJeremy L Thompson     ierr = CeedSingleOperatorAssembleAddDiagonal(op, request, true, assembled);
1310*eaf62fffSJeremy L Thompson     CeedChk(ierr);
1311*eaf62fffSJeremy L Thompson     return CEED_ERROR_SUCCESS;
1312*eaf62fffSJeremy L Thompson   }
1313*eaf62fffSJeremy L Thompson }
1314*eaf62fffSJeremy L Thompson 
1315*eaf62fffSJeremy L Thompson /**
1316*eaf62fffSJeremy L Thompson    @brief Fully assemble the nonzero pattern of a linear operator.
1317*eaf62fffSJeremy L Thompson 
1318*eaf62fffSJeremy L Thompson    Expected to be used in conjunction with CeedOperatorLinearAssemble()
1319*eaf62fffSJeremy L Thompson 
1320*eaf62fffSJeremy L Thompson    The assembly routines use coordinate format, with num_entries tuples of the
1321*eaf62fffSJeremy L Thompson    form (i, j, value) which indicate that value should be added to the matrix
1322*eaf62fffSJeremy L Thompson    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1323*eaf62fffSJeremy L Thompson    This function returns the number of entries and their (i, j) locations,
1324*eaf62fffSJeremy L Thompson    while CeedOperatorLinearAssemble() provides the values in the same
1325*eaf62fffSJeremy L Thompson    ordering.
1326*eaf62fffSJeremy L Thompson 
1327*eaf62fffSJeremy L Thompson    This will generally be slow unless your operator is low-order.
1328*eaf62fffSJeremy L Thompson 
1329*eaf62fffSJeremy L Thompson    @param[in]  op           CeedOperator to assemble
1330*eaf62fffSJeremy L Thompson    @param[out] num_entries  Number of entries in coordinate nonzero pattern
1331*eaf62fffSJeremy L Thompson    @param[out] rows         Row number for each entry
1332*eaf62fffSJeremy L Thompson    @param[out] cols         Column number for each entry
1333*eaf62fffSJeremy L Thompson 
1334*eaf62fffSJeremy L Thompson    @ref User
1335*eaf62fffSJeremy L Thompson **/
1336*eaf62fffSJeremy L Thompson int CeedOperatorLinearAssembleSymbolic(CeedOperator op, CeedInt *num_entries,
1337*eaf62fffSJeremy L Thompson                                        CeedInt **rows, CeedInt **cols) {
1338*eaf62fffSJeremy L Thompson   int ierr;
1339*eaf62fffSJeremy L Thompson   CeedInt num_suboperators, single_entries;
1340*eaf62fffSJeremy L Thompson   CeedOperator *sub_operators;
1341*eaf62fffSJeremy L Thompson   bool is_composite;
1342*eaf62fffSJeremy L Thompson   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1343*eaf62fffSJeremy L Thompson 
1344*eaf62fffSJeremy L Thompson   // Use backend version, if available
1345*eaf62fffSJeremy L Thompson   if (op->LinearAssembleSymbolic) {
1346*eaf62fffSJeremy L Thompson     ierr = op->LinearAssembleSymbolic(op, num_entries, rows, cols); CeedChk(ierr);
1347*eaf62fffSJeremy L Thompson     return CEED_ERROR_SUCCESS;
1348*eaf62fffSJeremy L Thompson   } else {
1349*eaf62fffSJeremy L Thompson     // Check for valid fallback resource
1350*eaf62fffSJeremy L Thompson     const char *resource, *fallback_resource;
1351*eaf62fffSJeremy L Thompson     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1352*eaf62fffSJeremy L Thompson     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1353*eaf62fffSJeremy L Thompson     CeedChk(ierr);
1354*eaf62fffSJeremy L Thompson     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1355*eaf62fffSJeremy L Thompson       // Fallback to reference Ceed
1356*eaf62fffSJeremy L Thompson       if (!op->op_fallback) {
1357*eaf62fffSJeremy L Thompson         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1358*eaf62fffSJeremy L Thompson       }
1359*eaf62fffSJeremy L Thompson       // Assemble
1360*eaf62fffSJeremy L Thompson       ierr = CeedOperatorLinearAssembleSymbolic(op->op_fallback, num_entries, rows,
1361*eaf62fffSJeremy L Thompson              cols); CeedChk(ierr);
1362*eaf62fffSJeremy L Thompson       return CEED_ERROR_SUCCESS;
1363*eaf62fffSJeremy L Thompson     }
1364*eaf62fffSJeremy L Thompson   }
1365*eaf62fffSJeremy L Thompson 
1366*eaf62fffSJeremy L Thompson   // Default interface implementation
1367*eaf62fffSJeremy L Thompson 
1368*eaf62fffSJeremy L Thompson   // count entries and allocate rows, cols arrays
1369*eaf62fffSJeremy L Thompson   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1370*eaf62fffSJeremy L Thompson   *num_entries = 0;
1371*eaf62fffSJeremy L Thompson   if (is_composite) {
1372*eaf62fffSJeremy L Thompson     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1373*eaf62fffSJeremy L Thompson     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1374*eaf62fffSJeremy L Thompson     for (int k = 0; k < num_suboperators; ++k) {
1375*eaf62fffSJeremy L Thompson       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1376*eaf62fffSJeremy L Thompson              &single_entries); CeedChk(ierr);
1377*eaf62fffSJeremy L Thompson       *num_entries += single_entries;
1378*eaf62fffSJeremy L Thompson     }
1379*eaf62fffSJeremy L Thompson   } else {
1380*eaf62fffSJeremy L Thompson     ierr = CeedSingleOperatorAssemblyCountEntries(op,
1381*eaf62fffSJeremy L Thompson            &single_entries); CeedChk(ierr);
1382*eaf62fffSJeremy L Thompson     *num_entries += single_entries;
1383*eaf62fffSJeremy L Thompson   }
1384*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(*num_entries, rows); CeedChk(ierr);
1385*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(*num_entries, cols); CeedChk(ierr);
1386*eaf62fffSJeremy L Thompson 
1387*eaf62fffSJeremy L Thompson   // assemble nonzero locations
1388*eaf62fffSJeremy L Thompson   CeedInt offset = 0;
1389*eaf62fffSJeremy L Thompson   if (is_composite) {
1390*eaf62fffSJeremy L Thompson     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1391*eaf62fffSJeremy L Thompson     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1392*eaf62fffSJeremy L Thompson     for (int k = 0; k < num_suboperators; ++k) {
1393*eaf62fffSJeremy L Thompson       ierr = CeedSingleOperatorAssembleSymbolic(sub_operators[k], offset, *rows,
1394*eaf62fffSJeremy L Thompson              *cols); CeedChk(ierr);
1395*eaf62fffSJeremy L Thompson       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1396*eaf62fffSJeremy L Thompson              &single_entries);
1397*eaf62fffSJeremy L Thompson       CeedChk(ierr);
1398*eaf62fffSJeremy L Thompson       offset += single_entries;
1399*eaf62fffSJeremy L Thompson     }
1400*eaf62fffSJeremy L Thompson   } else {
1401*eaf62fffSJeremy L Thompson     ierr = CeedSingleOperatorAssembleSymbolic(op, offset, *rows, *cols);
1402*eaf62fffSJeremy L Thompson     CeedChk(ierr);
1403*eaf62fffSJeremy L Thompson   }
1404*eaf62fffSJeremy L Thompson 
1405*eaf62fffSJeremy L Thompson   return CEED_ERROR_SUCCESS;
1406*eaf62fffSJeremy L Thompson }
1407*eaf62fffSJeremy L Thompson 
1408*eaf62fffSJeremy L Thompson /**
1409*eaf62fffSJeremy L Thompson    @brief Fully assemble the nonzero entries of a linear operator.
1410*eaf62fffSJeremy L Thompson 
1411*eaf62fffSJeremy L Thompson    Expected to be used in conjunction with CeedOperatorLinearAssembleSymbolic()
1412*eaf62fffSJeremy L Thompson 
1413*eaf62fffSJeremy L Thompson    The assembly routines use coordinate format, with num_entries tuples of the
1414*eaf62fffSJeremy L Thompson    form (i, j, value) which indicate that value should be added to the matrix
1415*eaf62fffSJeremy L Thompson    in entry (i, j). Note that the (i, j) pairs are not unique and may repeat.
1416*eaf62fffSJeremy L Thompson    This function returns the values of the nonzero entries to be added, their
1417*eaf62fffSJeremy L Thompson    (i, j) locations are provided by CeedOperatorLinearAssembleSymbolic()
1418*eaf62fffSJeremy L Thompson 
1419*eaf62fffSJeremy L Thompson    This will generally be slow unless your operator is low-order.
1420*eaf62fffSJeremy L Thompson 
1421*eaf62fffSJeremy L Thompson    @param[in]  op      CeedOperator to assemble
1422*eaf62fffSJeremy L Thompson    @param[out] values  Values to assemble into matrix
1423*eaf62fffSJeremy L Thompson 
1424*eaf62fffSJeremy L Thompson    @ref User
1425*eaf62fffSJeremy L Thompson **/
1426*eaf62fffSJeremy L Thompson int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values) {
1427*eaf62fffSJeremy L Thompson   int ierr;
1428*eaf62fffSJeremy L Thompson   CeedInt num_suboperators, single_entries = 0;
1429*eaf62fffSJeremy L Thompson   CeedOperator *sub_operators;
1430*eaf62fffSJeremy L Thompson   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1431*eaf62fffSJeremy L Thompson 
1432*eaf62fffSJeremy L Thompson   // Use backend version, if available
1433*eaf62fffSJeremy L Thompson   if (op->LinearAssemble) {
1434*eaf62fffSJeremy L Thompson     ierr = op->LinearAssemble(op, values); CeedChk(ierr);
1435*eaf62fffSJeremy L Thompson     return CEED_ERROR_SUCCESS;
1436*eaf62fffSJeremy L Thompson   } else {
1437*eaf62fffSJeremy L Thompson     // Check for valid fallback resource
1438*eaf62fffSJeremy L Thompson     const char *resource, *fallback_resource;
1439*eaf62fffSJeremy L Thompson     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1440*eaf62fffSJeremy L Thompson     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1441*eaf62fffSJeremy L Thompson     CeedChk(ierr);
1442*eaf62fffSJeremy L Thompson     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1443*eaf62fffSJeremy L Thompson       // Fallback to reference Ceed
1444*eaf62fffSJeremy L Thompson       if (!op->op_fallback) {
1445*eaf62fffSJeremy L Thompson         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1446*eaf62fffSJeremy L Thompson       }
1447*eaf62fffSJeremy L Thompson       // Assemble
1448*eaf62fffSJeremy L Thompson       ierr = CeedOperatorLinearAssemble(op->op_fallback, values); CeedChk(ierr);
1449*eaf62fffSJeremy L Thompson       return CEED_ERROR_SUCCESS;
1450*eaf62fffSJeremy L Thompson     }
1451*eaf62fffSJeremy L Thompson   }
1452*eaf62fffSJeremy L Thompson 
1453*eaf62fffSJeremy L Thompson   // Default interface implementation
1454*eaf62fffSJeremy L Thompson   bool is_composite;
1455*eaf62fffSJeremy L Thompson   ierr = CeedOperatorIsComposite(op, &is_composite); CeedChk(ierr);
1456*eaf62fffSJeremy L Thompson 
1457*eaf62fffSJeremy L Thompson   CeedInt offset = 0;
1458*eaf62fffSJeremy L Thompson   if (is_composite) {
1459*eaf62fffSJeremy L Thompson     ierr = CeedOperatorGetNumSub(op, &num_suboperators); CeedChk(ierr);
1460*eaf62fffSJeremy L Thompson     ierr = CeedOperatorGetSubList(op, &sub_operators); CeedChk(ierr);
1461*eaf62fffSJeremy L Thompson     for (int k = 0; k < num_suboperators; ++k) {
1462*eaf62fffSJeremy L Thompson       ierr = CeedSingleOperatorAssemble(sub_operators[k], offset, values);
1463*eaf62fffSJeremy L Thompson       CeedChk(ierr);
1464*eaf62fffSJeremy L Thompson       ierr = CeedSingleOperatorAssemblyCountEntries(sub_operators[k],
1465*eaf62fffSJeremy L Thompson              &single_entries);
1466*eaf62fffSJeremy L Thompson       CeedChk(ierr);
1467*eaf62fffSJeremy L Thompson       offset += single_entries;
1468*eaf62fffSJeremy L Thompson     }
1469*eaf62fffSJeremy L Thompson   } else {
1470*eaf62fffSJeremy L Thompson     ierr = CeedSingleOperatorAssemble(op, offset, values); CeedChk(ierr);
1471*eaf62fffSJeremy L Thompson   }
1472*eaf62fffSJeremy L Thompson 
1473*eaf62fffSJeremy L Thompson   return CEED_ERROR_SUCCESS;
1474*eaf62fffSJeremy L Thompson }
1475*eaf62fffSJeremy L Thompson 
1476*eaf62fffSJeremy L Thompson /**
1477*eaf62fffSJeremy L Thompson   @brief Create a multigrid coarse operator and level transfer operators
1478*eaf62fffSJeremy L Thompson            for a CeedOperator, creating the prolongation basis from the
1479*eaf62fffSJeremy L Thompson            fine and coarse grid interpolation
1480*eaf62fffSJeremy L Thompson 
1481*eaf62fffSJeremy L Thompson   @param[in] op_fine       Fine grid operator
1482*eaf62fffSJeremy L Thompson   @param[in] p_mult_fine   L-vector multiplicity in parallel gather/scatter
1483*eaf62fffSJeremy L Thompson   @param[in] rstr_coarse   Coarse grid restriction
1484*eaf62fffSJeremy L Thompson   @param[in] basis_coarse  Coarse grid active vector basis
1485*eaf62fffSJeremy L Thompson   @param[out] op_coarse    Coarse grid operator
1486*eaf62fffSJeremy L Thompson   @param[out] op_prolong   Coarse to fine operator
1487*eaf62fffSJeremy L Thompson   @param[out] op_restrict  Fine to coarse operator
1488*eaf62fffSJeremy L Thompson 
1489*eaf62fffSJeremy L Thompson   @return An error code: 0 - success, otherwise - failure
1490*eaf62fffSJeremy L Thompson 
1491*eaf62fffSJeremy L Thompson   @ref User
1492*eaf62fffSJeremy L Thompson **/
1493*eaf62fffSJeremy L Thompson int CeedOperatorMultigridLevelCreate(CeedOperator op_fine,
1494*eaf62fffSJeremy L Thompson                                      CeedVector p_mult_fine,
1495*eaf62fffSJeremy L Thompson                                      CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1496*eaf62fffSJeremy L Thompson                                      CeedOperator *op_coarse, CeedOperator *op_prolong,
1497*eaf62fffSJeremy L Thompson                                      CeedOperator *op_restrict) {
1498*eaf62fffSJeremy L Thompson   int ierr;
1499*eaf62fffSJeremy L Thompson   Ceed ceed;
1500*eaf62fffSJeremy L Thompson   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
1501*eaf62fffSJeremy L Thompson 
1502*eaf62fffSJeremy L Thompson   // Check for compatible quadrature spaces
1503*eaf62fffSJeremy L Thompson   CeedBasis basis_fine;
1504*eaf62fffSJeremy L Thompson   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
1505*eaf62fffSJeremy L Thompson   CeedInt Q_f, Q_c;
1506*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
1507*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
1508*eaf62fffSJeremy L Thompson   if (Q_f != Q_c)
1509*eaf62fffSJeremy L Thompson     // LCOV_EXCL_START
1510*eaf62fffSJeremy L Thompson     return CeedError(ceed, CEED_ERROR_DIMENSION,
1511*eaf62fffSJeremy L Thompson                      "Bases must have compatible quadrature spaces");
1512*eaf62fffSJeremy L Thompson   // LCOV_EXCL_STOP
1513*eaf62fffSJeremy L Thompson 
1514*eaf62fffSJeremy L Thompson   // Coarse to fine basis
1515*eaf62fffSJeremy L Thompson   CeedInt P_f, P_c, Q = Q_f;
1516*eaf62fffSJeremy L Thompson   bool is_tensor_f, is_tensor_c;
1517*eaf62fffSJeremy L Thompson   ierr = CeedBasisIsTensor(basis_fine, &is_tensor_f); CeedChk(ierr);
1518*eaf62fffSJeremy L Thompson   ierr = CeedBasisIsTensor(basis_coarse, &is_tensor_c); CeedChk(ierr);
1519*eaf62fffSJeremy L Thompson   CeedScalar *interp_c, *interp_f, *interp_c_to_f, *tau;
1520*eaf62fffSJeremy L Thompson   if (is_tensor_f && is_tensor_c) {
1521*eaf62fffSJeremy L Thompson     ierr = CeedBasisGetNumNodes1D(basis_fine, &P_f); CeedChk(ierr);
1522*eaf62fffSJeremy L Thompson     ierr = CeedBasisGetNumNodes1D(basis_coarse, &P_c); CeedChk(ierr);
1523*eaf62fffSJeremy L Thompson     ierr = CeedBasisGetNumQuadraturePoints1D(basis_coarse, &Q); CeedChk(ierr);
1524*eaf62fffSJeremy L Thompson   } else if (!is_tensor_f && !is_tensor_c) {
1525*eaf62fffSJeremy L Thompson     ierr = CeedBasisGetNumNodes(basis_fine, &P_f); CeedChk(ierr);
1526*eaf62fffSJeremy L Thompson     ierr = CeedBasisGetNumNodes(basis_coarse, &P_c); CeedChk(ierr);
1527*eaf62fffSJeremy L Thompson   } else {
1528*eaf62fffSJeremy L Thompson     // LCOV_EXCL_START
1529*eaf62fffSJeremy L Thompson     return CeedError(ceed, CEED_ERROR_MINOR,
1530*eaf62fffSJeremy L Thompson                      "Bases must both be tensor or non-tensor");
1531*eaf62fffSJeremy L Thompson     // LCOV_EXCL_STOP
1532*eaf62fffSJeremy L Thompson   }
1533*eaf62fffSJeremy L Thompson 
1534*eaf62fffSJeremy L Thompson   ierr = CeedMalloc(Q*P_f, &interp_f); CeedChk(ierr);
1535*eaf62fffSJeremy L Thompson   ierr = CeedMalloc(Q*P_c, &interp_c); CeedChk(ierr);
1536*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(P_c*P_f, &interp_c_to_f); CeedChk(ierr);
1537*eaf62fffSJeremy L Thompson   ierr = CeedMalloc(Q, &tau); CeedChk(ierr);
1538*eaf62fffSJeremy L Thompson   if (is_tensor_f) {
1539*eaf62fffSJeremy L Thompson     memcpy(interp_f, basis_fine->interp_1d, Q*P_f*sizeof basis_fine->interp_1d[0]);
1540*eaf62fffSJeremy L Thompson     memcpy(interp_c, basis_coarse->interp_1d,
1541*eaf62fffSJeremy L Thompson            Q*P_c*sizeof basis_coarse->interp_1d[0]);
1542*eaf62fffSJeremy L Thompson   } else {
1543*eaf62fffSJeremy L Thompson     memcpy(interp_f, basis_fine->interp, Q*P_f*sizeof basis_fine->interp[0]);
1544*eaf62fffSJeremy L Thompson     memcpy(interp_c, basis_coarse->interp, Q*P_c*sizeof basis_coarse->interp[0]);
1545*eaf62fffSJeremy L Thompson   }
1546*eaf62fffSJeremy L Thompson 
1547*eaf62fffSJeremy L Thompson   // -- QR Factorization, interp_f = Q R
1548*eaf62fffSJeremy L Thompson   ierr = CeedQRFactorization(ceed, interp_f, tau, Q, P_f); CeedChk(ierr);
1549*eaf62fffSJeremy L Thompson 
1550*eaf62fffSJeremy L Thompson   // -- Apply Qtranspose, interp_c = Qtranspose interp_c
1551*eaf62fffSJeremy L Thompson   ierr = CeedHouseholderApplyQ(interp_c, interp_f, tau, CEED_TRANSPOSE,
1552*eaf62fffSJeremy L Thompson                                Q, P_c, P_f, P_c, 1); CeedChk(ierr);
1553*eaf62fffSJeremy L Thompson 
1554*eaf62fffSJeremy L Thompson   // -- Apply Rinv, interp_c_to_f = Rinv interp_c
1555*eaf62fffSJeremy L Thompson   for (CeedInt j=0; j<P_c; j++) { // Column j
1556*eaf62fffSJeremy L Thompson     interp_c_to_f[j+P_c*(P_f-1)] = interp_c[j+P_c*(P_f-1)]/interp_f[P_f*P_f-1];
1557*eaf62fffSJeremy L Thompson     for (CeedInt i=P_f-2; i>=0; i--) { // Row i
1558*eaf62fffSJeremy L Thompson       interp_c_to_f[j+P_c*i] = interp_c[j+P_c*i];
1559*eaf62fffSJeremy L Thompson       for (CeedInt k=i+1; k<P_f; k++)
1560*eaf62fffSJeremy L Thompson         interp_c_to_f[j+P_c*i] -= interp_f[k+P_f*i]*interp_c_to_f[j+P_c*k];
1561*eaf62fffSJeremy L Thompson       interp_c_to_f[j+P_c*i] /= interp_f[i+P_f*i];
1562*eaf62fffSJeremy L Thompson     }
1563*eaf62fffSJeremy L Thompson   }
1564*eaf62fffSJeremy L Thompson   ierr = CeedFree(&tau); CeedChk(ierr);
1565*eaf62fffSJeremy L Thompson   ierr = CeedFree(&interp_c); CeedChk(ierr);
1566*eaf62fffSJeremy L Thompson   ierr = CeedFree(&interp_f); CeedChk(ierr);
1567*eaf62fffSJeremy L Thompson 
1568*eaf62fffSJeremy L Thompson   // Complete with interp_c_to_f versions of code
1569*eaf62fffSJeremy L Thompson   if (is_tensor_f) {
1570*eaf62fffSJeremy L Thompson     ierr = CeedOperatorMultigridLevelCreateTensorH1(op_fine, p_mult_fine,
1571*eaf62fffSJeremy L Thompson            rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
1572*eaf62fffSJeremy L Thompson     CeedChk(ierr);
1573*eaf62fffSJeremy L Thompson   } else {
1574*eaf62fffSJeremy L Thompson     ierr = CeedOperatorMultigridLevelCreateH1(op_fine, p_mult_fine,
1575*eaf62fffSJeremy L Thompson            rstr_coarse, basis_coarse, interp_c_to_f, op_coarse, op_prolong, op_restrict);
1576*eaf62fffSJeremy L Thompson     CeedChk(ierr);
1577*eaf62fffSJeremy L Thompson   }
1578*eaf62fffSJeremy L Thompson 
1579*eaf62fffSJeremy L Thompson   // Cleanup
1580*eaf62fffSJeremy L Thompson   ierr = CeedFree(&interp_c_to_f); CeedChk(ierr);
1581*eaf62fffSJeremy L Thompson   return CEED_ERROR_SUCCESS;
1582*eaf62fffSJeremy L Thompson }
1583*eaf62fffSJeremy L Thompson 
1584*eaf62fffSJeremy L Thompson /**
1585*eaf62fffSJeremy L Thompson   @brief Create a multigrid coarse operator and level transfer operators
1586*eaf62fffSJeremy L Thompson            for a CeedOperator with a tensor basis for the active basis
1587*eaf62fffSJeremy L Thompson 
1588*eaf62fffSJeremy L Thompson   @param[in] op_fine        Fine grid operator
1589*eaf62fffSJeremy L Thompson   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
1590*eaf62fffSJeremy L Thompson   @param[in] rstr_coarse    Coarse grid restriction
1591*eaf62fffSJeremy L Thompson   @param[in] basis_coarse   Coarse grid active vector basis
1592*eaf62fffSJeremy L Thompson   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
1593*eaf62fffSJeremy L Thompson   @param[out] op_coarse     Coarse grid operator
1594*eaf62fffSJeremy L Thompson   @param[out] op_prolong    Coarse to fine operator
1595*eaf62fffSJeremy L Thompson   @param[out] op_restrict   Fine to coarse operator
1596*eaf62fffSJeremy L Thompson 
1597*eaf62fffSJeremy L Thompson   @return An error code: 0 - success, otherwise - failure
1598*eaf62fffSJeremy L Thompson 
1599*eaf62fffSJeremy L Thompson   @ref User
1600*eaf62fffSJeremy L Thompson **/
1601*eaf62fffSJeremy L Thompson int CeedOperatorMultigridLevelCreateTensorH1(CeedOperator op_fine,
1602*eaf62fffSJeremy L Thompson     CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse,
1603*eaf62fffSJeremy L Thompson     const CeedScalar *interp_c_to_f, CeedOperator *op_coarse,
1604*eaf62fffSJeremy L Thompson     CeedOperator *op_prolong, CeedOperator *op_restrict) {
1605*eaf62fffSJeremy L Thompson   int ierr;
1606*eaf62fffSJeremy L Thompson   Ceed ceed;
1607*eaf62fffSJeremy L Thompson   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
1608*eaf62fffSJeremy L Thompson 
1609*eaf62fffSJeremy L Thompson   // Check for compatible quadrature spaces
1610*eaf62fffSJeremy L Thompson   CeedBasis basis_fine;
1611*eaf62fffSJeremy L Thompson   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
1612*eaf62fffSJeremy L Thompson   CeedInt Q_f, Q_c;
1613*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
1614*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
1615*eaf62fffSJeremy L Thompson   if (Q_f != Q_c)
1616*eaf62fffSJeremy L Thompson     // LCOV_EXCL_START
1617*eaf62fffSJeremy L Thompson     return CeedError(ceed, CEED_ERROR_DIMENSION,
1618*eaf62fffSJeremy L Thompson                      "Bases must have compatible quadrature spaces");
1619*eaf62fffSJeremy L Thompson   // LCOV_EXCL_STOP
1620*eaf62fffSJeremy L Thompson 
1621*eaf62fffSJeremy L Thompson   // Coarse to fine basis
1622*eaf62fffSJeremy L Thompson   CeedInt dim, num_comp, num_nodes_c, P_1d_f, P_1d_c;
1623*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
1624*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
1625*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetNumNodes1D(basis_fine, &P_1d_f); CeedChk(ierr);
1626*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
1627*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1628*eaf62fffSJeremy L Thompson   P_1d_c = dim == 1 ? num_nodes_c :
1629*eaf62fffSJeremy L Thompson            dim == 2 ? sqrt(num_nodes_c) :
1630*eaf62fffSJeremy L Thompson            cbrt(num_nodes_c);
1631*eaf62fffSJeremy L Thompson   CeedScalar *q_ref, *q_weight, *grad;
1632*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(P_1d_f, &q_ref); CeedChk(ierr);
1633*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(P_1d_f, &q_weight); CeedChk(ierr);
1634*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(P_1d_f*P_1d_c*dim, &grad); CeedChk(ierr);
1635*eaf62fffSJeremy L Thompson   CeedBasis basis_c_to_f;
1636*eaf62fffSJeremy L Thompson   ierr = CeedBasisCreateTensorH1(ceed, dim, num_comp, P_1d_c, P_1d_f,
1637*eaf62fffSJeremy L Thompson                                  interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
1638*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1639*eaf62fffSJeremy L Thompson   ierr = CeedFree(&q_ref); CeedChk(ierr);
1640*eaf62fffSJeremy L Thompson   ierr = CeedFree(&q_weight); CeedChk(ierr);
1641*eaf62fffSJeremy L Thompson   ierr = CeedFree(&grad); CeedChk(ierr);
1642*eaf62fffSJeremy L Thompson 
1643*eaf62fffSJeremy L Thompson   // Core code
1644*eaf62fffSJeremy L Thompson   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
1645*eaf62fffSJeremy L Thompson                                           basis_coarse, basis_c_to_f, op_coarse,
1646*eaf62fffSJeremy L Thompson                                           op_prolong, op_restrict);
1647*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1648*eaf62fffSJeremy L Thompson   return CEED_ERROR_SUCCESS;
1649*eaf62fffSJeremy L Thompson }
1650*eaf62fffSJeremy L Thompson 
1651*eaf62fffSJeremy L Thompson /**
1652*eaf62fffSJeremy L Thompson   @brief Create a multigrid coarse operator and level transfer operators
1653*eaf62fffSJeremy L Thompson            for a CeedOperator with a non-tensor basis for the active vector
1654*eaf62fffSJeremy L Thompson 
1655*eaf62fffSJeremy L Thompson   @param[in] op_fine        Fine grid operator
1656*eaf62fffSJeremy L Thompson   @param[in] p_mult_fine    L-vector multiplicity in parallel gather/scatter
1657*eaf62fffSJeremy L Thompson   @param[in] rstr_coarse    Coarse grid restriction
1658*eaf62fffSJeremy L Thompson   @param[in] basis_coarse   Coarse grid active vector basis
1659*eaf62fffSJeremy L Thompson   @param[in] interp_c_to_f  Matrix for coarse to fine interpolation
1660*eaf62fffSJeremy L Thompson   @param[out] op_coarse     Coarse grid operator
1661*eaf62fffSJeremy L Thompson   @param[out] op_prolong    Coarse to fine operator
1662*eaf62fffSJeremy L Thompson   @param[out] op_restrict   Fine to coarse operator
1663*eaf62fffSJeremy L Thompson 
1664*eaf62fffSJeremy L Thompson   @return An error code: 0 - success, otherwise - failure
1665*eaf62fffSJeremy L Thompson 
1666*eaf62fffSJeremy L Thompson   @ref User
1667*eaf62fffSJeremy L Thompson **/
1668*eaf62fffSJeremy L Thompson int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine,
1669*eaf62fffSJeremy L Thompson                                        CeedVector p_mult_fine,
1670*eaf62fffSJeremy L Thompson                                        CeedElemRestriction rstr_coarse,
1671*eaf62fffSJeremy L Thompson                                        CeedBasis basis_coarse,
1672*eaf62fffSJeremy L Thompson                                        const CeedScalar *interp_c_to_f,
1673*eaf62fffSJeremy L Thompson                                        CeedOperator *op_coarse,
1674*eaf62fffSJeremy L Thompson                                        CeedOperator *op_prolong,
1675*eaf62fffSJeremy L Thompson                                        CeedOperator *op_restrict) {
1676*eaf62fffSJeremy L Thompson   int ierr;
1677*eaf62fffSJeremy L Thompson   Ceed ceed;
1678*eaf62fffSJeremy L Thompson   ierr = CeedOperatorGetCeed(op_fine, &ceed); CeedChk(ierr);
1679*eaf62fffSJeremy L Thompson 
1680*eaf62fffSJeremy L Thompson   // Check for compatible quadrature spaces
1681*eaf62fffSJeremy L Thompson   CeedBasis basis_fine;
1682*eaf62fffSJeremy L Thompson   ierr = CeedOperatorGetActiveBasis(op_fine, &basis_fine); CeedChk(ierr);
1683*eaf62fffSJeremy L Thompson   CeedInt Q_f, Q_c;
1684*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetNumQuadraturePoints(basis_fine, &Q_f); CeedChk(ierr);
1685*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetNumQuadraturePoints(basis_coarse, &Q_c); CeedChk(ierr);
1686*eaf62fffSJeremy L Thompson   if (Q_f != Q_c)
1687*eaf62fffSJeremy L Thompson     // LCOV_EXCL_START
1688*eaf62fffSJeremy L Thompson     return CeedError(ceed, CEED_ERROR_DIMENSION,
1689*eaf62fffSJeremy L Thompson                      "Bases must have compatible quadrature spaces");
1690*eaf62fffSJeremy L Thompson   // LCOV_EXCL_STOP
1691*eaf62fffSJeremy L Thompson 
1692*eaf62fffSJeremy L Thompson   // Coarse to fine basis
1693*eaf62fffSJeremy L Thompson   CeedElemTopology topo;
1694*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetTopology(basis_fine, &topo); CeedChk(ierr);
1695*eaf62fffSJeremy L Thompson   CeedInt dim, num_comp, num_nodes_c, num_nodes_f;
1696*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetDimension(basis_fine, &dim); CeedChk(ierr);
1697*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetNumComponents(basis_fine, &num_comp); CeedChk(ierr);
1698*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetNumNodes(basis_fine, &num_nodes_f); CeedChk(ierr);
1699*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetElementSize(rstr_coarse, &num_nodes_c);
1700*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1701*eaf62fffSJeremy L Thompson   CeedScalar *q_ref, *q_weight, *grad;
1702*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(num_nodes_f, &q_ref); CeedChk(ierr);
1703*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(num_nodes_f, &q_weight); CeedChk(ierr);
1704*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(num_nodes_f*num_nodes_c*dim, &grad); CeedChk(ierr);
1705*eaf62fffSJeremy L Thompson   CeedBasis basis_c_to_f;
1706*eaf62fffSJeremy L Thompson   ierr = CeedBasisCreateH1(ceed, topo, num_comp, num_nodes_c, num_nodes_f,
1707*eaf62fffSJeremy L Thompson                            interp_c_to_f, grad, q_ref, q_weight, &basis_c_to_f);
1708*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1709*eaf62fffSJeremy L Thompson   ierr = CeedFree(&q_ref); CeedChk(ierr);
1710*eaf62fffSJeremy L Thompson   ierr = CeedFree(&q_weight); CeedChk(ierr);
1711*eaf62fffSJeremy L Thompson   ierr = CeedFree(&grad); CeedChk(ierr);
1712*eaf62fffSJeremy L Thompson 
1713*eaf62fffSJeremy L Thompson   // Core code
1714*eaf62fffSJeremy L Thompson   ierr = CeedSingleOperatorMultigridLevel(op_fine, p_mult_fine, rstr_coarse,
1715*eaf62fffSJeremy L Thompson                                           basis_coarse, basis_c_to_f, op_coarse,
1716*eaf62fffSJeremy L Thompson                                           op_prolong, op_restrict);
1717*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1718*eaf62fffSJeremy L Thompson   return CEED_ERROR_SUCCESS;
1719*eaf62fffSJeremy L Thompson }
1720*eaf62fffSJeremy L Thompson 
1721*eaf62fffSJeremy L Thompson /**
1722*eaf62fffSJeremy L Thompson   @brief Build a FDM based approximate inverse for each element for a
1723*eaf62fffSJeremy L Thompson            CeedOperator
1724*eaf62fffSJeremy L Thompson 
1725*eaf62fffSJeremy L Thompson   This returns a CeedOperator and CeedVector to apply a Fast Diagonalization
1726*eaf62fffSJeremy L Thompson     Method based approximate inverse. This function obtains the simultaneous
1727*eaf62fffSJeremy L Thompson     diagonalization for the 1D mass and Laplacian operators,
1728*eaf62fffSJeremy L Thompson       M = V^T V, K = V^T S V.
1729*eaf62fffSJeremy L Thompson     The assembled QFunction is used to modify the eigenvalues from simultaneous
1730*eaf62fffSJeremy L Thompson     diagonalization and obtain an approximate inverse of the form
1731*eaf62fffSJeremy L Thompson       V^T S^hat V. The CeedOperator must be linear and non-composite. The
1732*eaf62fffSJeremy L Thompson     associated CeedQFunction must therefore also be linear.
1733*eaf62fffSJeremy L Thompson 
1734*eaf62fffSJeremy L Thompson   @param op            CeedOperator to create element inverses
1735*eaf62fffSJeremy L Thompson   @param[out] fdm_inv  CeedOperator to apply the action of a FDM based inverse
1736*eaf62fffSJeremy L Thompson                          for each element
1737*eaf62fffSJeremy L Thompson   @param request       Address of CeedRequest for non-blocking completion, else
1738*eaf62fffSJeremy L Thompson                          @ref CEED_REQUEST_IMMEDIATE
1739*eaf62fffSJeremy L Thompson 
1740*eaf62fffSJeremy L Thompson   @return An error code: 0 - success, otherwise - failure
1741*eaf62fffSJeremy L Thompson 
1742*eaf62fffSJeremy L Thompson   @ref Backend
1743*eaf62fffSJeremy L Thompson **/
1744*eaf62fffSJeremy L Thompson int CeedOperatorCreateFDMElementInverse(CeedOperator op, CeedOperator *fdm_inv,
1745*eaf62fffSJeremy L Thompson                                         CeedRequest *request) {
1746*eaf62fffSJeremy L Thompson   int ierr;
1747*eaf62fffSJeremy L Thompson   ierr = CeedOperatorCheckReady(op); CeedChk(ierr);
1748*eaf62fffSJeremy L Thompson 
1749*eaf62fffSJeremy L Thompson   // Use backend version, if available
1750*eaf62fffSJeremy L Thompson   if (op->CreateFDMElementInverse) {
1751*eaf62fffSJeremy L Thompson     ierr = op->CreateFDMElementInverse(op, fdm_inv, request); CeedChk(ierr);
1752*eaf62fffSJeremy L Thompson     return CEED_ERROR_SUCCESS;
1753*eaf62fffSJeremy L Thompson   } else {
1754*eaf62fffSJeremy L Thompson     // Check for valid fallback resource
1755*eaf62fffSJeremy L Thompson     const char *resource, *fallback_resource;
1756*eaf62fffSJeremy L Thompson     ierr = CeedGetResource(op->ceed, &resource); CeedChk(ierr);
1757*eaf62fffSJeremy L Thompson     ierr = CeedGetOperatorFallbackResource(op->ceed, &fallback_resource);
1758*eaf62fffSJeremy L Thompson     CeedChk(ierr);
1759*eaf62fffSJeremy L Thompson     if (strcmp(fallback_resource, "") && strcmp(resource, fallback_resource)) {
1760*eaf62fffSJeremy L Thompson       // Fallback to reference Ceed
1761*eaf62fffSJeremy L Thompson       if (!op->op_fallback) {
1762*eaf62fffSJeremy L Thompson         ierr = CeedOperatorCreateFallback(op); CeedChk(ierr);
1763*eaf62fffSJeremy L Thompson       }
1764*eaf62fffSJeremy L Thompson       // Assemble
1765*eaf62fffSJeremy L Thompson       ierr = CeedOperatorCreateFDMElementInverse(op->op_fallback, fdm_inv, request);
1766*eaf62fffSJeremy L Thompson       CeedChk(ierr);
1767*eaf62fffSJeremy L Thompson       return CEED_ERROR_SUCCESS;
1768*eaf62fffSJeremy L Thompson     }
1769*eaf62fffSJeremy L Thompson   }
1770*eaf62fffSJeremy L Thompson 
1771*eaf62fffSJeremy L Thompson   // Interface implementation
1772*eaf62fffSJeremy L Thompson   Ceed ceed, ceed_parent;
1773*eaf62fffSJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
1774*eaf62fffSJeremy L Thompson   ierr = CeedGetOperatorFallbackParentCeed(ceed, &ceed_parent); CeedChk(ierr);
1775*eaf62fffSJeremy L Thompson   ceed_parent = ceed_parent ? ceed_parent : ceed;
1776*eaf62fffSJeremy L Thompson   CeedQFunction qf;
1777*eaf62fffSJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
1778*eaf62fffSJeremy L Thompson 
1779*eaf62fffSJeremy L Thompson   // Determine active input basis
1780*eaf62fffSJeremy L Thompson   bool interp = false, grad = false;
1781*eaf62fffSJeremy L Thompson   CeedBasis basis = NULL;
1782*eaf62fffSJeremy L Thompson   CeedElemRestriction rstr = NULL;
1783*eaf62fffSJeremy L Thompson   CeedOperatorField *op_fields;
1784*eaf62fffSJeremy L Thompson   CeedQFunctionField *qf_fields;
1785*eaf62fffSJeremy L Thompson   ierr = CeedOperatorGetFields(op, &op_fields, NULL); CeedChk(ierr);
1786*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, &qf_fields, NULL); CeedChk(ierr);
1787*eaf62fffSJeremy L Thompson   CeedInt num_input_fields;
1788*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionGetNumArgs(qf, &num_input_fields, NULL); CeedChk(ierr);
1789*eaf62fffSJeremy L Thompson   for (CeedInt i=0; i<num_input_fields; i++) {
1790*eaf62fffSJeremy L Thompson     CeedVector vec;
1791*eaf62fffSJeremy L Thompson     ierr = CeedOperatorFieldGetVector(op_fields[i], &vec); CeedChk(ierr);
1792*eaf62fffSJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
1793*eaf62fffSJeremy L Thompson       CeedEvalMode eval_mode;
1794*eaf62fffSJeremy L Thompson       ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode); CeedChk(ierr);
1795*eaf62fffSJeremy L Thompson       interp = interp || eval_mode == CEED_EVAL_INTERP;
1796*eaf62fffSJeremy L Thompson       grad = grad || eval_mode == CEED_EVAL_GRAD;
1797*eaf62fffSJeremy L Thompson       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChk(ierr);
1798*eaf62fffSJeremy L Thompson       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &rstr); CeedChk(ierr);
1799*eaf62fffSJeremy L Thompson     }
1800*eaf62fffSJeremy L Thompson   }
1801*eaf62fffSJeremy L Thompson   if (!basis)
1802*eaf62fffSJeremy L Thompson     // LCOV_EXCL_START
1803*eaf62fffSJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "No active field set");
1804*eaf62fffSJeremy L Thompson   // LCOV_EXCL_STOP
1805*eaf62fffSJeremy L Thompson   CeedInt P_1d, Q_1d, elem_size, num_qpts, dim, num_comp = 1, num_elem = 1,
1806*eaf62fffSJeremy L Thompson                                                 l_size = 1;
1807*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetNumNodes1D(basis, &P_1d); CeedChk(ierr);
1808*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetNumNodes(basis, &elem_size); CeedChk(ierr);
1809*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChk(ierr);
1810*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChk(ierr);
1811*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
1812*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetNumComponents(basis, &num_comp); CeedChk(ierr);
1813*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetNumElements(rstr, &num_elem); CeedChk(ierr);
1814*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetLVectorSize(rstr, &l_size); CeedChk(ierr);
1815*eaf62fffSJeremy L Thompson 
1816*eaf62fffSJeremy L Thompson   // Build and diagonalize 1D Mass and Laplacian
1817*eaf62fffSJeremy L Thompson   bool tensor_basis;
1818*eaf62fffSJeremy L Thompson   ierr = CeedBasisIsTensor(basis, &tensor_basis); CeedChk(ierr);
1819*eaf62fffSJeremy L Thompson   if (!tensor_basis)
1820*eaf62fffSJeremy L Thompson     // LCOV_EXCL_START
1821*eaf62fffSJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
1822*eaf62fffSJeremy L Thompson                      "FDMElementInverse only supported for tensor "
1823*eaf62fffSJeremy L Thompson                      "bases");
1824*eaf62fffSJeremy L Thompson   // LCOV_EXCL_STOP
1825*eaf62fffSJeremy L Thompson   CeedScalar *mass, *laplace, *x, *fdm_interp, *lambda;
1826*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(P_1d*P_1d, &mass); CeedChk(ierr);
1827*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(P_1d*P_1d, &laplace); CeedChk(ierr);
1828*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(P_1d*P_1d, &x); CeedChk(ierr);
1829*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(P_1d*P_1d, &fdm_interp); CeedChk(ierr);
1830*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(P_1d, &lambda); CeedChk(ierr);
1831*eaf62fffSJeremy L Thompson   // -- Build matrices
1832*eaf62fffSJeremy L Thompson   const CeedScalar *interp_1d, *grad_1d, *q_weight_1d;
1833*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetInterp1D(basis, &interp_1d); CeedChk(ierr);
1834*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetGrad1D(basis, &grad_1d); CeedChk(ierr);
1835*eaf62fffSJeremy L Thompson   ierr = CeedBasisGetQWeights(basis, &q_weight_1d); CeedChk(ierr);
1836*eaf62fffSJeremy L Thompson   ierr = CeedBuildMassLaplace(interp_1d, grad_1d, q_weight_1d, P_1d, Q_1d, dim,
1837*eaf62fffSJeremy L Thompson                               mass, laplace); CeedChk(ierr);
1838*eaf62fffSJeremy L Thompson 
1839*eaf62fffSJeremy L Thompson   // -- Diagonalize
1840*eaf62fffSJeremy L Thompson   ierr = CeedSimultaneousDiagonalization(ceed, laplace, mass, x, lambda, P_1d);
1841*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1842*eaf62fffSJeremy L Thompson   ierr = CeedFree(&mass); CeedChk(ierr);
1843*eaf62fffSJeremy L Thompson   ierr = CeedFree(&laplace); CeedChk(ierr);
1844*eaf62fffSJeremy L Thompson   for (CeedInt i=0; i<P_1d; i++)
1845*eaf62fffSJeremy L Thompson     for (CeedInt j=0; j<P_1d; j++)
1846*eaf62fffSJeremy L Thompson       fdm_interp[i+j*P_1d] = x[j+i*P_1d];
1847*eaf62fffSJeremy L Thompson   ierr = CeedFree(&x); CeedChk(ierr);
1848*eaf62fffSJeremy L Thompson 
1849*eaf62fffSJeremy L Thompson   // Assemble QFunction
1850*eaf62fffSJeremy L Thompson   CeedVector assembled;
1851*eaf62fffSJeremy L Thompson   CeedElemRestriction rstr_qf;
1852*eaf62fffSJeremy L Thompson   ierr =  CeedOperatorLinearAssembleQFunction(op, &assembled, &rstr_qf, request);
1853*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1854*eaf62fffSJeremy L Thompson   CeedInt layout[3];
1855*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionGetELayout(rstr_qf, &layout); CeedChk(ierr);
1856*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionDestroy(&rstr_qf); CeedChk(ierr);
1857*eaf62fffSJeremy L Thompson   CeedScalar max_norm = 0;
1858*eaf62fffSJeremy L Thompson   ierr = CeedVectorNorm(assembled, CEED_NORM_MAX, &max_norm); CeedChk(ierr);
1859*eaf62fffSJeremy L Thompson 
1860*eaf62fffSJeremy L Thompson   // Calculate element averages
1861*eaf62fffSJeremy L Thompson   CeedInt num_modes = (interp?1:0) + (grad?dim:0);
1862*eaf62fffSJeremy L Thompson   CeedScalar *elem_avg;
1863*eaf62fffSJeremy L Thompson   const CeedScalar *assembled_array, *q_weight_array;
1864*eaf62fffSJeremy L Thompson   CeedVector q_weight;
1865*eaf62fffSJeremy L Thompson   ierr = CeedVectorCreate(ceed_parent, num_qpts, &q_weight); CeedChk(ierr);
1866*eaf62fffSJeremy L Thompson   ierr = CeedBasisApply(basis, 1, CEED_NOTRANSPOSE, CEED_EVAL_WEIGHT,
1867*eaf62fffSJeremy L Thompson                         CEED_VECTOR_NONE, q_weight); CeedChk(ierr);
1868*eaf62fffSJeremy L Thompson   ierr = CeedVectorGetArrayRead(assembled, CEED_MEM_HOST, &assembled_array);
1869*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1870*eaf62fffSJeremy L Thompson   ierr = CeedVectorGetArrayRead(q_weight, CEED_MEM_HOST, &q_weight_array);
1871*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1872*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(num_elem, &elem_avg); CeedChk(ierr);
1873*eaf62fffSJeremy L Thompson   const CeedScalar qf_value_bound = max_norm*100*CEED_EPSILON;
1874*eaf62fffSJeremy L Thompson   for (CeedInt e=0; e<num_elem; e++) {
1875*eaf62fffSJeremy L Thompson     CeedInt count = 0;
1876*eaf62fffSJeremy L Thompson     for (CeedInt q=0; q<num_qpts; q++)
1877*eaf62fffSJeremy L Thompson       for (CeedInt i=0; i<num_comp*num_comp*num_modes*num_modes; i++)
1878*eaf62fffSJeremy L Thompson         if (fabs(assembled_array[q*layout[0] + i*layout[1] + e*layout[2]]) >
1879*eaf62fffSJeremy L Thompson             qf_value_bound) {
1880*eaf62fffSJeremy L Thompson           elem_avg[e] += assembled_array[q*layout[0] + i*layout[1] + e*layout[2]] /
1881*eaf62fffSJeremy L Thompson                          q_weight_array[q];
1882*eaf62fffSJeremy L Thompson           count++;
1883*eaf62fffSJeremy L Thompson         }
1884*eaf62fffSJeremy L Thompson     if (count) {
1885*eaf62fffSJeremy L Thompson       elem_avg[e] /= count;
1886*eaf62fffSJeremy L Thompson     } else {
1887*eaf62fffSJeremy L Thompson       elem_avg[e] = 1.0;
1888*eaf62fffSJeremy L Thompson     }
1889*eaf62fffSJeremy L Thompson   }
1890*eaf62fffSJeremy L Thompson   ierr = CeedVectorRestoreArrayRead(assembled, &assembled_array); CeedChk(ierr);
1891*eaf62fffSJeremy L Thompson   ierr = CeedVectorDestroy(&assembled); CeedChk(ierr);
1892*eaf62fffSJeremy L Thompson   ierr = CeedVectorRestoreArrayRead(q_weight, &q_weight_array); CeedChk(ierr);
1893*eaf62fffSJeremy L Thompson   ierr = CeedVectorDestroy(&q_weight); CeedChk(ierr);
1894*eaf62fffSJeremy L Thompson 
1895*eaf62fffSJeremy L Thompson   // Build FDM diagonal
1896*eaf62fffSJeremy L Thompson   CeedVector q_data;
1897*eaf62fffSJeremy L Thompson   CeedScalar *q_data_array, *fdm_diagonal;
1898*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(num_comp*elem_size, &fdm_diagonal); CeedChk(ierr);
1899*eaf62fffSJeremy L Thompson   const CeedScalar fdm_diagonal_bound = elem_size*CEED_EPSILON;
1900*eaf62fffSJeremy L Thompson   for (CeedInt c=0; c<num_comp; c++)
1901*eaf62fffSJeremy L Thompson     for (CeedInt n=0; n<elem_size; n++) {
1902*eaf62fffSJeremy L Thompson       if (interp)
1903*eaf62fffSJeremy L Thompson         fdm_diagonal[c*elem_size + n] = 1.0;
1904*eaf62fffSJeremy L Thompson       if (grad)
1905*eaf62fffSJeremy L Thompson         for (CeedInt d=0; d<dim; d++) {
1906*eaf62fffSJeremy L Thompson           CeedInt i = (n / CeedIntPow(P_1d, d)) % P_1d;
1907*eaf62fffSJeremy L Thompson           fdm_diagonal[c*elem_size + n] += lambda[i];
1908*eaf62fffSJeremy L Thompson         }
1909*eaf62fffSJeremy L Thompson       if (fabs(fdm_diagonal[c*elem_size + n]) < fdm_diagonal_bound)
1910*eaf62fffSJeremy L Thompson         fdm_diagonal[c*elem_size + n] = fdm_diagonal_bound;
1911*eaf62fffSJeremy L Thompson     }
1912*eaf62fffSJeremy L Thompson   ierr = CeedVectorCreate(ceed_parent, num_elem*num_comp*elem_size, &q_data);
1913*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1914*eaf62fffSJeremy L Thompson   ierr = CeedVectorSetValue(q_data, 0.0); CeedChk(ierr);
1915*eaf62fffSJeremy L Thompson   ierr = CeedVectorGetArray(q_data, CEED_MEM_HOST, &q_data_array); CeedChk(ierr);
1916*eaf62fffSJeremy L Thompson   for (CeedInt e=0; e<num_elem; e++)
1917*eaf62fffSJeremy L Thompson     for (CeedInt c=0; c<num_comp; c++)
1918*eaf62fffSJeremy L Thompson       for (CeedInt n=0; n<elem_size; n++)
1919*eaf62fffSJeremy L Thompson         q_data_array[(e*num_comp+c)*elem_size+n] = 1. / (elem_avg[e] *
1920*eaf62fffSJeremy L Thompson             fdm_diagonal[c*elem_size + n]);
1921*eaf62fffSJeremy L Thompson   ierr = CeedFree(&elem_avg); CeedChk(ierr);
1922*eaf62fffSJeremy L Thompson   ierr = CeedFree(&fdm_diagonal); CeedChk(ierr);
1923*eaf62fffSJeremy L Thompson   ierr = CeedVectorRestoreArray(q_data, &q_data_array); CeedChk(ierr);
1924*eaf62fffSJeremy L Thompson 
1925*eaf62fffSJeremy L Thompson   // Setup FDM operator
1926*eaf62fffSJeremy L Thompson   // -- Basis
1927*eaf62fffSJeremy L Thompson   CeedBasis fdm_basis;
1928*eaf62fffSJeremy L Thompson   CeedScalar *grad_dummy, *q_ref_dummy, *q_weight_dummy;
1929*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(P_1d*P_1d, &grad_dummy); CeedChk(ierr);
1930*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(P_1d, &q_ref_dummy); CeedChk(ierr);
1931*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(P_1d, &q_weight_dummy); CeedChk(ierr);
1932*eaf62fffSJeremy L Thompson   ierr = CeedBasisCreateTensorH1(ceed_parent, dim, num_comp, P_1d, P_1d,
1933*eaf62fffSJeremy L Thompson                                  fdm_interp, grad_dummy, q_ref_dummy,
1934*eaf62fffSJeremy L Thompson                                  q_weight_dummy, &fdm_basis); CeedChk(ierr);
1935*eaf62fffSJeremy L Thompson   ierr = CeedFree(&fdm_interp); CeedChk(ierr);
1936*eaf62fffSJeremy L Thompson   ierr = CeedFree(&grad_dummy); CeedChk(ierr);
1937*eaf62fffSJeremy L Thompson   ierr = CeedFree(&q_ref_dummy); CeedChk(ierr);
1938*eaf62fffSJeremy L Thompson   ierr = CeedFree(&q_weight_dummy); CeedChk(ierr);
1939*eaf62fffSJeremy L Thompson   ierr = CeedFree(&lambda); CeedChk(ierr);
1940*eaf62fffSJeremy L Thompson 
1941*eaf62fffSJeremy L Thompson   // -- Restriction
1942*eaf62fffSJeremy L Thompson   CeedElemRestriction rstr_qd_i;
1943*eaf62fffSJeremy L Thompson   CeedInt strides[3] = {1, elem_size, elem_size*num_comp};
1944*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionCreateStrided(ceed_parent, num_elem, elem_size,
1945*eaf62fffSJeremy L Thompson                                           num_comp, num_elem*num_comp*elem_size,
1946*eaf62fffSJeremy L Thompson                                           strides, &rstr_qd_i); CeedChk(ierr);
1947*eaf62fffSJeremy L Thompson   // -- QFunction
1948*eaf62fffSJeremy L Thompson   CeedQFunction qf_fdm;
1949*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionCreateInteriorByName(ceed_parent, "Scale", &qf_fdm);
1950*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1951*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionAddInput(qf_fdm, "input", num_comp, CEED_EVAL_INTERP);
1952*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1953*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionAddInput(qf_fdm, "scale", num_comp, CEED_EVAL_NONE);
1954*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1955*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionAddOutput(qf_fdm, "output", num_comp, CEED_EVAL_INTERP);
1956*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1957*eaf62fffSJeremy L Thompson   // -- QFunction context
1958*eaf62fffSJeremy L Thompson   CeedInt *num_comp_data;
1959*eaf62fffSJeremy L Thompson   ierr = CeedCalloc(1, &num_comp_data); CeedChk(ierr);
1960*eaf62fffSJeremy L Thompson   num_comp_data[0] = num_comp;
1961*eaf62fffSJeremy L Thompson   CeedQFunctionContext ctx_fdm;
1962*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionContextCreate(ceed, &ctx_fdm); CeedChk(ierr);
1963*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionContextSetData(ctx_fdm, CEED_MEM_HOST, CEED_OWN_POINTER,
1964*eaf62fffSJeremy L Thompson                                      sizeof(*num_comp_data), num_comp_data);
1965*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1966*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionSetContext(qf_fdm, ctx_fdm); CeedChk(ierr);
1967*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionContextDestroy(&ctx_fdm); CeedChk(ierr);
1968*eaf62fffSJeremy L Thompson   // -- Operator
1969*eaf62fffSJeremy L Thompson   ierr = CeedOperatorCreate(ceed_parent, qf_fdm, NULL, NULL, fdm_inv);
1970*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1971*eaf62fffSJeremy L Thompson   CeedOperatorSetField(*fdm_inv, "input", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
1972*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1973*eaf62fffSJeremy L Thompson   CeedOperatorSetField(*fdm_inv, "scale", rstr_qd_i, CEED_BASIS_COLLOCATED,
1974*eaf62fffSJeremy L Thompson                        q_data); CeedChk(ierr);
1975*eaf62fffSJeremy L Thompson   CeedOperatorSetField(*fdm_inv, "output", rstr, fdm_basis, CEED_VECTOR_ACTIVE);
1976*eaf62fffSJeremy L Thompson   CeedChk(ierr);
1977*eaf62fffSJeremy L Thompson 
1978*eaf62fffSJeremy L Thompson   // Cleanup
1979*eaf62fffSJeremy L Thompson   ierr = CeedVectorDestroy(&q_data); CeedChk(ierr);
1980*eaf62fffSJeremy L Thompson   ierr = CeedBasisDestroy(&fdm_basis); CeedChk(ierr);
1981*eaf62fffSJeremy L Thompson   ierr = CeedElemRestrictionDestroy(&rstr_qd_i); CeedChk(ierr);
1982*eaf62fffSJeremy L Thompson   ierr = CeedQFunctionDestroy(&qf_fdm); CeedChk(ierr);
1983*eaf62fffSJeremy L Thompson 
1984*eaf62fffSJeremy L Thompson   return CEED_ERROR_SUCCESS;
1985*eaf62fffSJeremy L Thompson }
1986*eaf62fffSJeremy L Thompson 
1987*eaf62fffSJeremy L Thompson /// @}
1988