xref: /libCEED/backends/opt/ceed-opt-operator.c (revision d1d35e2f02dc969aee8debf3fd943dd784aa847a)
189c6efa4Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
289c6efa4Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
389c6efa4Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
489c6efa4Sjeremylt //
589c6efa4Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
689c6efa4Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
789c6efa4Sjeremylt // element discretizations for exascale applications. For more information and
889c6efa4Sjeremylt // source code availability see http://github.com/ceed.
989c6efa4Sjeremylt //
1089c6efa4Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1189c6efa4Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
1289c6efa4Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
1389c6efa4Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
1489c6efa4Sjeremylt // software, applications, hardware, advanced system engineering and early
1589c6efa4Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
1689c6efa4Sjeremylt 
17ec3da8bcSJed Brown #include <ceed/ceed.h>
18ec3da8bcSJed Brown #include <ceed/backend.h>
193d576824SJeremy L Thompson #include <stdbool.h>
203d576824SJeremy L Thompson #include <stdint.h>
2189c6efa4Sjeremylt #include <string.h>
2289c6efa4Sjeremylt #include "ceed-opt.h"
2389c6efa4Sjeremylt 
24f10650afSjeremylt //------------------------------------------------------------------------------
25f10650afSjeremylt // Setup Input/Output Fields
26f10650afSjeremylt //------------------------------------------------------------------------------
2789c6efa4Sjeremylt static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op,
28*d1d35e2fSjeremylt                                        bool inOrOut, const CeedInt blk_size,
29*d1d35e2fSjeremylt                                        CeedElemRestriction *blk_restr,
30*d1d35e2fSjeremylt                                        CeedVector *full_evecs, CeedVector *e_vecs,
31*d1d35e2fSjeremylt                                        CeedVector *q_vecs, CeedInt start_e,
32*d1d35e2fSjeremylt                                        CeedInt num_fields, CeedInt Q) {
33*d1d35e2fSjeremylt   CeedInt dim, ierr, num_comp, size, P;
3489c6efa4Sjeremylt   Ceed ceed;
35e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
3689c6efa4Sjeremylt   CeedBasis basis;
3789c6efa4Sjeremylt   CeedElemRestriction r;
38*d1d35e2fSjeremylt   CeedOperatorField *op_fields;
39*d1d35e2fSjeremylt   CeedQFunctionField *qf_fields;
4089c6efa4Sjeremylt   if (inOrOut) {
41*d1d35e2fSjeremylt     ierr = CeedOperatorGetFields(op, NULL, &op_fields);
42e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
43*d1d35e2fSjeremylt     ierr = CeedQFunctionGetFields(qf, NULL, &qf_fields);
44e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
4589c6efa4Sjeremylt   } else {
46*d1d35e2fSjeremylt     ierr = CeedOperatorGetFields(op, &op_fields, NULL);
47e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
48*d1d35e2fSjeremylt     ierr = CeedQFunctionGetFields(qf, &qf_fields, NULL);
49e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
5089c6efa4Sjeremylt   }
5189c6efa4Sjeremylt 
5289c6efa4Sjeremylt   // Loop over fields
53*d1d35e2fSjeremylt   for (CeedInt i=0; i<num_fields; i++) {
54*d1d35e2fSjeremylt     CeedEvalMode eval_mode;
55*d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_fields[i], &eval_mode);
56*d1d35e2fSjeremylt     CeedChkBackend(ierr);
5789c6efa4Sjeremylt 
58*d1d35e2fSjeremylt     if (eval_mode != CEED_EVAL_WEIGHT) {
59*d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetElemRestriction(op_fields[i], &r);
60e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
6189c6efa4Sjeremylt       Ceed ceed;
62e15f9bd0SJeremy L Thompson       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
63*d1d35e2fSjeremylt       CeedInt num_elem, elem_size, l_size, comp_stride;
64*d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
65*d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
66*d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
67*d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
68bd33150aSjeremylt 
693ac43b2cSJeremy L Thompson       bool strided;
70e15f9bd0SJeremy L Thompson       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
713ac43b2cSJeremy L Thompson       if (strided) {
723ac43b2cSJeremy L Thompson         CeedInt strides[3];
73e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
74*d1d35e2fSjeremylt         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, elem_size,
75*d1d35e2fSjeremylt                blk_size, num_comp, l_size, strides, &blk_restr[i+start_e]);
76e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
773ac43b2cSJeremy L Thompson       } else {
78bd33150aSjeremylt         const CeedInt *offsets = NULL;
79bd33150aSjeremylt         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
80e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
81*d1d35e2fSjeremylt         ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
82*d1d35e2fSjeremylt         ierr = CeedElemRestrictionCreateBlocked(ceed, num_elem, elem_size,
83*d1d35e2fSjeremylt                                                 blk_size, num_comp, comp_stride,
84*d1d35e2fSjeremylt                                                 l_size, CEED_MEM_HOST,
85bd33150aSjeremylt                                                 CEED_COPY_VALUES, offsets,
86*d1d35e2fSjeremylt                                                 &blk_restr[i+start_e]);
87e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
88e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
893ac43b2cSJeremy L Thompson       }
90*d1d35e2fSjeremylt       ierr = CeedElemRestrictionCreateVector(blk_restr[i+start_e], NULL,
91*d1d35e2fSjeremylt                                              &full_evecs[i+start_e]);
92e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
9389c6efa4Sjeremylt     }
9489c6efa4Sjeremylt 
95*d1d35e2fSjeremylt     switch(eval_mode) {
9689c6efa4Sjeremylt     case CEED_EVAL_NONE:
97*d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
98*d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &e_vecs[i]);
99*d1d35e2fSjeremylt       CeedChkBackend(ierr);
100*d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
101*d1d35e2fSjeremylt       CeedChkBackend(ierr);
10289c6efa4Sjeremylt       break;
10389c6efa4Sjeremylt     case CEED_EVAL_INTERP:
104*d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
10589c6efa4Sjeremylt       ierr = CeedElemRestrictionGetElementSize(r, &P);
106e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
107*d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, P*size*blk_size, &e_vecs[i]);
108*d1d35e2fSjeremylt       CeedChkBackend(ierr);
109*d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
110*d1d35e2fSjeremylt       CeedChkBackend(ierr);
11189c6efa4Sjeremylt       break;
11289c6efa4Sjeremylt     case CEED_EVAL_GRAD:
113*d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
114*d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_fields[i], &size); CeedChkBackend(ierr);
115e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
11689c6efa4Sjeremylt       ierr = CeedElemRestrictionGetElementSize(r, &P);
117e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
118*d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, P*size/dim*blk_size, &e_vecs[i]);
119e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
120*d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*size*blk_size, &q_vecs[i]);
121*d1d35e2fSjeremylt       CeedChkBackend(ierr);
12289c6efa4Sjeremylt       break;
12389c6efa4Sjeremylt     case CEED_EVAL_WEIGHT: // Only on input fields
124*d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_fields[i], &basis); CeedChkBackend(ierr);
125*d1d35e2fSjeremylt       ierr = CeedVectorCreate(ceed, Q*blk_size, &q_vecs[i]); CeedChkBackend(ierr);
126*d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
127*d1d35e2fSjeremylt                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, q_vecs[i]);
128e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
12989c6efa4Sjeremylt 
13089c6efa4Sjeremylt       break;
13189c6efa4Sjeremylt     case CEED_EVAL_DIV:
1325f67fadeSJeremy L Thompson       break; // Not implemented
13389c6efa4Sjeremylt     case CEED_EVAL_CURL:
1345f67fadeSJeremy L Thompson       break; // Not implemented
13589c6efa4Sjeremylt     }
13689c6efa4Sjeremylt   }
137e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
13889c6efa4Sjeremylt }
13989c6efa4Sjeremylt 
140f10650afSjeremylt //------------------------------------------------------------------------------
141f10650afSjeremylt // Setup Operator
142f10650afSjeremylt //------------------------------------------------------------------------------
14389c6efa4Sjeremylt static int CeedOperatorSetup_Opt(CeedOperator op) {
14489c6efa4Sjeremylt   int ierr;
145*d1d35e2fSjeremylt   bool setup_done;
146*d1d35e2fSjeremylt   ierr = CeedOperatorIsSetupDone(op, &setup_done); CeedChkBackend(ierr);
147*d1d35e2fSjeremylt   if (setup_done) return CEED_ERROR_SUCCESS;
14889c6efa4Sjeremylt   Ceed ceed;
149e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
150*d1d35e2fSjeremylt   Ceed_Opt *ceed_impl;
151*d1d35e2fSjeremylt   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
152*d1d35e2fSjeremylt   const CeedInt blk_size = ceed_impl->blk_size;
15389c6efa4Sjeremylt   CeedOperator_Opt *impl;
154e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
15589c6efa4Sjeremylt   CeedQFunction qf;
156e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
157*d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields;
158e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
159*d1d35e2fSjeremylt   ierr = CeedQFunctionIsIdentity(qf, &impl->identity_qf); CeedChkBackend(ierr);
160*d1d35e2fSjeremylt   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
161e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
162*d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
163*d1d35e2fSjeremylt   ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields);
164e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
165*d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
166*d1d35e2fSjeremylt   ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields);
167e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
16889c6efa4Sjeremylt 
16989c6efa4Sjeremylt   // Allocate
170*d1d35e2fSjeremylt   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->blk_restr);
171e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
172*d1d35e2fSjeremylt   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_vecs);
173e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
174*d1d35e2fSjeremylt   ierr = CeedCalloc(num_input_fields + num_output_fields, &impl->e_data);
175e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
17689c6efa4Sjeremylt 
177*d1d35e2fSjeremylt   ierr = CeedCalloc(16, &impl->input_state); CeedChkBackend(ierr);
178*d1d35e2fSjeremylt   ierr = CeedCalloc(16, &impl->e_vecs_in); CeedChkBackend(ierr);
179*d1d35e2fSjeremylt   ierr = CeedCalloc(16, &impl->e_vecs_out); CeedChkBackend(ierr);
180*d1d35e2fSjeremylt   ierr = CeedCalloc(16, &impl->q_vecs_in); CeedChkBackend(ierr);
181*d1d35e2fSjeremylt   ierr = CeedCalloc(16, &impl->q_vecs_out); CeedChkBackend(ierr);
18289c6efa4Sjeremylt 
183*d1d35e2fSjeremylt   impl->num_e_vecs_in = num_input_fields;
184*d1d35e2fSjeremylt   impl->num_e_vecs_out = num_output_fields;
18589c6efa4Sjeremylt 
18689c6efa4Sjeremylt   // Set up infield and outfield pointer arrays
18789c6efa4Sjeremylt   // Infields
188*d1d35e2fSjeremylt   ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blk_size, impl->blk_restr,
189*d1d35e2fSjeremylt                                      impl->e_vecs, impl->e_vecs_in,
190*d1d35e2fSjeremylt                                      impl->q_vecs_in, 0, num_input_fields, Q);
191e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
19289c6efa4Sjeremylt   // Outfields
193*d1d35e2fSjeremylt   ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blk_size, impl->blk_restr,
194*d1d35e2fSjeremylt                                      impl->e_vecs, impl->e_vecs_out,
195*d1d35e2fSjeremylt                                      impl->q_vecs_out, num_input_fields,
196*d1d35e2fSjeremylt                                      num_output_fields, Q);
197e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
19889c6efa4Sjeremylt 
19916911fdaSjeremylt   // Identity QFunctions
200*d1d35e2fSjeremylt   if (impl->identity_qf) {
201*d1d35e2fSjeremylt     CeedEvalMode in_mode, out_mode;
202*d1d35e2fSjeremylt     CeedQFunctionField *in_fields, *out_fields;
203*d1d35e2fSjeremylt     ierr = CeedQFunctionGetFields(qf, &in_fields, &out_fields);
204e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
20516911fdaSjeremylt 
206*d1d35e2fSjeremylt     for (CeedInt i=0; i<num_input_fields; i++) {
207*d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetEvalMode(in_fields[i], &in_mode);
208*d1d35e2fSjeremylt       CeedChkBackend(ierr);
209*d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetEvalMode(out_fields[i], &out_mode);
210*d1d35e2fSjeremylt       CeedChkBackend(ierr);
211*d1d35e2fSjeremylt 
212*d1d35e2fSjeremylt       ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
213*d1d35e2fSjeremylt       impl->q_vecs_out[i] = impl->q_vecs_in[i];
214*d1d35e2fSjeremylt       ierr = CeedVectorAddReference(impl->q_vecs_in[i]); CeedChkBackend(ierr);
21516911fdaSjeremylt     }
21616911fdaSjeremylt   }
21716911fdaSjeremylt 
218e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
21989c6efa4Sjeremylt 
220e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
22189c6efa4Sjeremylt }
22289c6efa4Sjeremylt 
223f10650afSjeremylt //------------------------------------------------------------------------------
224f10650afSjeremylt // Setup Input Fields
225f10650afSjeremylt //------------------------------------------------------------------------------
226*d1d35e2fSjeremylt static inline int CeedOperatorSetupInputs_Opt(CeedInt num_input_fields,
227*d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
228*d1d35e2fSjeremylt     CeedVector in_vec, CeedOperator_Opt *impl, CeedRequest *request) {
2291d102b48SJeremy L Thompson   CeedInt ierr;
230*d1d35e2fSjeremylt   CeedEvalMode eval_mode;
23189c6efa4Sjeremylt   CeedVector vec;
23289c6efa4Sjeremylt   uint64_t state;
23389c6efa4Sjeremylt 
234*d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
235*d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
236e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
237*d1d35e2fSjeremylt     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
23889c6efa4Sjeremylt     } else {
23989c6efa4Sjeremylt       // Get input vector
240*d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
241*d1d35e2fSjeremylt       CeedChkBackend(ierr);
24289c6efa4Sjeremylt       if (vec != CEED_VECTOR_ACTIVE) {
24389c6efa4Sjeremylt         // Restrict
244e15f9bd0SJeremy L Thompson         ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
245*d1d35e2fSjeremylt         if (state != impl->input_state[i]) {
246*d1d35e2fSjeremylt           ierr = CeedElemRestrictionApply(impl->blk_restr[i], CEED_NOTRANSPOSE,
247*d1d35e2fSjeremylt                                           vec, impl->e_vecs[i], request);
248e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
249*d1d35e2fSjeremylt           impl->input_state[i] = state;
25089c6efa4Sjeremylt         }
25189c6efa4Sjeremylt       } else {
25289c6efa4Sjeremylt         // Set Qvec for CEED_EVAL_NONE
253*d1d35e2fSjeremylt         if (eval_mode == CEED_EVAL_NONE) {
254*d1d35e2fSjeremylt           ierr = CeedVectorGetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
255*d1d35e2fSjeremylt                                     &impl->e_data[i]); CeedChkBackend(ierr);
256*d1d35e2fSjeremylt           ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
25789c6efa4Sjeremylt                                     CEED_USE_POINTER,
258*d1d35e2fSjeremylt                                     impl->e_data[i]); CeedChkBackend(ierr);
259*d1d35e2fSjeremylt           ierr = CeedVectorRestoreArray(impl->e_vecs_in[i],
260*d1d35e2fSjeremylt                                         &impl->e_data[i]); CeedChkBackend(ierr);
26189c6efa4Sjeremylt         }
26289c6efa4Sjeremylt       }
26389c6efa4Sjeremylt       // Get evec
264*d1d35e2fSjeremylt       ierr = CeedVectorGetArrayRead(impl->e_vecs[i], CEED_MEM_HOST,
265*d1d35e2fSjeremylt                                     (const CeedScalar **) &impl->e_data[i]);
266e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
26789c6efa4Sjeremylt     }
26889c6efa4Sjeremylt   }
269e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2701d102b48SJeremy L Thompson }
27189c6efa4Sjeremylt 
272f10650afSjeremylt //------------------------------------------------------------------------------
273f10650afSjeremylt // Input Basis Action
274f10650afSjeremylt //------------------------------------------------------------------------------
2751d102b48SJeremy L Thompson static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q,
276*d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
277*d1d35e2fSjeremylt     CeedInt num_input_fields, CeedInt blk_size, CeedVector in_vec, bool skip_active,
2781d102b48SJeremy L Thompson     CeedOperator_Opt *impl, CeedRequest *request) {
2791d102b48SJeremy L Thompson   CeedInt ierr;
280*d1d35e2fSjeremylt   CeedInt dim, elem_size, size;
281*d1d35e2fSjeremylt   CeedElemRestriction elem_restr;
282*d1d35e2fSjeremylt   CeedEvalMode eval_mode;
2831d102b48SJeremy L Thompson   CeedBasis basis;
2841d102b48SJeremy L Thompson   CeedVector vec;
28589c6efa4Sjeremylt 
286*d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
287*d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
288*d1d35e2fSjeremylt     CeedChkBackend(ierr);
2891d102b48SJeremy L Thompson     // Skip active input
290*d1d35e2fSjeremylt     if (skip_active) {
2911d102b48SJeremy L Thompson       if (vec == CEED_VECTOR_ACTIVE)
2921d102b48SJeremy L Thompson         continue;
2931d102b48SJeremy L Thompson     }
2941d102b48SJeremy L Thompson 
295*d1d35e2fSjeremylt     CeedInt active_in = 0;
296*d1d35e2fSjeremylt     // Get elem_size, eval_mode, size
297*d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_restr);
298e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
299*d1d35e2fSjeremylt     ierr = CeedElemRestrictionGetElementSize(elem_restr, &elem_size);
300e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
301*d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
302e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
303*d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
304*d1d35e2fSjeremylt     CeedChkBackend(ierr);
30589c6efa4Sjeremylt     // Restrict block active input
30689c6efa4Sjeremylt     if (vec == CEED_VECTOR_ACTIVE) {
307*d1d35e2fSjeremylt       ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i], e/blk_size,
308*d1d35e2fSjeremylt                                            CEED_NOTRANSPOSE, in_vec,
309*d1d35e2fSjeremylt                                            impl->e_vecs_in[i], request);
310e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
311*d1d35e2fSjeremylt       active_in = 1;
31289c6efa4Sjeremylt     }
31389c6efa4Sjeremylt     // Basis action
314*d1d35e2fSjeremylt     switch(eval_mode) {
31589c6efa4Sjeremylt     case CEED_EVAL_NONE:
316*d1d35e2fSjeremylt       if (!active_in) {
317*d1d35e2fSjeremylt         ierr = CeedVectorSetArray(impl->q_vecs_in[i], CEED_MEM_HOST,
31889c6efa4Sjeremylt                                   CEED_USE_POINTER,
319*d1d35e2fSjeremylt                                   &impl->e_data[i][e*Q*size]); CeedChkBackend(ierr);
32089c6efa4Sjeremylt       }
32189c6efa4Sjeremylt       break;
32289c6efa4Sjeremylt     case CEED_EVAL_INTERP:
323*d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
324e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
325*d1d35e2fSjeremylt       if (!active_in) {
326*d1d35e2fSjeremylt         ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
32789c6efa4Sjeremylt                                   CEED_USE_POINTER,
328*d1d35e2fSjeremylt                                   &impl->e_data[i][e*elem_size*size]);
329e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
33089c6efa4Sjeremylt       }
331*d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
332*d1d35e2fSjeremylt                             CEED_EVAL_INTERP, impl->e_vecs_in[i],
333*d1d35e2fSjeremylt                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
33489c6efa4Sjeremylt       break;
33589c6efa4Sjeremylt     case CEED_EVAL_GRAD:
336*d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
337e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
338*d1d35e2fSjeremylt       if (!active_in) {
339e15f9bd0SJeremy L Thompson         ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
340*d1d35e2fSjeremylt         ierr = CeedVectorSetArray(impl->e_vecs_in[i], CEED_MEM_HOST,
34189c6efa4Sjeremylt                                   CEED_USE_POINTER,
342*d1d35e2fSjeremylt                                   &impl->e_data[i][e*elem_size*size/dim]);
343e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
34489c6efa4Sjeremylt       }
345*d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_NOTRANSPOSE,
346*d1d35e2fSjeremylt                             CEED_EVAL_GRAD, impl->e_vecs_in[i],
347*d1d35e2fSjeremylt                             impl->q_vecs_in[i]); CeedChkBackend(ierr);
34889c6efa4Sjeremylt       break;
34989c6efa4Sjeremylt     case CEED_EVAL_WEIGHT:
35089c6efa4Sjeremylt       break;  // No action
351bbfacfcdSjeremylt     // LCOV_EXCL_START
35289c6efa4Sjeremylt     case CEED_EVAL_DIV:
3531d102b48SJeremy L Thompson     case CEED_EVAL_CURL: {
354*d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_input_fields[i], &basis);
355e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
3561d102b48SJeremy L Thompson       Ceed ceed;
357e15f9bd0SJeremy L Thompson       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
358e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
359e15f9bd0SJeremy L Thompson                        "Ceed evaluation mode not implemented");
360bbfacfcdSjeremylt       // LCOV_EXCL_STOP
3611d102b48SJeremy L Thompson     }
3621d102b48SJeremy L Thompson     }
3631d102b48SJeremy L Thompson   }
364e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
3651d102b48SJeremy L Thompson }
36689c6efa4Sjeremylt 
367f10650afSjeremylt //------------------------------------------------------------------------------
368f10650afSjeremylt // Output Basis Action
369f10650afSjeremylt //------------------------------------------------------------------------------
3701d102b48SJeremy L Thompson static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q,
371*d1d35e2fSjeremylt     CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields,
372*d1d35e2fSjeremylt     CeedInt blk_size, CeedInt num_input_fields, CeedInt num_output_fields,
373*d1d35e2fSjeremylt     CeedOperator op, CeedVector out_vec, CeedOperator_Opt *impl,
3741d102b48SJeremy L Thompson     CeedRequest *request) {
3751d102b48SJeremy L Thompson   CeedInt ierr;
376*d1d35e2fSjeremylt   CeedElemRestriction elem_restr;
377*d1d35e2fSjeremylt   CeedEvalMode eval_mode;
3781d102b48SJeremy L Thompson   CeedBasis basis;
3791d102b48SJeremy L Thompson   CeedVector vec;
3801d102b48SJeremy L Thompson 
381*d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
382*d1d35e2fSjeremylt     // Get elem_size, eval_mode, size
383*d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_restr);
384e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
385*d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
386e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
38789c6efa4Sjeremylt     // Basis action
388*d1d35e2fSjeremylt     switch(eval_mode) {
38989c6efa4Sjeremylt     case CEED_EVAL_NONE:
39089c6efa4Sjeremylt       break; // No action
39189c6efa4Sjeremylt     case CEED_EVAL_INTERP:
392*d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
393e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
394*d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
395*d1d35e2fSjeremylt                             CEED_EVAL_INTERP, impl->q_vecs_out[i],
396*d1d35e2fSjeremylt                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
39789c6efa4Sjeremylt       break;
39889c6efa4Sjeremylt     case CEED_EVAL_GRAD:
399*d1d35e2fSjeremylt       ierr = CeedOperatorFieldGetBasis(op_output_fields[i], &basis);
400e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
401*d1d35e2fSjeremylt       ierr = CeedBasisApply(basis, blk_size, CEED_TRANSPOSE,
402*d1d35e2fSjeremylt                             CEED_EVAL_GRAD, impl->q_vecs_out[i],
403*d1d35e2fSjeremylt                             impl->e_vecs_out[i]); CeedChkBackend(ierr);
40489c6efa4Sjeremylt       break;
405c042f62fSJeremy L Thompson     // LCOV_EXCL_START
406bbfacfcdSjeremylt     case CEED_EVAL_WEIGHT: {
40789c6efa4Sjeremylt       Ceed ceed;
408e15f9bd0SJeremy L Thompson       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
409e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
410e15f9bd0SJeremy L Thompson                        "CEED_EVAL_WEIGHT cannot be an output "
4111d102b48SJeremy L Thompson                        "evaluation mode");
41289c6efa4Sjeremylt     }
41389c6efa4Sjeremylt     case CEED_EVAL_DIV:
4141d102b48SJeremy L Thompson     case CEED_EVAL_CURL: {
4151d102b48SJeremy L Thompson       Ceed ceed;
416e15f9bd0SJeremy L Thompson       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
417e15f9bd0SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND,
418e15f9bd0SJeremy L Thompson                        "Ceed evaluation mode not implemented");
419bbfacfcdSjeremylt       // LCOV_EXCL_STOP
4201d102b48SJeremy L Thompson     }
42189c6efa4Sjeremylt     }
42289c6efa4Sjeremylt     // Restrict output block
42389c6efa4Sjeremylt     // Get output vector
424*d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
425e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
42689c6efa4Sjeremylt     if (vec == CEED_VECTOR_ACTIVE)
427*d1d35e2fSjeremylt       vec = out_vec;
42889c6efa4Sjeremylt     // Restrict
429*d1d35e2fSjeremylt     ierr = CeedElemRestrictionApplyBlock(impl->blk_restr[i+impl->num_e_vecs_in],
430*d1d35e2fSjeremylt                                          e/blk_size, CEED_TRANSPOSE,
431*d1d35e2fSjeremylt                                          impl->e_vecs_out[i], vec, request);
432e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
43389c6efa4Sjeremylt   }
434e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
43589c6efa4Sjeremylt }
43689c6efa4Sjeremylt 
437f10650afSjeremylt //------------------------------------------------------------------------------
438f10650afSjeremylt // Restore Input Vectors
439f10650afSjeremylt //------------------------------------------------------------------------------
440*d1d35e2fSjeremylt static inline int CeedOperatorRestoreInputs_Opt(CeedInt num_input_fields,
441*d1d35e2fSjeremylt     CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields,
442187168c7SJeremy L Thompson     CeedOperator_Opt *impl) {
4431d102b48SJeremy L Thompson   CeedInt ierr;
444*d1d35e2fSjeremylt   CeedEvalMode eval_mode;
4451d102b48SJeremy L Thompson 
446*d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
447*d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode);
448e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
449*d1d35e2fSjeremylt     if (eval_mode == CEED_EVAL_WEIGHT) { // Skip
45089c6efa4Sjeremylt     } else {
451*d1d35e2fSjeremylt       ierr = CeedVectorRestoreArrayRead(impl->e_vecs[i],
452*d1d35e2fSjeremylt                                         (const CeedScalar **) &impl->e_data[i]);
453e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
45489c6efa4Sjeremylt     }
45589c6efa4Sjeremylt   }
456e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4571d102b48SJeremy L Thompson }
4581d102b48SJeremy L Thompson 
459f10650afSjeremylt //------------------------------------------------------------------------------
460f10650afSjeremylt // Operator Apply
461f10650afSjeremylt //------------------------------------------------------------------------------
462*d1d35e2fSjeremylt static int CeedOperatorApplyAdd_Opt(CeedOperator op, CeedVector in_vec,
463*d1d35e2fSjeremylt                                     CeedVector out_vec, CeedRequest *request) {
4641d102b48SJeremy L Thompson   int ierr;
4651d102b48SJeremy L Thompson   Ceed ceed;
466e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
467*d1d35e2fSjeremylt   Ceed_Opt *ceed_impl;
468*d1d35e2fSjeremylt   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
469*d1d35e2fSjeremylt   CeedInt blk_size = ceed_impl->blk_size;
4701d102b48SJeremy L Thompson   CeedOperator_Opt *impl;
471e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
472*d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields, num_elem;
473*d1d35e2fSjeremylt   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
474e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
475*d1d35e2fSjeremylt   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
4761d102b48SJeremy L Thompson   CeedQFunction qf;
477e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
478*d1d35e2fSjeremylt   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
479e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
480*d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
481*d1d35e2fSjeremylt   ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields);
482e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
483*d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
484*d1d35e2fSjeremylt   ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields);
485e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
486*d1d35e2fSjeremylt   CeedEvalMode eval_mode;
4871d102b48SJeremy L Thompson 
4881d102b48SJeremy L Thompson   // Setup
489e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
4901d102b48SJeremy L Thompson 
4911d102b48SJeremy L Thompson   // Input Evecs and Restriction
492*d1d35e2fSjeremylt   ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields,
493*d1d35e2fSjeremylt                                      op_input_fields, in_vec, impl, request);
494e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
4951d102b48SJeremy L Thompson 
4961d102b48SJeremy L Thompson   // Output Lvecs, Evecs, and Qvecs
497*d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
4981d102b48SJeremy L Thompson     // Set Qvec if needed
499*d1d35e2fSjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode);
500e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
501*d1d35e2fSjeremylt     if (eval_mode == CEED_EVAL_NONE) {
5021d102b48SJeremy L Thompson       // Set qvec to single block evec
503*d1d35e2fSjeremylt       ierr = CeedVectorGetArray(impl->e_vecs_out[i], CEED_MEM_HOST,
504*d1d35e2fSjeremylt                                 &impl->e_data[i + num_input_fields]);
505e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
506*d1d35e2fSjeremylt       ierr = CeedVectorSetArray(impl->q_vecs_out[i], CEED_MEM_HOST,
507*d1d35e2fSjeremylt                                 CEED_USE_POINTER, impl->e_data[i + num_input_fields]); CeedChkBackend(ierr);
508*d1d35e2fSjeremylt       ierr = CeedVectorRestoreArray(impl->e_vecs_out[i],
509*d1d35e2fSjeremylt                                     &impl->e_data[i + num_input_fields]);
510e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
5111d102b48SJeremy L Thompson     }
5121d102b48SJeremy L Thompson   }
5131d102b48SJeremy L Thompson 
5141d102b48SJeremy L Thompson   // Loop through elements
515*d1d35e2fSjeremylt   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
5161d102b48SJeremy L Thompson     // Input basis apply
517*d1d35e2fSjeremylt     ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields,
518*d1d35e2fSjeremylt                                       num_input_fields, blk_size, in_vec, false,
519e15f9bd0SJeremy L Thompson                                       impl, request); CeedChkBackend(ierr);
5201d102b48SJeremy L Thompson 
5211d102b48SJeremy L Thompson     // Q function
522*d1d35e2fSjeremylt     if (!impl->identity_qf) {
523*d1d35e2fSjeremylt       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
524e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
52516911fdaSjeremylt     }
5261d102b48SJeremy L Thompson 
5271d102b48SJeremy L Thompson     // Output basis apply and restrict
528*d1d35e2fSjeremylt     ierr = CeedOperatorOutputBasis_Opt(e, Q, qf_output_fields, op_output_fields,
529*d1d35e2fSjeremylt                                        blk_size, num_input_fields, num_output_fields,
530*d1d35e2fSjeremylt                                        op, out_vec, impl, request);
531e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
5321d102b48SJeremy L Thompson   }
5331d102b48SJeremy L Thompson 
5341d102b48SJeremy L Thompson   // Restore input arrays
535*d1d35e2fSjeremylt   ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields,
536*d1d35e2fSjeremylt                                        op_input_fields, impl);
537e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
53889c6efa4Sjeremylt 
539e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
54089c6efa4Sjeremylt }
54189c6efa4Sjeremylt 
542f10650afSjeremylt //------------------------------------------------------------------------------
5431d102b48SJeremy L Thompson // Assemble Linear QFunction
544f10650afSjeremylt //------------------------------------------------------------------------------
54580ac2e43SJeremy L Thompson static int CeedOperatorLinearAssembleQFunction_Opt(CeedOperator op,
5461d102b48SJeremy L Thompson     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
5471d102b48SJeremy L Thompson   int ierr;
5481d102b48SJeremy L Thompson   Ceed ceed;
549e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
550*d1d35e2fSjeremylt   Ceed_Opt *ceed_impl;
551*d1d35e2fSjeremylt   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
552*d1d35e2fSjeremylt   const CeedInt blk_size = ceed_impl->blk_size;
5531d102b48SJeremy L Thompson   CeedOperator_Opt *impl;
554e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
555*d1d35e2fSjeremylt   CeedInt Q, num_input_fields, num_output_fields, num_elem, size;
556*d1d35e2fSjeremylt   ierr = CeedOperatorGetNumElements(op, &num_elem); CeedChkBackend(ierr);
557e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
558*d1d35e2fSjeremylt   CeedInt num_blks = (num_elem/blk_size) + !!(num_elem%blk_size);
5591d102b48SJeremy L Thompson   CeedQFunction qf;
560e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
561*d1d35e2fSjeremylt   ierr= CeedQFunctionGetNumArgs(qf, &num_input_fields, &num_output_fields);
562e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
563*d1d35e2fSjeremylt   CeedOperatorField *op_input_fields, *op_output_fields;
564*d1d35e2fSjeremylt   ierr = CeedOperatorGetFields(op, &op_input_fields, &op_output_fields);
565e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
566*d1d35e2fSjeremylt   CeedQFunctionField *qf_input_fields, *qf_output_fields;
567*d1d35e2fSjeremylt   ierr = CeedQFunctionGetFields(qf, &qf_input_fields, &qf_output_fields);
568e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
5691d102b48SJeremy L Thompson   CeedVector vec, lvec;
570*d1d35e2fSjeremylt   CeedInt num_active_in = 0, num_active_out = 0;
571*d1d35e2fSjeremylt   CeedVector *active_in = NULL;
5721d102b48SJeremy L Thompson   CeedScalar *a, *tmp;
5731d102b48SJeremy L Thompson 
5741d102b48SJeremy L Thompson   // Setup
575e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
5761d102b48SJeremy L Thompson 
57716911fdaSjeremylt   // Check for identity
578*d1d35e2fSjeremylt   if (impl->identity_qf)
57916911fdaSjeremylt     // LCOV_EXCL_START
580e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
581e15f9bd0SJeremy L Thompson                      "Assembling identity qfunctions not supported");
58216911fdaSjeremylt   // LCOV_EXCL_STOP
58316911fdaSjeremylt 
5841d102b48SJeremy L Thompson   // Input Evecs and Restriction
585*d1d35e2fSjeremylt   ierr = CeedOperatorSetupInputs_Opt(num_input_fields, qf_input_fields,
586*d1d35e2fSjeremylt                                      op_input_fields, NULL, impl, request);
587e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
5881d102b48SJeremy L Thompson 
5891d102b48SJeremy L Thompson   // Count number of active input fields
590*d1d35e2fSjeremylt   for (CeedInt i=0; i<num_input_fields; i++) {
5911d102b48SJeremy L Thompson     // Get input vector
592*d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_input_fields[i], &vec);
593*d1d35e2fSjeremylt     CeedChkBackend(ierr);
5941d102b48SJeremy L Thompson     // Check if active input
5951d102b48SJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
596*d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_input_fields[i], &size);
597e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
598*d1d35e2fSjeremylt       ierr = CeedVectorSetValue(impl->q_vecs_in[i], 0.0); CeedChkBackend(ierr);
599*d1d35e2fSjeremylt       ierr = CeedVectorGetArray(impl->q_vecs_in[i], CEED_MEM_HOST, &tmp);
600*d1d35e2fSjeremylt       CeedChkBackend(ierr);
601*d1d35e2fSjeremylt       ierr = CeedRealloc(num_active_in + size, &active_in); CeedChkBackend(ierr);
6021d102b48SJeremy L Thompson       for (CeedInt field=0; field<size; field++) {
603*d1d35e2fSjeremylt         ierr = CeedVectorCreate(ceed, Q*blk_size, &active_in[num_active_in+field]);
604e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
605*d1d35e2fSjeremylt         ierr = CeedVectorSetArray(active_in[num_active_in+field], CEED_MEM_HOST,
606*d1d35e2fSjeremylt                                   CEED_USE_POINTER, &tmp[field*Q*blk_size]);
607e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
6081d102b48SJeremy L Thompson       }
609*d1d35e2fSjeremylt       num_active_in += size;
610*d1d35e2fSjeremylt       ierr = CeedVectorRestoreArray(impl->q_vecs_in[i], &tmp); CeedChkBackend(ierr);
6111d102b48SJeremy L Thompson     }
61289c6efa4Sjeremylt   }
61389c6efa4Sjeremylt 
6141d102b48SJeremy L Thompson   // Count number of active output fields
615*d1d35e2fSjeremylt   for (CeedInt i=0; i<num_output_fields; i++) {
6161d102b48SJeremy L Thompson     // Get output vector
617*d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[i], &vec);
618e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
6191d102b48SJeremy L Thompson     // Check if active output
6201d102b48SJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
621*d1d35e2fSjeremylt       ierr = CeedQFunctionFieldGetSize(qf_output_fields[i], &size);
622e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
623*d1d35e2fSjeremylt       num_active_out += size;
6241d102b48SJeremy L Thompson     }
6251d102b48SJeremy L Thompson   }
6261d102b48SJeremy L Thompson 
6271d102b48SJeremy L Thompson   // Check sizes
628*d1d35e2fSjeremylt   if (!num_active_in || !num_active_out)
6291d102b48SJeremy L Thompson     // LCOV_EXCL_START
630e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
631e15f9bd0SJeremy L Thompson                      "Cannot assemble QFunction without active inputs "
6321d102b48SJeremy L Thompson                      "and outputs");
6331d102b48SJeremy L Thompson   // LCOV_EXCL_STOP
6341d102b48SJeremy L Thompson 
6351d102b48SJeremy L Thompson   // Setup lvec
636*d1d35e2fSjeremylt   ierr = CeedVectorCreate(ceed, num_blks*blk_size*Q*num_active_in*num_active_out,
637e15f9bd0SJeremy L Thompson                           &lvec); CeedChkBackend(ierr);
638e15f9bd0SJeremy L Thompson   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
6391d102b48SJeremy L Thompson 
6401d102b48SJeremy L Thompson   // Create output restriction
641*d1d35e2fSjeremylt   CeedInt strides[3] = {1, Q, num_active_in *num_active_out*Q};
642*d1d35e2fSjeremylt   ierr = CeedElemRestrictionCreateStrided(ceed, num_elem, Q,
643*d1d35e2fSjeremylt                                           num_active_in*num_active_out,
644*d1d35e2fSjeremylt                                           num_active_in*num_active_out*num_elem*Q,
645e15f9bd0SJeremy L Thompson                                           strides, rstr); CeedChkBackend(ierr);
6461d102b48SJeremy L Thompson   // Create assembled vector
647*d1d35e2fSjeremylt   ierr = CeedVectorCreate(ceed, num_elem*Q*num_active_in*num_active_out,
648e15f9bd0SJeremy L Thompson                           assembled); CeedChkBackend(ierr);
6491d102b48SJeremy L Thompson 
6501d102b48SJeremy L Thompson   // Loop through elements
651*d1d35e2fSjeremylt   for (CeedInt e=0; e<num_blks*blk_size; e+=blk_size) {
6521d102b48SJeremy L Thompson     // Input basis apply
653*d1d35e2fSjeremylt     ierr = CeedOperatorInputBasis_Opt(e, Q, qf_input_fields, op_input_fields,
654*d1d35e2fSjeremylt                                       num_input_fields, blk_size, NULL, true,
655e15f9bd0SJeremy L Thompson                                       impl, request); CeedChkBackend(ierr);
6561d102b48SJeremy L Thompson 
6571d102b48SJeremy L Thompson     // Assemble QFunction
658*d1d35e2fSjeremylt     for (CeedInt in=0; in<num_active_in; in++) {
6591d102b48SJeremy L Thompson       // Set Inputs
660*d1d35e2fSjeremylt       ierr = CeedVectorSetValue(active_in[in], 1.0); CeedChkBackend(ierr);
661*d1d35e2fSjeremylt       if (num_active_in > 1) {
662*d1d35e2fSjeremylt         ierr = CeedVectorSetValue(active_in[(in+num_active_in-1)%num_active_in],
663e15f9bd0SJeremy L Thompson                                   0.0); CeedChkBackend(ierr);
66442ea3801Sjeremylt       }
6651d102b48SJeremy L Thompson       // Set Outputs
666*d1d35e2fSjeremylt       for (CeedInt out=0; out<num_output_fields; out++) {
6671d102b48SJeremy L Thompson         // Get output vector
668*d1d35e2fSjeremylt         ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
669e15f9bd0SJeremy L Thompson         CeedChkBackend(ierr);
6701d102b48SJeremy L Thompson         // Check if active output
6711d102b48SJeremy L Thompson         if (vec == CEED_VECTOR_ACTIVE) {
672*d1d35e2fSjeremylt           CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST,
673e15f9bd0SJeremy L Thompson                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
674*d1d35e2fSjeremylt           ierr = CeedQFunctionFieldGetSize(qf_output_fields[out], &size);
675e15f9bd0SJeremy L Thompson           CeedChkBackend(ierr);
676*d1d35e2fSjeremylt           a += size*Q*blk_size; // Advance the pointer by the size of the output
6771d102b48SJeremy L Thompson         }
6781d102b48SJeremy L Thompson       }
6791d102b48SJeremy L Thompson       // Apply QFunction
680*d1d35e2fSjeremylt       ierr = CeedQFunctionApply(qf, Q*blk_size, impl->q_vecs_in, impl->q_vecs_out);
681e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
6821d102b48SJeremy L Thompson     }
6831d102b48SJeremy L Thompson   }
6841d102b48SJeremy L Thompson 
6851d102b48SJeremy L Thompson   // Un-set output Qvecs to prevent accidental overwrite of Assembled
686*d1d35e2fSjeremylt   for (CeedInt out=0; out<num_output_fields; out++) {
6871d102b48SJeremy L Thompson     // Get output vector
688*d1d35e2fSjeremylt     ierr = CeedOperatorFieldGetVector(op_output_fields[out], &vec);
689e15f9bd0SJeremy L Thompson     CeedChkBackend(ierr);
6901d102b48SJeremy L Thompson     // Check if active output
6911d102b48SJeremy L Thompson     if (vec == CEED_VECTOR_ACTIVE) {
692*d1d35e2fSjeremylt       CeedVectorSetArray(impl->q_vecs_out[out], CEED_MEM_HOST, CEED_COPY_VALUES,
693e15f9bd0SJeremy L Thompson                          NULL); CeedChkBackend(ierr);
6941d102b48SJeremy L Thompson     }
6951d102b48SJeremy L Thompson   }
6961d102b48SJeremy L Thompson 
6971d102b48SJeremy L Thompson   // Restore input arrays
698*d1d35e2fSjeremylt   ierr = CeedOperatorRestoreInputs_Opt(num_input_fields, qf_input_fields,
699*d1d35e2fSjeremylt                                        op_input_fields, impl);
700e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
7011d102b48SJeremy L Thompson 
7021d102b48SJeremy L Thompson   // Output blocked restriction
703e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArray(lvec, &a); CeedChkBackend(ierr);
704e15f9bd0SJeremy L Thompson   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
705*d1d35e2fSjeremylt   CeedElemRestriction blk_rstr;
706*d1d35e2fSjeremylt   ierr = CeedElemRestrictionCreateBlockedStrided(ceed, num_elem, Q, blk_size,
707*d1d35e2fSjeremylt          num_active_in*num_active_out, num_active_in*num_active_out*num_elem*Q,
708*d1d35e2fSjeremylt          strides, &blk_rstr); CeedChkBackend(ierr);
709*d1d35e2fSjeremylt   ierr = CeedElemRestrictionApply(blk_rstr, CEED_TRANSPOSE, lvec, *assembled,
710e15f9bd0SJeremy L Thompson                                   request); CeedChkBackend(ierr);
7111d102b48SJeremy L Thompson 
7121d102b48SJeremy L Thompson   // Cleanup
713*d1d35e2fSjeremylt   for (CeedInt i=0; i<num_active_in; i++) {
714*d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&active_in[i]); CeedChkBackend(ierr);
71542ea3801Sjeremylt   }
716*d1d35e2fSjeremylt   ierr = CeedFree(&active_in); CeedChkBackend(ierr);
717e15f9bd0SJeremy L Thompson   ierr = CeedVectorDestroy(&lvec); CeedChkBackend(ierr);
718*d1d35e2fSjeremylt   ierr = CeedElemRestrictionDestroy(&blk_rstr); CeedChkBackend(ierr);
7191d102b48SJeremy L Thompson 
720e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
72189c6efa4Sjeremylt }
72289c6efa4Sjeremylt 
723f10650afSjeremylt //------------------------------------------------------------------------------
724f10650afSjeremylt // Operator Destroy
725f10650afSjeremylt //------------------------------------------------------------------------------
726f10650afSjeremylt static int CeedOperatorDestroy_Opt(CeedOperator op) {
727f10650afSjeremylt   int ierr;
728f10650afSjeremylt   CeedOperator_Opt *impl;
729e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
730f10650afSjeremylt 
731*d1d35e2fSjeremylt   for (CeedInt i=0; i<impl->num_e_vecs_in+impl->num_e_vecs_out; i++) {
732*d1d35e2fSjeremylt     ierr = CeedElemRestrictionDestroy(&impl->blk_restr[i]); CeedChkBackend(ierr);
733*d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs[i]); CeedChkBackend(ierr);
734f10650afSjeremylt   }
735*d1d35e2fSjeremylt   ierr = CeedFree(&impl->blk_restr); CeedChkBackend(ierr);
736*d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs); CeedChkBackend(ierr);
737*d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_data); CeedChkBackend(ierr);
738*d1d35e2fSjeremylt   ierr = CeedFree(&impl->input_state); CeedChkBackend(ierr);
739f10650afSjeremylt 
740*d1d35e2fSjeremylt   for (CeedInt i=0; i<impl->num_e_vecs_in; i++) {
741*d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs_in[i]); CeedChkBackend(ierr);
742*d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->q_vecs_in[i]); CeedChkBackend(ierr);
743f10650afSjeremylt   }
744*d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs_in); CeedChkBackend(ierr);
745*d1d35e2fSjeremylt   ierr = CeedFree(&impl->q_vecs_in); CeedChkBackend(ierr);
746f10650afSjeremylt 
747*d1d35e2fSjeremylt   for (CeedInt i=0; i<impl->num_e_vecs_out; i++) {
748*d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->e_vecs_out[i]); CeedChkBackend(ierr);
749*d1d35e2fSjeremylt     ierr = CeedVectorDestroy(&impl->q_vecs_out[i]); CeedChkBackend(ierr);
750f10650afSjeremylt   }
751*d1d35e2fSjeremylt   ierr = CeedFree(&impl->e_vecs_out); CeedChkBackend(ierr);
752*d1d35e2fSjeremylt   ierr = CeedFree(&impl->q_vecs_out); CeedChkBackend(ierr);
753f10650afSjeremylt 
754e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
755e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
756f10650afSjeremylt }
757f10650afSjeremylt 
758f10650afSjeremylt //------------------------------------------------------------------------------
759f10650afSjeremylt // Operator Create
760f10650afSjeremylt //------------------------------------------------------------------------------
76189c6efa4Sjeremylt int CeedOperatorCreate_Opt(CeedOperator op) {
76289c6efa4Sjeremylt   int ierr;
76389c6efa4Sjeremylt   Ceed ceed;
764e15f9bd0SJeremy L Thompson   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
765*d1d35e2fSjeremylt   Ceed_Opt *ceed_impl;
766*d1d35e2fSjeremylt   ierr = CeedGetData(ceed, &ceed_impl); CeedChkBackend(ierr);
767*d1d35e2fSjeremylt   CeedInt blk_size = ceed_impl->blk_size;
76889c6efa4Sjeremylt   CeedOperator_Opt *impl;
76989c6efa4Sjeremylt 
770e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
771e15f9bd0SJeremy L Thompson   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
77289c6efa4Sjeremylt 
773*d1d35e2fSjeremylt   if (blk_size != 1 && blk_size != 8)
77482946b17Sjeremylt     // LCOV_EXCL_START
775e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
776*d1d35e2fSjeremylt                      "Opt backend cannot use blocksize: %d", blk_size);
77782946b17Sjeremylt   // LCOV_EXCL_STOP
77882946b17Sjeremylt 
77980ac2e43SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
78080ac2e43SJeremy L Thompson                                 CeedOperatorLinearAssembleQFunction_Opt);
781e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
782cae8b89aSjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
783e15f9bd0SJeremy L Thompson                                 CeedOperatorApplyAdd_Opt); CeedChkBackend(ierr);
78489c6efa4Sjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
785e15f9bd0SJeremy L Thompson                                 CeedOperatorDestroy_Opt); CeedChkBackend(ierr);
786e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
78789c6efa4Sjeremylt }
788f10650afSjeremylt //------------------------------------------------------------------------------
789