xref: /libCEED/backends/opt/ceed-opt-operator.c (revision 89c6efa49859cd40e0a2f99b4a91b922f3b4c9b2)
1*89c6efa4Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2*89c6efa4Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3*89c6efa4Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
4*89c6efa4Sjeremylt //
5*89c6efa4Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
6*89c6efa4Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
7*89c6efa4Sjeremylt // element discretizations for exascale applications. For more information and
8*89c6efa4Sjeremylt // source code availability see http://github.com/ceed.
9*89c6efa4Sjeremylt //
10*89c6efa4Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11*89c6efa4Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
12*89c6efa4Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
13*89c6efa4Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
14*89c6efa4Sjeremylt // software, applications, hardware, advanced system engineering and early
15*89c6efa4Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
16*89c6efa4Sjeremylt 
17*89c6efa4Sjeremylt #include <string.h>
18*89c6efa4Sjeremylt #include "ceed-opt.h"
19*89c6efa4Sjeremylt #include "../ref/ceed-ref.h"
20*89c6efa4Sjeremylt 
21*89c6efa4Sjeremylt static int CeedOperatorDestroy_Opt(CeedOperator op) {
22*89c6efa4Sjeremylt   int ierr;
23*89c6efa4Sjeremylt   CeedOperator_Opt *impl;
24*89c6efa4Sjeremylt   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
25*89c6efa4Sjeremylt 
26*89c6efa4Sjeremylt   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
27*89c6efa4Sjeremylt     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
28*89c6efa4Sjeremylt     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
29*89c6efa4Sjeremylt   }
30*89c6efa4Sjeremylt   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
31*89c6efa4Sjeremylt   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
32*89c6efa4Sjeremylt   ierr = CeedFree(&impl->edata); CeedChk(ierr);
33*89c6efa4Sjeremylt   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
34*89c6efa4Sjeremylt 
35*89c6efa4Sjeremylt   for (CeedInt i=0; i<impl->numein; i++) {
36*89c6efa4Sjeremylt     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
37*89c6efa4Sjeremylt     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
38*89c6efa4Sjeremylt   }
39*89c6efa4Sjeremylt   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
40*89c6efa4Sjeremylt   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
41*89c6efa4Sjeremylt 
42*89c6efa4Sjeremylt   for (CeedInt i=0; i<impl->numeout; i++) {
43*89c6efa4Sjeremylt     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
44*89c6efa4Sjeremylt     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
45*89c6efa4Sjeremylt   }
46*89c6efa4Sjeremylt   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
47*89c6efa4Sjeremylt   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
48*89c6efa4Sjeremylt 
49*89c6efa4Sjeremylt   ierr = CeedFree(&impl); CeedChk(ierr);
50*89c6efa4Sjeremylt   return 0;
51*89c6efa4Sjeremylt }
52*89c6efa4Sjeremylt 
53*89c6efa4Sjeremylt /*
54*89c6efa4Sjeremylt   Setup infields or outfields
55*89c6efa4Sjeremylt  */
56*89c6efa4Sjeremylt static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op,
57*89c6efa4Sjeremylt     bool inOrOut, const CeedInt blksize,
58*89c6efa4Sjeremylt     CeedElemRestriction *blkrestr,
59*89c6efa4Sjeremylt     CeedVector *fullevecs, CeedVector *evecs,
60*89c6efa4Sjeremylt     CeedVector *qvecs, CeedInt starte,
61*89c6efa4Sjeremylt     CeedInt numfields, CeedInt Q) {
62*89c6efa4Sjeremylt   CeedInt dim, ierr, ncomp, P;
63*89c6efa4Sjeremylt   Ceed ceed;
64*89c6efa4Sjeremylt   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
65*89c6efa4Sjeremylt   CeedBasis basis;
66*89c6efa4Sjeremylt   CeedElemRestriction r;
67*89c6efa4Sjeremylt   CeedOperatorField *opfields;
68*89c6efa4Sjeremylt   CeedQFunctionField *qffields;
69*89c6efa4Sjeremylt   if (inOrOut) {
70*89c6efa4Sjeremylt     ierr = CeedOperatorGetFields(op, NULL, &opfields);
71*89c6efa4Sjeremylt     CeedChk(ierr);
72*89c6efa4Sjeremylt     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
73*89c6efa4Sjeremylt     CeedChk(ierr);
74*89c6efa4Sjeremylt   } else {
75*89c6efa4Sjeremylt     ierr = CeedOperatorGetFields(op, &opfields, NULL);
76*89c6efa4Sjeremylt     CeedChk(ierr);
77*89c6efa4Sjeremylt     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
78*89c6efa4Sjeremylt     CeedChk(ierr);
79*89c6efa4Sjeremylt   }
80*89c6efa4Sjeremylt 
81*89c6efa4Sjeremylt   // Loop over fields
82*89c6efa4Sjeremylt   for (CeedInt i=0; i<numfields; i++) {
83*89c6efa4Sjeremylt     CeedEvalMode emode;
84*89c6efa4Sjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
85*89c6efa4Sjeremylt 
86*89c6efa4Sjeremylt     if (emode != CEED_EVAL_WEIGHT) {
87*89c6efa4Sjeremylt       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
88*89c6efa4Sjeremylt       CeedChk(ierr);
89*89c6efa4Sjeremylt       CeedElemRestriction_Ref *data;
90*89c6efa4Sjeremylt       ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr);
91*89c6efa4Sjeremylt       Ceed ceed;
92*89c6efa4Sjeremylt       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
93*89c6efa4Sjeremylt       CeedInt nelem, elemsize, ndof;
94*89c6efa4Sjeremylt       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
95*89c6efa4Sjeremylt       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
96*89c6efa4Sjeremylt       ierr = CeedElemRestrictionGetNumDoF(r, &ndof); CeedChk(ierr);
97*89c6efa4Sjeremylt       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
98*89c6efa4Sjeremylt       ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize,
99*89c6efa4Sjeremylt                                               blksize, ndof, ncomp,
100*89c6efa4Sjeremylt                                               CEED_MEM_HOST, CEED_COPY_VALUES,
101*89c6efa4Sjeremylt                                               data->indices, &blkrestr[i+starte]);
102*89c6efa4Sjeremylt       CeedChk(ierr);
103*89c6efa4Sjeremylt       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
104*89c6efa4Sjeremylt                                              &fullevecs[i+starte]);
105*89c6efa4Sjeremylt       CeedChk(ierr);
106*89c6efa4Sjeremylt     }
107*89c6efa4Sjeremylt 
108*89c6efa4Sjeremylt     switch(emode) {
109*89c6efa4Sjeremylt     case CEED_EVAL_NONE:
110*89c6efa4Sjeremylt       ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp);
111*89c6efa4Sjeremylt       CeedChk(ierr);
112*89c6efa4Sjeremylt       ierr = CeedVectorCreate(ceed, Q*ncomp*blksize, &evecs[i]); CeedChk(ierr);
113*89c6efa4Sjeremylt       ierr = CeedVectorCreate(ceed, Q*ncomp*blksize, &qvecs[i]); CeedChk(ierr);
114*89c6efa4Sjeremylt       break;
115*89c6efa4Sjeremylt     case CEED_EVAL_INTERP:
116*89c6efa4Sjeremylt       ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp);
117*89c6efa4Sjeremylt       CeedChk(ierr);
118*89c6efa4Sjeremylt       ierr = CeedElemRestrictionGetElementSize(r, &P);
119*89c6efa4Sjeremylt       CeedChk(ierr);
120*89c6efa4Sjeremylt       ierr = CeedVectorCreate(ceed, P*ncomp*blksize, &evecs[i]); CeedChk(ierr);
121*89c6efa4Sjeremylt       ierr = CeedVectorCreate(ceed, Q*ncomp*blksize, &qvecs[i]); CeedChk(ierr);
122*89c6efa4Sjeremylt       break;
123*89c6efa4Sjeremylt     case CEED_EVAL_GRAD:
124*89c6efa4Sjeremylt       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
125*89c6efa4Sjeremylt       ierr = CeedQFunctionFieldGetNumComponents(qffields[i], &ncomp);
126*89c6efa4Sjeremylt       CeedChk(ierr);
127*89c6efa4Sjeremylt       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
128*89c6efa4Sjeremylt       ierr = CeedElemRestrictionGetElementSize(r, &P);
129*89c6efa4Sjeremylt       CeedChk(ierr);
130*89c6efa4Sjeremylt       ierr = CeedVectorCreate(ceed, P*ncomp*blksize, &evecs[i]); CeedChk(ierr);
131*89c6efa4Sjeremylt       ierr = CeedVectorCreate(ceed, Q*ncomp*dim*blksize, &qvecs[i]); CeedChk(ierr);
132*89c6efa4Sjeremylt       break;
133*89c6efa4Sjeremylt     case CEED_EVAL_WEIGHT: // Only on input fields
134*89c6efa4Sjeremylt       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
135*89c6efa4Sjeremylt       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
136*89c6efa4Sjeremylt       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
137*89c6efa4Sjeremylt                             CEED_EVAL_WEIGHT, NULL, qvecs[i]); CeedChk(ierr);
138*89c6efa4Sjeremylt 
139*89c6efa4Sjeremylt       break;
140*89c6efa4Sjeremylt     case CEED_EVAL_DIV:
141*89c6efa4Sjeremylt       break; // Not implimented
142*89c6efa4Sjeremylt     case CEED_EVAL_CURL:
143*89c6efa4Sjeremylt       break; // Not implimented
144*89c6efa4Sjeremylt     }
145*89c6efa4Sjeremylt   }
146*89c6efa4Sjeremylt   return 0;
147*89c6efa4Sjeremylt }
148*89c6efa4Sjeremylt 
149*89c6efa4Sjeremylt /*
150*89c6efa4Sjeremylt   CeedOperator needs to connect all the named fields (be they active or passive)
151*89c6efa4Sjeremylt   to the named inputs and outputs of its CeedQFunction.
152*89c6efa4Sjeremylt  */
153*89c6efa4Sjeremylt static int CeedOperatorSetup_Opt(CeedOperator op) {
154*89c6efa4Sjeremylt   int ierr;
155*89c6efa4Sjeremylt   bool setupdone;
156*89c6efa4Sjeremylt   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
157*89c6efa4Sjeremylt   if (setupdone) return 0;
158*89c6efa4Sjeremylt   Ceed ceed;
159*89c6efa4Sjeremylt   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
160*89c6efa4Sjeremylt   Ceed_Opt *ceedimpl;
161*89c6efa4Sjeremylt   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
162*89c6efa4Sjeremylt   const CeedInt blksize = ceedimpl->blksize;
163*89c6efa4Sjeremylt   CeedOperator_Opt *impl;
164*89c6efa4Sjeremylt   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
165*89c6efa4Sjeremylt   CeedQFunction qf;
166*89c6efa4Sjeremylt   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
167*89c6efa4Sjeremylt   CeedInt Q, numinputfields, numoutputfields;
168*89c6efa4Sjeremylt   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
169*89c6efa4Sjeremylt   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
170*89c6efa4Sjeremylt   CeedChk(ierr);
171*89c6efa4Sjeremylt   CeedOperatorField *opinputfields, *opoutputfields;
172*89c6efa4Sjeremylt   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
173*89c6efa4Sjeremylt   CeedChk(ierr);
174*89c6efa4Sjeremylt   CeedQFunctionField *qfinputfields, *qfoutputfields;
175*89c6efa4Sjeremylt   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
176*89c6efa4Sjeremylt   CeedChk(ierr);
177*89c6efa4Sjeremylt 
178*89c6efa4Sjeremylt   // Allocate
179*89c6efa4Sjeremylt   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
180*89c6efa4Sjeremylt   CeedChk(ierr);
181*89c6efa4Sjeremylt   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
182*89c6efa4Sjeremylt   CeedChk(ierr);
183*89c6efa4Sjeremylt   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
184*89c6efa4Sjeremylt   CeedChk(ierr);
185*89c6efa4Sjeremylt 
186*89c6efa4Sjeremylt   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
187*89c6efa4Sjeremylt   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
188*89c6efa4Sjeremylt   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
189*89c6efa4Sjeremylt   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
190*89c6efa4Sjeremylt   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
191*89c6efa4Sjeremylt 
192*89c6efa4Sjeremylt   impl->numein = numinputfields; impl->numeout = numoutputfields;
193*89c6efa4Sjeremylt 
194*89c6efa4Sjeremylt   // Set up infield and outfield pointer arrays
195*89c6efa4Sjeremylt   // Infields
196*89c6efa4Sjeremylt   ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blksize, impl->blkrestr,
197*89c6efa4Sjeremylt                                      impl->evecs, impl->evecsin,
198*89c6efa4Sjeremylt                                      impl->qvecsin, 0,
199*89c6efa4Sjeremylt                                      numinputfields, Q);
200*89c6efa4Sjeremylt   CeedChk(ierr);
201*89c6efa4Sjeremylt   // Outfields
202*89c6efa4Sjeremylt   ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blksize, impl->blkrestr,
203*89c6efa4Sjeremylt                                      impl->evecs, impl->evecsout,
204*89c6efa4Sjeremylt                                      impl->qvecsout, numinputfields,
205*89c6efa4Sjeremylt                                      numoutputfields, Q);
206*89c6efa4Sjeremylt   CeedChk(ierr);
207*89c6efa4Sjeremylt 
208*89c6efa4Sjeremylt   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
209*89c6efa4Sjeremylt 
210*89c6efa4Sjeremylt   return 0;
211*89c6efa4Sjeremylt }
212*89c6efa4Sjeremylt 
213*89c6efa4Sjeremylt static inline int CeedOperatorApply_Opt(CeedOperator op,
214*89c6efa4Sjeremylt                                         const CeedInt blksize, CeedVector invec,
215*89c6efa4Sjeremylt                                         CeedVector outvec,
216*89c6efa4Sjeremylt                                         CeedRequest *request) {
217*89c6efa4Sjeremylt   int ierr;
218*89c6efa4Sjeremylt   CeedOperator_Opt *impl;
219*89c6efa4Sjeremylt   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
220*89c6efa4Sjeremylt   CeedInt Q, elemsize, numinputfields, numoutputfields, numelements, ncomp;
221*89c6efa4Sjeremylt   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
222*89c6efa4Sjeremylt   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
223*89c6efa4Sjeremylt   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
224*89c6efa4Sjeremylt   CeedQFunction qf;
225*89c6efa4Sjeremylt   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
226*89c6efa4Sjeremylt   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
227*89c6efa4Sjeremylt   CeedChk(ierr);
228*89c6efa4Sjeremylt   CeedTransposeMode lmode;
229*89c6efa4Sjeremylt   CeedOperatorField *opinputfields, *opoutputfields;
230*89c6efa4Sjeremylt   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
231*89c6efa4Sjeremylt   CeedChk(ierr);
232*89c6efa4Sjeremylt   CeedQFunctionField *qfinputfields, *qfoutputfields;
233*89c6efa4Sjeremylt   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
234*89c6efa4Sjeremylt   CeedChk(ierr);
235*89c6efa4Sjeremylt   CeedEvalMode emode;
236*89c6efa4Sjeremylt   CeedVector vec;
237*89c6efa4Sjeremylt   CeedBasis basis;
238*89c6efa4Sjeremylt   CeedElemRestriction Erestrict;
239*89c6efa4Sjeremylt   uint64_t state;
240*89c6efa4Sjeremylt 
241*89c6efa4Sjeremylt   // Setup
242*89c6efa4Sjeremylt   ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr);
243*89c6efa4Sjeremylt 
244*89c6efa4Sjeremylt   // Input Evecs and Restriction
245*89c6efa4Sjeremylt   for (CeedInt i=0; i<numinputfields; i++) {
246*89c6efa4Sjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
247*89c6efa4Sjeremylt     CeedChk(ierr);
248*89c6efa4Sjeremylt     if (emode == CEED_EVAL_WEIGHT) { // Skip
249*89c6efa4Sjeremylt     } else {
250*89c6efa4Sjeremylt       // Get input vector
251*89c6efa4Sjeremylt       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
252*89c6efa4Sjeremylt       if (vec != CEED_VECTOR_ACTIVE) {
253*89c6efa4Sjeremylt         // Restrict
254*89c6efa4Sjeremylt         ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
255*89c6efa4Sjeremylt         if (state != impl->inputstate[i]) {
256*89c6efa4Sjeremylt           ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode);
257*89c6efa4Sjeremylt           CeedChk(ierr);
258*89c6efa4Sjeremylt           ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
259*89c6efa4Sjeremylt                                           lmode, vec, impl->evecs[i], request);
260*89c6efa4Sjeremylt           CeedChk(ierr);
261*89c6efa4Sjeremylt           impl->inputstate[i] = state;
262*89c6efa4Sjeremylt         }
263*89c6efa4Sjeremylt       } else {
264*89c6efa4Sjeremylt         // Set Qvec for CEED_EVAL_NONE
265*89c6efa4Sjeremylt         if (emode == CEED_EVAL_NONE) {
266*89c6efa4Sjeremylt           ierr = CeedVectorGetArray(impl->evecsin[i], CEED_MEM_HOST,
267*89c6efa4Sjeremylt                                     &impl->edata[i]); CeedChk(ierr);
268*89c6efa4Sjeremylt           ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
269*89c6efa4Sjeremylt                                     CEED_USE_POINTER,
270*89c6efa4Sjeremylt                                     impl->edata[i]); CeedChk(ierr);
271*89c6efa4Sjeremylt           ierr = CeedVectorRestoreArray(impl->evecsin[i],
272*89c6efa4Sjeremylt                                         &impl->edata[i]); CeedChk(ierr);
273*89c6efa4Sjeremylt         }
274*89c6efa4Sjeremylt       }
275*89c6efa4Sjeremylt       // Get evec
276*89c6efa4Sjeremylt       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
277*89c6efa4Sjeremylt                                     (const CeedScalar **) &impl->edata[i]);
278*89c6efa4Sjeremylt       CeedChk(ierr);
279*89c6efa4Sjeremylt     }
280*89c6efa4Sjeremylt   }
281*89c6efa4Sjeremylt 
282*89c6efa4Sjeremylt   // Output Lvecs, Evecs, and Qvecs
283*89c6efa4Sjeremylt   for (CeedInt i=0; i<numoutputfields; i++) {
284*89c6efa4Sjeremylt     // Zero Lvecs
285*89c6efa4Sjeremylt     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
286*89c6efa4Sjeremylt     if (vec == CEED_VECTOR_ACTIVE) {
287*89c6efa4Sjeremylt       if (!impl->add) {
288*89c6efa4Sjeremylt         vec = outvec;
289*89c6efa4Sjeremylt         ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
290*89c6efa4Sjeremylt       }
291*89c6efa4Sjeremylt     } else {
292*89c6efa4Sjeremylt       ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
293*89c6efa4Sjeremylt     }
294*89c6efa4Sjeremylt     // Set Qvec if needed
295*89c6efa4Sjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
296*89c6efa4Sjeremylt     CeedChk(ierr);
297*89c6efa4Sjeremylt     if (emode == CEED_EVAL_NONE) {
298*89c6efa4Sjeremylt       // Set qvec to single block evec
299*89c6efa4Sjeremylt       ierr = CeedVectorGetArray(impl->evecsout[i], CEED_MEM_HOST,
300*89c6efa4Sjeremylt                                 &impl->edata[i + numinputfields]);
301*89c6efa4Sjeremylt       CeedChk(ierr);
302*89c6efa4Sjeremylt       ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
303*89c6efa4Sjeremylt                                 CEED_USE_POINTER,
304*89c6efa4Sjeremylt                                 impl->edata[i + numinputfields]); CeedChk(ierr);
305*89c6efa4Sjeremylt       ierr = CeedVectorRestoreArray(impl->evecsout[i],
306*89c6efa4Sjeremylt                                     &impl->edata[i + numinputfields]);
307*89c6efa4Sjeremylt       CeedChk(ierr);
308*89c6efa4Sjeremylt     }
309*89c6efa4Sjeremylt   }
310*89c6efa4Sjeremylt   impl->add = false;
311*89c6efa4Sjeremylt 
312*89c6efa4Sjeremylt   // Loop through elements
313*89c6efa4Sjeremylt   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
314*89c6efa4Sjeremylt     // Input basis apply if needed
315*89c6efa4Sjeremylt     for (CeedInt i=0; i<numinputfields; i++) {
316*89c6efa4Sjeremylt       CeedInt activein = 0;
317*89c6efa4Sjeremylt       // Get elemsize, emode, ncomp
318*89c6efa4Sjeremylt       ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
319*89c6efa4Sjeremylt       CeedChk(ierr);
320*89c6efa4Sjeremylt       ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
321*89c6efa4Sjeremylt       CeedChk(ierr);
322*89c6efa4Sjeremylt       ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
323*89c6efa4Sjeremylt       CeedChk(ierr);
324*89c6efa4Sjeremylt       ierr = CeedQFunctionFieldGetNumComponents(qfinputfields[i], &ncomp);
325*89c6efa4Sjeremylt       CeedChk(ierr);
326*89c6efa4Sjeremylt       // Restrict block active input
327*89c6efa4Sjeremylt       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
328*89c6efa4Sjeremylt       if (vec == CEED_VECTOR_ACTIVE) {
329*89c6efa4Sjeremylt         ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode);
330*89c6efa4Sjeremylt         CeedChk(ierr);
331*89c6efa4Sjeremylt         ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i], e/blksize,
332*89c6efa4Sjeremylt                                              CEED_NOTRANSPOSE, lmode, invec,
333*89c6efa4Sjeremylt                                              impl->evecsin[i], request);
334*89c6efa4Sjeremylt         CeedChk(ierr);
335*89c6efa4Sjeremylt         activein = 1;
336*89c6efa4Sjeremylt       }
337*89c6efa4Sjeremylt       // Basis action
338*89c6efa4Sjeremylt       switch(emode) {
339*89c6efa4Sjeremylt       case CEED_EVAL_NONE:
340*89c6efa4Sjeremylt         if (!activein) {
341*89c6efa4Sjeremylt           ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
342*89c6efa4Sjeremylt                                     CEED_USE_POINTER,
343*89c6efa4Sjeremylt                                     &impl->edata[i][e*Q*ncomp]); CeedChk(ierr);
344*89c6efa4Sjeremylt         }
345*89c6efa4Sjeremylt         break;
346*89c6efa4Sjeremylt       case CEED_EVAL_INTERP:
347*89c6efa4Sjeremylt         ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
348*89c6efa4Sjeremylt         CeedChk(ierr);
349*89c6efa4Sjeremylt         if (!activein) {
350*89c6efa4Sjeremylt           ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
351*89c6efa4Sjeremylt                                     CEED_USE_POINTER,
352*89c6efa4Sjeremylt                                     &impl->edata[i][e*elemsize*ncomp]);
353*89c6efa4Sjeremylt           CeedChk(ierr);
354*89c6efa4Sjeremylt         }
355*89c6efa4Sjeremylt         ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
356*89c6efa4Sjeremylt                               CEED_EVAL_INTERP, impl->evecsin[i],
357*89c6efa4Sjeremylt                               impl->qvecsin[i]); CeedChk(ierr);
358*89c6efa4Sjeremylt         break;
359*89c6efa4Sjeremylt       case CEED_EVAL_GRAD:
360*89c6efa4Sjeremylt         ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
361*89c6efa4Sjeremylt         CeedChk(ierr);
362*89c6efa4Sjeremylt          if (!activein) {
363*89c6efa4Sjeremylt           ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
364*89c6efa4Sjeremylt                                     CEED_USE_POINTER,
365*89c6efa4Sjeremylt                                     &impl->edata[i][e*elemsize*ncomp]);
366*89c6efa4Sjeremylt           CeedChk(ierr);
367*89c6efa4Sjeremylt         }
368*89c6efa4Sjeremylt         ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
369*89c6efa4Sjeremylt                               CEED_EVAL_GRAD, impl->evecsin[i],
370*89c6efa4Sjeremylt                               impl->qvecsin[i]); CeedChk(ierr);
371*89c6efa4Sjeremylt         break;
372*89c6efa4Sjeremylt       case CEED_EVAL_WEIGHT:
373*89c6efa4Sjeremylt         break;  // No action
374*89c6efa4Sjeremylt       case CEED_EVAL_DIV:
375*89c6efa4Sjeremylt         break; // Not implimented
376*89c6efa4Sjeremylt       case CEED_EVAL_CURL:
377*89c6efa4Sjeremylt         break; // Not implimented
378*89c6efa4Sjeremylt       }
379*89c6efa4Sjeremylt     }
380*89c6efa4Sjeremylt 
381*89c6efa4Sjeremylt     // Q function
382*89c6efa4Sjeremylt     ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
383*89c6efa4Sjeremylt     CeedChk(ierr);
384*89c6efa4Sjeremylt 
385*89c6efa4Sjeremylt     // Output basis apply and restrict
386*89c6efa4Sjeremylt     for (CeedInt i=0; i<numoutputfields; i++) {
387*89c6efa4Sjeremylt       // Get elemsize, emode, ncomp
388*89c6efa4Sjeremylt       ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
389*89c6efa4Sjeremylt       CeedChk(ierr);
390*89c6efa4Sjeremylt       ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
391*89c6efa4Sjeremylt       CeedChk(ierr);
392*89c6efa4Sjeremylt       // Basis action
393*89c6efa4Sjeremylt       switch(emode) {
394*89c6efa4Sjeremylt       case CEED_EVAL_NONE:
395*89c6efa4Sjeremylt         break; // No action
396*89c6efa4Sjeremylt       case CEED_EVAL_INTERP:
397*89c6efa4Sjeremylt         ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
398*89c6efa4Sjeremylt         CeedChk(ierr);
399*89c6efa4Sjeremylt         ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
400*89c6efa4Sjeremylt                               CEED_EVAL_INTERP, impl->qvecsout[i],
401*89c6efa4Sjeremylt                               impl->evecsout[i]); CeedChk(ierr);
402*89c6efa4Sjeremylt         break;
403*89c6efa4Sjeremylt       case CEED_EVAL_GRAD:
404*89c6efa4Sjeremylt         ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
405*89c6efa4Sjeremylt         CeedChk(ierr);
406*89c6efa4Sjeremylt         ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
407*89c6efa4Sjeremylt                               CEED_EVAL_GRAD, impl->qvecsout[i],
408*89c6efa4Sjeremylt                               impl->evecsout[i]); CeedChk(ierr);
409*89c6efa4Sjeremylt         break;
410*89c6efa4Sjeremylt       case CEED_EVAL_WEIGHT: {
411*89c6efa4Sjeremylt         Ceed ceed;
412*89c6efa4Sjeremylt         ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
413*89c6efa4Sjeremylt         return CeedError(ceed, 1,
414*89c6efa4Sjeremylt                          "CEED_EVAL_WEIGHT cannot be an output evaluation mode");
415*89c6efa4Sjeremylt         break; // Should not occur
416*89c6efa4Sjeremylt       }
417*89c6efa4Sjeremylt       case CEED_EVAL_DIV:
418*89c6efa4Sjeremylt         break; // Not implimented
419*89c6efa4Sjeremylt       case CEED_EVAL_CURL:
420*89c6efa4Sjeremylt         break; // Not implimented
421*89c6efa4Sjeremylt       }
422*89c6efa4Sjeremylt       // Restrict output block
423*89c6efa4Sjeremylt       // Get output vector
424*89c6efa4Sjeremylt       ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
425*89c6efa4Sjeremylt       if (vec == CEED_VECTOR_ACTIVE)
426*89c6efa4Sjeremylt         vec = outvec;
427*89c6efa4Sjeremylt       // Restrict
428*89c6efa4Sjeremylt       ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode);
429*89c6efa4Sjeremylt       CeedChk(ierr);
430*89c6efa4Sjeremylt       ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i+impl->numein],
431*89c6efa4Sjeremylt                                            e/blksize, CEED_TRANSPOSE,
432*89c6efa4Sjeremylt                                            lmode, impl->evecsout[i],
433*89c6efa4Sjeremylt                                            vec, request); CeedChk(ierr);
434*89c6efa4Sjeremylt     }
435*89c6efa4Sjeremylt   }
436*89c6efa4Sjeremylt 
437*89c6efa4Sjeremylt   // Restore input arrays
438*89c6efa4Sjeremylt   for (CeedInt i=0; i<numinputfields; i++) {
439*89c6efa4Sjeremylt     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
440*89c6efa4Sjeremylt     CeedChk(ierr);
441*89c6efa4Sjeremylt     if (emode == CEED_EVAL_WEIGHT) { // Skip
442*89c6efa4Sjeremylt     } else {
443*89c6efa4Sjeremylt       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
444*89c6efa4Sjeremylt                                         (const CeedScalar **) &impl->edata[i]);
445*89c6efa4Sjeremylt       CeedChk(ierr);
446*89c6efa4Sjeremylt     }
447*89c6efa4Sjeremylt   }
448*89c6efa4Sjeremylt 
449*89c6efa4Sjeremylt   return 0;
450*89c6efa4Sjeremylt }
451*89c6efa4Sjeremylt 
452*89c6efa4Sjeremylt int CeedOperatorApply_Opt_1(CeedOperator op, CeedVector invec,
453*89c6efa4Sjeremylt                             CeedVector outvec, CeedRequest *request) {
454*89c6efa4Sjeremylt   return CeedOperatorApply_Opt(op, 1, invec, outvec, request);
455*89c6efa4Sjeremylt }
456*89c6efa4Sjeremylt 
457*89c6efa4Sjeremylt int CeedOperatorApply_Opt_8(CeedOperator op, CeedVector invec,
458*89c6efa4Sjeremylt                             CeedVector outvec, CeedRequest *request) {
459*89c6efa4Sjeremylt   return CeedOperatorApply_Opt(op, 8, invec, outvec, request);
460*89c6efa4Sjeremylt }
461*89c6efa4Sjeremylt 
462*89c6efa4Sjeremylt int CeedOperatorCreate_Opt(CeedOperator op) {
463*89c6efa4Sjeremylt   int ierr;
464*89c6efa4Sjeremylt   Ceed ceed;
465*89c6efa4Sjeremylt   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
466*89c6efa4Sjeremylt   Ceed_Opt *ceedimpl;
467*89c6efa4Sjeremylt   ierr = CeedGetData(ceed, (void*)&ceedimpl); CeedChk(ierr);
468*89c6efa4Sjeremylt   CeedInt blksize = ceedimpl->blksize;
469*89c6efa4Sjeremylt   CeedOperator_Opt *impl;
470*89c6efa4Sjeremylt 
471*89c6efa4Sjeremylt   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
472*89c6efa4Sjeremylt   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
473*89c6efa4Sjeremylt 
474*89c6efa4Sjeremylt   if (blksize == 1) {
475*89c6efa4Sjeremylt     ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply",
476*89c6efa4Sjeremylt                                   CeedOperatorApply_Opt_1); CeedChk(ierr);
477*89c6efa4Sjeremylt   } else if (blksize == 8) {
478*89c6efa4Sjeremylt     ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply",
479*89c6efa4Sjeremylt                                   CeedOperatorApply_Opt_8); CeedChk(ierr);
480*89c6efa4Sjeremylt   } else {
481*89c6efa4Sjeremylt     return CeedError(ceed, 1, "Opt backend cannot use blocksize: %d", blksize);
482*89c6efa4Sjeremylt   }
483*89c6efa4Sjeremylt 
484*89c6efa4Sjeremylt   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
485*89c6efa4Sjeremylt                                 CeedOperatorDestroy_Opt); CeedChk(ierr);
486*89c6efa4Sjeremylt   return 0;
487*89c6efa4Sjeremylt }
488