xref: /libCEED/backends/opt/ceed-opt-operator.c (revision dc7d240c1c3ecbc40e893ea38e37c8e19d48593c)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <string.h>
18 #include "ceed-opt.h"
19 #include "../ref/ceed-ref.h"
20 
21 static int CeedOperatorDestroy_Opt(CeedOperator op) {
22   int ierr;
23   CeedOperator_Opt *impl;
24   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
25 
26   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
27     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
28     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
29   }
30   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
31   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
32   ierr = CeedFree(&impl->edata); CeedChk(ierr);
33   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
34 
35   for (CeedInt i=0; i<impl->numein; i++) {
36     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
37     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
38   }
39   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
40   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
41 
42   for (CeedInt i=0; i<impl->numeout; i++) {
43     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
44     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
45   }
46   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
47   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
48 
49   ierr = CeedFree(&impl); CeedChk(ierr);
50   return 0;
51 }
52 
53 /*
54   Setup infields or outfields
55  */
56 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op,
57                                        bool inOrOut, const CeedInt blksize,
58                                        CeedElemRestriction *blkrestr,
59                                        CeedVector *fullevecs, CeedVector *evecs,
60                                        CeedVector *qvecs, CeedInt starte,
61                                        CeedInt numfields, CeedInt Q) {
62   CeedInt dim, ierr, ncomp, size, P;
63   Ceed ceed;
64   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
65   CeedBasis basis;
66   CeedElemRestriction r;
67   CeedOperatorField *opfields;
68   CeedQFunctionField *qffields;
69   if (inOrOut) {
70     ierr = CeedOperatorGetFields(op, NULL, &opfields);
71     CeedChk(ierr);
72     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
73     CeedChk(ierr);
74   } else {
75     ierr = CeedOperatorGetFields(op, &opfields, NULL);
76     CeedChk(ierr);
77     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
78     CeedChk(ierr);
79   }
80 
81   // Loop over fields
82   for (CeedInt i=0; i<numfields; i++) {
83     CeedEvalMode emode;
84     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
85 
86     if (emode != CEED_EVAL_WEIGHT) {
87       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
88       CeedChk(ierr);
89       CeedElemRestriction_Ref *data;
90       ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr);
91       Ceed ceed;
92       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
93       CeedInt nelem, elemsize, nnodes;
94       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
95       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
96       ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
97       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
98       ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize,
99                                               blksize, nnodes, ncomp,
100                                               CEED_MEM_HOST, CEED_COPY_VALUES,
101                                               data->indices, &blkrestr[i+starte]);
102       CeedChk(ierr);
103       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
104                                              &fullevecs[i+starte]);
105       CeedChk(ierr);
106     }
107 
108     switch(emode) {
109     case CEED_EVAL_NONE:
110       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
111       ierr = CeedVectorCreate(ceed, Q*size*blksize, &evecs[i]); CeedChk(ierr);
112       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
113       break;
114     case CEED_EVAL_INTERP:
115       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
116       ierr = CeedElemRestrictionGetElementSize(r, &P);
117       CeedChk(ierr);
118       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
119       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
120       break;
121     case CEED_EVAL_GRAD:
122       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
123       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
124       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
125       ierr = CeedElemRestrictionGetElementSize(r, &P);
126       CeedChk(ierr);
127       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
128       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
129       break;
130     case CEED_EVAL_WEIGHT: // Only on input fields
131       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
132       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
133       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
134                             CEED_EVAL_WEIGHT, NULL, qvecs[i]); CeedChk(ierr);
135 
136       break;
137     case CEED_EVAL_DIV:
138       break; // Not implimented
139     case CEED_EVAL_CURL:
140       break; // Not implimented
141     }
142   }
143   return 0;
144 }
145 
146 /*
147   CeedOperator needs to connect all the named fields (be they active or passive)
148   to the named inputs and outputs of its CeedQFunction.
149  */
150 static int CeedOperatorSetup_Opt(CeedOperator op) {
151   int ierr;
152   bool setupdone;
153   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
154   if (setupdone) return 0;
155   Ceed ceed;
156   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
157   Ceed_Opt *ceedimpl;
158   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
159   const CeedInt blksize = ceedimpl->blksize;
160   CeedOperator_Opt *impl;
161   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
162   CeedQFunction qf;
163   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
164   CeedInt Q, numinputfields, numoutputfields;
165   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
166   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
167   CeedChk(ierr);
168   CeedOperatorField *opinputfields, *opoutputfields;
169   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
170   CeedChk(ierr);
171   CeedQFunctionField *qfinputfields, *qfoutputfields;
172   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
173   CeedChk(ierr);
174 
175   // Allocate
176   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
177   CeedChk(ierr);
178   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
179   CeedChk(ierr);
180   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
181   CeedChk(ierr);
182 
183   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
184   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
185   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
186   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
187   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
188 
189   impl->numein = numinputfields; impl->numeout = numoutputfields;
190 
191   // Set up infield and outfield pointer arrays
192   // Infields
193   ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blksize, impl->blkrestr,
194                                      impl->evecs, impl->evecsin,
195                                      impl->qvecsin, 0,
196                                      numinputfields, Q);
197   CeedChk(ierr);
198   // Outfields
199   ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blksize, impl->blkrestr,
200                                      impl->evecs, impl->evecsout,
201                                      impl->qvecsout, numinputfields,
202                                      numoutputfields, Q);
203   CeedChk(ierr);
204 
205   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
206 
207   return 0;
208 }
209 
210 
211 // Setup Input fields
212 static inline int CeedOperatorSetupInputs_Opt(CeedInt numinputfields,
213     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
214     CeedVector invec, CeedOperator_Opt *impl, CeedRequest *request) {
215   CeedInt ierr;
216   CeedEvalMode emode;
217   CeedVector vec;
218   CeedTransposeMode lmode;
219   uint64_t state;
220 
221   for (CeedInt i=0; i<numinputfields; i++) {
222     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
223     CeedChk(ierr);
224     if (emode == CEED_EVAL_WEIGHT) { // Skip
225     } else {
226       // Get input vector
227       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
228       if (vec != CEED_VECTOR_ACTIVE) {
229         // Restrict
230         ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
231         if (state != impl->inputstate[i]) {
232           ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode);
233           CeedChk(ierr);
234           ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
235                                           lmode, vec, impl->evecs[i], request);
236           CeedChk(ierr);
237           impl->inputstate[i] = state;
238         }
239       } else {
240         // Set Qvec for CEED_EVAL_NONE
241         if (emode == CEED_EVAL_NONE) {
242           ierr = CeedVectorGetArray(impl->evecsin[i], CEED_MEM_HOST,
243                                     &impl->edata[i]); CeedChk(ierr);
244           ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
245                                     CEED_USE_POINTER,
246                                     impl->edata[i]); CeedChk(ierr);
247           ierr = CeedVectorRestoreArray(impl->evecsin[i],
248                                         &impl->edata[i]); CeedChk(ierr);
249         }
250       }
251       // Get evec
252       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
253                                     (const CeedScalar **) &impl->edata[i]);
254       CeedChk(ierr);
255     }
256   }
257   return 0;
258 }
259 
260 // Input basis action
261 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q,
262     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
263     CeedInt numinputfields, CeedInt blksize, CeedVector invec, bool skipactive,
264     CeedOperator_Opt *impl, CeedRequest *request) {
265   CeedInt ierr;
266   CeedInt dim, elemsize, size;
267   CeedElemRestriction Erestrict;
268   CeedEvalMode emode;
269   CeedBasis basis;
270   CeedVector vec;
271   CeedTransposeMode lmode;
272 
273   for (CeedInt i=0; i<numinputfields; i++) {
274     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
275     // Skip active input
276     if (skipactive) {
277       if (vec == CEED_VECTOR_ACTIVE)
278         continue;
279     }
280 
281     CeedInt activein = 0;
282     // Get elemsize, emode, size
283     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
284     CeedChk(ierr);
285     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
286     CeedChk(ierr);
287     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
288     CeedChk(ierr);
289     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
290     // Restrict block active input
291     if (vec == CEED_VECTOR_ACTIVE) {
292       ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode);
293       CeedChk(ierr);
294       ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i], e/blksize,
295                                            CEED_NOTRANSPOSE, lmode, invec,
296                                            impl->evecsin[i], request);
297       CeedChk(ierr);
298       activein = 1;
299     }
300     // Basis action
301     switch(emode) {
302     case CEED_EVAL_NONE:
303       if (!activein) {
304         ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
305                                   CEED_USE_POINTER,
306                                   &impl->edata[i][e*Q*size]); CeedChk(ierr);
307       }
308       break;
309     case CEED_EVAL_INTERP:
310       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
311       CeedChk(ierr);
312       if (!activein) {
313         ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
314                                   CEED_USE_POINTER,
315                                   &impl->edata[i][e*elemsize*size]);
316         CeedChk(ierr);
317       }
318       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
319                             CEED_EVAL_INTERP, impl->evecsin[i],
320                             impl->qvecsin[i]); CeedChk(ierr);
321       break;
322     case CEED_EVAL_GRAD:
323       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
324       CeedChk(ierr);
325       if (!activein) {
326         ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
327         ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
328                                   CEED_USE_POINTER,
329                                   &impl->edata[i][e*elemsize*size/dim]);
330         CeedChk(ierr);
331       }
332       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
333                             CEED_EVAL_GRAD, impl->evecsin[i],
334                             impl->qvecsin[i]); CeedChk(ierr);
335       break;
336     case CEED_EVAL_WEIGHT:
337       break;  // No action
338     case CEED_EVAL_DIV:
339     case CEED_EVAL_CURL: {
340       // LCOV_EXCL_START
341       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
342       CeedChk(ierr);
343       Ceed ceed;
344       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
345       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
346       // LCOV_EXCL_STOP
347       break; // Not implemented
348     }
349     }
350   }
351   return 0;
352 }
353 
354 // Output basis action
355 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q,
356     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
357     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
358     CeedOperator op, CeedVector outvec, CeedOperator_Opt *impl,
359     CeedRequest *request) {
360   CeedInt ierr;
361   CeedElemRestriction Erestrict;
362   CeedEvalMode emode;
363   CeedBasis basis;
364   CeedVector vec;
365   CeedTransposeMode lmode;
366 
367   for (CeedInt i=0; i<numoutputfields; i++) {
368     // Get elemsize, emode, size
369     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
370     CeedChk(ierr);
371     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
372     CeedChk(ierr);
373     // Basis action
374     switch(emode) {
375     case CEED_EVAL_NONE:
376       break; // No action
377     case CEED_EVAL_INTERP:
378       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
379       CeedChk(ierr);
380       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
381                             CEED_EVAL_INTERP, impl->qvecsout[i],
382                             impl->evecsout[i]); CeedChk(ierr);
383       break;
384     case CEED_EVAL_GRAD:
385       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
386       CeedChk(ierr);
387       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
388                             CEED_EVAL_GRAD, impl->qvecsout[i],
389                             impl->evecsout[i]); CeedChk(ierr);
390       break;
391     case CEED_EVAL_WEIGHT: {
392       // LCOV_EXCL_START
393       Ceed ceed;
394       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
395       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
396                        "evaluation mode");
397       // LCOV_EXCL_STOP
398       break; // Should not occur
399     }
400     case CEED_EVAL_DIV:
401     case CEED_EVAL_CURL: {
402       // LCOV_EXCL_START
403       Ceed ceed;
404       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
405       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
406       // LCOV_EXCL_STOP
407       break; // Not implemented
408     }
409     }
410     // Restrict output block
411     // Get output vector
412     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
413     if (vec == CEED_VECTOR_ACTIVE)
414       vec = outvec;
415     // Restrict
416     ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode);
417     CeedChk(ierr);
418     ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i+impl->numein],
419                                          e/blksize, CEED_TRANSPOSE,
420                                          lmode, impl->evecsout[i],
421                                          vec, request); CeedChk(ierr);
422   }
423   return 0;
424 }
425 
426 
427 
428 // Restore Inputs
429 static inline int CeedOperatorRestoreInputs_Opt(CeedInt numinputfields,
430     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
431     bool skipactive, CeedOperator_Opt *impl) {
432   CeedInt ierr;
433   CeedEvalMode emode;
434 
435   for (CeedInt i=0; i<numinputfields; i++) {
436     // Skip active inputs
437     if (skipactive) {
438       CeedVector vec;
439       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
440       if (vec == CEED_VECTOR_ACTIVE)
441         continue;
442     }
443     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
444     CeedChk(ierr);
445     if (emode == CEED_EVAL_WEIGHT) { // Skip
446     } else {
447       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
448                                         (const CeedScalar **) &impl->edata[i]);
449       CeedChk(ierr);
450     }
451   }
452   return 0;
453 }
454 
455 // Apply Ceed Operator
456 static int CeedOperatorApply_Opt(CeedOperator op, CeedVector invec,
457                                  CeedVector outvec, CeedRequest *request) {
458   int ierr;
459   Ceed ceed;
460   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
461   Ceed_Opt *ceedimpl;
462   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
463   CeedInt blksize = ceedimpl->blksize;
464   CeedOperator_Opt *impl;
465   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
466   CeedInt Q, numinputfields, numoutputfields, numelements;
467   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
468   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
469   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
470   CeedQFunction qf;
471   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
472   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
473   CeedChk(ierr);
474   CeedOperatorField *opinputfields, *opoutputfields;
475   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
476   CeedChk(ierr);
477   CeedQFunctionField *qfinputfields, *qfoutputfields;
478   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
479   CeedChk(ierr);
480   CeedEvalMode emode;
481   CeedVector vec;
482 
483   // Setup
484   ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr);
485 
486   // Input Evecs and Restriction
487   ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields,
488                                      opinputfields, invec, impl, request); CeedChk(ierr);
489 
490   // Output Lvecs, Evecs, and Qvecs
491   for (CeedInt i=0; i<numoutputfields; i++) {
492     // Zero Lvecs
493     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
494     if (vec == CEED_VECTOR_ACTIVE) {
495       if (!impl->add) {
496         vec = outvec;
497         ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
498       }
499     } else {
500       ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
501     }
502     // Set Qvec if needed
503     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
504     CeedChk(ierr);
505     if (emode == CEED_EVAL_NONE) {
506       // Set qvec to single block evec
507       ierr = CeedVectorGetArray(impl->evecsout[i], CEED_MEM_HOST,
508                                 &impl->edata[i + numinputfields]);
509       CeedChk(ierr);
510       ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
511                                 CEED_USE_POINTER,
512                                 impl->edata[i + numinputfields]); CeedChk(ierr);
513       ierr = CeedVectorRestoreArray(impl->evecsout[i],
514                                     &impl->edata[i + numinputfields]);
515       CeedChk(ierr);
516     }
517   }
518   impl->add = false;
519 
520   // Loop through elements
521   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
522     // Input basis apply
523     ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields,
524                                       numinputfields, blksize, invec, false,
525                                       impl, request); CeedChk(ierr);
526 
527     // Q function
528     ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
529     CeedChk(ierr);
530 
531     // Output basis apply and restrict
532     ierr = CeedOperatorOutputBasis_Opt(e, Q, qfoutputfields, opoutputfields,
533                                        blksize, numinputfields, numoutputfields,
534                                        op, outvec, impl, request);
535     CeedChk(ierr);
536   }
537 
538   // Restore input arrays
539   ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields,
540                                        opinputfields, false, impl);
541   CeedChk(ierr);
542 
543   return 0;
544 }
545 
546 // Assemble Linear QFunction
547 static int CeedOperatorAssembleLinearQFunction_Opt(CeedOperator op,
548     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
549   int ierr;
550   Ceed ceed;
551   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
552   Ceed_Opt *ceedimpl;
553   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
554   const CeedInt blksize = ceedimpl->blksize;
555   CeedOperator_Opt *impl;
556   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
557   CeedInt Q, numinputfields, numoutputfields, numelements, size;
558   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
559   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
560   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
561   CeedQFunction qf;
562   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
563   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
564   CeedChk(ierr);
565   CeedOperatorField *opinputfields, *opoutputfields;
566   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
567   CeedChk(ierr);
568   CeedQFunctionField *qfinputfields, *qfoutputfields;
569   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
570   CeedChk(ierr);
571   CeedVector vec, lvec;
572   CeedInt numactivein = 0, numactiveout = 0;
573   CeedVector *activein = NULL;
574   CeedScalar *a, *tmp;
575 
576   // Setup
577   ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr);
578 
579   // Input Evecs and Restriction
580   ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields,
581                                      opinputfields, NULL, impl, request);
582   CeedChk(ierr);
583 
584   // Count number of active input fields
585   for (CeedInt i=0; i<numinputfields; i++) {
586     // Get input vector
587     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
588     // Check if active input
589     if (vec == CEED_VECTOR_ACTIVE) {
590       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
591       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
592       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
593       CeedChk(ierr);
594       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
595       for (CeedInt field=0; field<size; field++) {
596         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
597         CeedChk(ierr);
598         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
599                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
600         CeedChk(ierr);
601       }
602       numactivein += size;
603       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
604     }
605   }
606 
607   // Count number of active output fields
608   for (CeedInt i=0; i<numoutputfields; i++) {
609     // Get output vector
610     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
611     // Check if active output
612     if (vec == CEED_VECTOR_ACTIVE) {
613       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
614       numactiveout += size;
615     }
616   }
617 
618   // Check sizes
619   if (!numactivein || !numactiveout)
620     // LCOV_EXCL_START
621     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
622                      "and outputs");
623   // LCOV_EXCL_STOP
624 
625   // Setup lvec
626   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
627                           &lvec); CeedChk(ierr);
628   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
629 
630   // Create output restriction
631   ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q,
632          numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr);
633   // Create assembled vector
634   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
635                           assembled); CeedChk(ierr);
636 
637   // Loop through elements
638   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
639     // Input basis apply
640     ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields,
641                                       numinputfields, blksize, NULL, true,
642                                       impl, request); CeedChk(ierr);
643 
644     // Assemble QFunction
645     for (CeedInt in=0; in<numactivein; in++) {
646       // Set Inputs
647       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
648       if (numactivein > 1) {
649         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
650                                   0.0); CeedChk(ierr);
651       }
652       // Set Outputs
653       for (CeedInt out=0; out<numoutputfields; out++) {
654         // Get output vector
655         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
656         CeedChk(ierr);
657         // Check if active output
658         if (vec == CEED_VECTOR_ACTIVE) {
659           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
660                              CEED_USE_POINTER, a); CeedChk(ierr);
661           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
662           CeedChk(ierr);
663           a += size*Q*blksize; // Advance the pointer by the size of the output
664         }
665       }
666       // Apply QFunction
667       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
668       CeedChk(ierr);
669     }
670   }
671 
672   // Un-set output Qvecs to prevent accidental overwrite of Assembled
673   for (CeedInt out=0; out<numoutputfields; out++) {
674     // Get output vector
675     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
676     CeedChk(ierr);
677     // Check if active output
678     if (vec == CEED_VECTOR_ACTIVE) {
679       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
680                          NULL); CeedChk(ierr);
681     }
682   }
683 
684   // Restore input arrays
685   ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields,
686                                        opinputfields, true, impl);
687   CeedChk(ierr);
688 
689   // Output blocked restriction
690   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
691   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
692   CeedElemRestriction blkrstr;
693   ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize,
694                                           numelements*Q,
695                                           numactivein*numactiveout,
696                                           CEED_MEM_HOST, CEED_COPY_VALUES,
697                                           NULL, &blkrstr); CeedChk(ierr);
698   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE,
699                                   lvec, *assembled, request); CeedChk(ierr);
700 
701   // Cleanup
702   for (CeedInt i=0; i<numactivein; i++) {
703     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
704   }
705   ierr = CeedFree(&activein); CeedChk(ierr);
706   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
707   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
708 
709   return 0;
710 }
711 
712 int CeedOperatorCreate_Opt(CeedOperator op) {
713   int ierr;
714   Ceed ceed;
715   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
716   Ceed_Opt *ceedimpl;
717   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
718   CeedInt blksize = ceedimpl->blksize;
719   CeedOperator_Opt *impl;
720 
721   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
722   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
723 
724   if (blksize == 1 || blksize == 8) {
725     ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction",
726                                   CeedOperatorAssembleLinearQFunction_Opt);
727     CeedChk(ierr);
728     ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply",
729                                   CeedOperatorApply_Opt); CeedChk(ierr);
730   } else {
731     // LCOV_EXCL_START
732     return CeedError(ceed, 1, "Opt backend cannot use blocksize: %d", blksize);
733     // LCOV_EXCL_STOP
734   }
735 
736   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
737                                 CeedOperatorDestroy_Opt); CeedChk(ierr);
738   return 0;
739 }
740