xref: /libCEED/backends/opt/ceed-opt-operator.c (revision 1ef3f58f08373cfc32eb1b7a8e437c0dd90bedcd)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <string.h>
18 #include "ceed-opt.h"
19 #include "../ref/ceed-ref.h"
20 
21 static int CeedOperatorDestroy_Opt(CeedOperator op) {
22   int ierr;
23   CeedOperator_Opt *impl;
24   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
25 
26   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
27     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
28     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
29   }
30   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
31   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
32   ierr = CeedFree(&impl->edata); CeedChk(ierr);
33   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
34 
35   for (CeedInt i=0; i<impl->numein; i++) {
36     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
37     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
38   }
39   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
40   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
41 
42   for (CeedInt i=0; i<impl->numeout; i++) {
43     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
44     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
45   }
46   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
47   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
48 
49   ierr = CeedFree(&impl); CeedChk(ierr);
50   return 0;
51 }
52 
53 /*
54   Setup infields or outfields
55  */
56 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op,
57                                        bool inOrOut, const CeedInt blksize,
58                                        CeedElemRestriction *blkrestr,
59                                        CeedVector *fullevecs, CeedVector *evecs,
60                                        CeedVector *qvecs, CeedInt starte,
61                                        CeedInt numfields, CeedInt Q) {
62   CeedInt dim, ierr, ncomp, size, P;
63   Ceed ceed;
64   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
65   CeedBasis basis;
66   CeedElemRestriction r;
67   CeedOperatorField *opfields;
68   CeedQFunctionField *qffields;
69   if (inOrOut) {
70     ierr = CeedOperatorGetFields(op, NULL, &opfields);
71     CeedChk(ierr);
72     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
73     CeedChk(ierr);
74   } else {
75     ierr = CeedOperatorGetFields(op, &opfields, NULL);
76     CeedChk(ierr);
77     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
78     CeedChk(ierr);
79   }
80 
81   // Loop over fields
82   for (CeedInt i=0; i<numfields; i++) {
83     CeedEvalMode emode;
84     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
85 
86     if (emode != CEED_EVAL_WEIGHT) {
87       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
88       CeedChk(ierr);
89       CeedElemRestriction_Ref *data;
90       ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr);
91       Ceed ceed;
92       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
93       CeedInt nelem, elemsize, nnodes;
94       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
95       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
96       ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
97       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
98       ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize,
99                                               blksize, nnodes, ncomp,
100                                               CEED_MEM_HOST, CEED_COPY_VALUES,
101                                               data->indices, &blkrestr[i+starte]);
102       CeedChk(ierr);
103       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
104                                              &fullevecs[i+starte]);
105       CeedChk(ierr);
106     }
107 
108     switch(emode) {
109     case CEED_EVAL_NONE:
110       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
111       ierr = CeedVectorCreate(ceed, Q*size*blksize, &evecs[i]); CeedChk(ierr);
112       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
113       break;
114     case CEED_EVAL_INTERP:
115       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
116       ierr = CeedElemRestrictionGetElementSize(r, &P);
117       CeedChk(ierr);
118       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
119       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
120       break;
121     case CEED_EVAL_GRAD:
122       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
123       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
124       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
125       ierr = CeedElemRestrictionGetElementSize(r, &P);
126       CeedChk(ierr);
127       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
128       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
129       break;
130     case CEED_EVAL_WEIGHT: // Only on input fields
131       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
132       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
133       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
134                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]);
135       CeedChk(ierr);
136 
137       break;
138     case CEED_EVAL_DIV:
139       break; // Not implimented
140     case CEED_EVAL_CURL:
141       break; // Not implimented
142     }
143   }
144   return 0;
145 }
146 
147 /*
148   CeedOperator needs to connect all the named fields (be they active or passive)
149   to the named inputs and outputs of its CeedQFunction.
150  */
151 static int CeedOperatorSetup_Opt(CeedOperator op) {
152   int ierr;
153   bool setupdone;
154   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
155   if (setupdone) return 0;
156   Ceed ceed;
157   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
158   Ceed_Opt *ceedimpl;
159   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
160   const CeedInt blksize = ceedimpl->blksize;
161   CeedOperator_Opt *impl;
162   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
163   CeedQFunction qf;
164   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
165   CeedInt Q, numinputfields, numoutputfields;
166   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
167   ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr);
168   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
169   CeedChk(ierr);
170   CeedOperatorField *opinputfields, *opoutputfields;
171   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
172   CeedChk(ierr);
173   CeedQFunctionField *qfinputfields, *qfoutputfields;
174   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
175   CeedChk(ierr);
176 
177   // Allocate
178   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
179   CeedChk(ierr);
180   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
181   CeedChk(ierr);
182   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
183   CeedChk(ierr);
184 
185   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
186   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
187   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
188   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
189   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
190 
191   impl->numein = numinputfields; impl->numeout = numoutputfields;
192 
193   // Set up infield and outfield pointer arrays
194   // Infields
195   ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blksize, impl->blkrestr,
196                                      impl->evecs, impl->evecsin,
197                                      impl->qvecsin, 0,
198                                      numinputfields, Q);
199   CeedChk(ierr);
200   // Outfields
201   ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blksize, impl->blkrestr,
202                                      impl->evecs, impl->evecsout,
203                                      impl->qvecsout, numinputfields,
204                                      numoutputfields, Q);
205   CeedChk(ierr);
206 
207   // Identity QFunctions
208   if (impl->identityqf) {
209     CeedEvalMode inmode, outmode;
210     CeedQFunctionField *infields, *outfields;
211     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr);
212 
213     for (CeedInt i=0; i<numinputfields; i++) {
214       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
215       CeedChk(ierr);
216       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
217       CeedChk(ierr);
218 
219       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
220       impl->qvecsout[i] = impl->qvecsin[i];
221       ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr);
222     }
223   }
224 
225   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
226 
227   return 0;
228 }
229 
230 // Setup Input fields
231 static inline int CeedOperatorSetupInputs_Opt(CeedInt numinputfields,
232     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
233     CeedVector invec, CeedOperator_Opt *impl, CeedRequest *request) {
234   CeedInt ierr;
235   CeedEvalMode emode;
236   CeedVector vec;
237   CeedTransposeMode lmode;
238   uint64_t state;
239 
240   for (CeedInt i=0; i<numinputfields; i++) {
241     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
242     CeedChk(ierr);
243     if (emode == CEED_EVAL_WEIGHT) { // Skip
244     } else {
245       // Get input vector
246       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
247       if (vec != CEED_VECTOR_ACTIVE) {
248         // Restrict
249         ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
250         if (state != impl->inputstate[i]) {
251           ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode);
252           CeedChk(ierr);
253           ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
254                                           lmode, vec, impl->evecs[i], request);
255           CeedChk(ierr);
256           impl->inputstate[i] = state;
257         }
258       } else {
259         // Set Qvec for CEED_EVAL_NONE
260         if (emode == CEED_EVAL_NONE) {
261           ierr = CeedVectorGetArray(impl->evecsin[i], CEED_MEM_HOST,
262                                     &impl->edata[i]); CeedChk(ierr);
263           ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
264                                     CEED_USE_POINTER,
265                                     impl->edata[i]); CeedChk(ierr);
266           ierr = CeedVectorRestoreArray(impl->evecsin[i],
267                                         &impl->edata[i]); CeedChk(ierr);
268         }
269       }
270       // Get evec
271       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
272                                     (const CeedScalar **) &impl->edata[i]);
273       CeedChk(ierr);
274     }
275   }
276   return 0;
277 }
278 
279 // Input basis action
280 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q,
281     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
282     CeedInt numinputfields, CeedInt blksize, CeedVector invec, bool skipactive,
283     CeedOperator_Opt *impl, CeedRequest *request) {
284   CeedInt ierr;
285   CeedInt dim, elemsize, size;
286   CeedElemRestriction Erestrict;
287   CeedEvalMode emode;
288   CeedBasis basis;
289   CeedVector vec;
290   CeedTransposeMode lmode;
291 
292   for (CeedInt i=0; i<numinputfields; i++) {
293     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
294     // Skip active input
295     if (skipactive) {
296       if (vec == CEED_VECTOR_ACTIVE)
297         continue;
298     }
299 
300     CeedInt activein = 0;
301     // Get elemsize, emode, size
302     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
303     CeedChk(ierr);
304     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
305     CeedChk(ierr);
306     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
307     CeedChk(ierr);
308     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
309     // Restrict block active input
310     if (vec == CEED_VECTOR_ACTIVE) {
311       ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode);
312       CeedChk(ierr);
313       ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i], e/blksize,
314                                            CEED_NOTRANSPOSE, lmode, invec,
315                                            impl->evecsin[i], request);
316       CeedChk(ierr);
317       activein = 1;
318     }
319     // Basis action
320     switch(emode) {
321     case CEED_EVAL_NONE:
322       if (!activein) {
323         ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
324                                   CEED_USE_POINTER,
325                                   &impl->edata[i][e*Q*size]); CeedChk(ierr);
326       }
327       break;
328     case CEED_EVAL_INTERP:
329       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
330       CeedChk(ierr);
331       if (!activein) {
332         ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
333                                   CEED_USE_POINTER,
334                                   &impl->edata[i][e*elemsize*size]);
335         CeedChk(ierr);
336       }
337       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
338                             CEED_EVAL_INTERP, impl->evecsin[i],
339                             impl->qvecsin[i]); CeedChk(ierr);
340       break;
341     case CEED_EVAL_GRAD:
342       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
343       CeedChk(ierr);
344       if (!activein) {
345         ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
346         ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
347                                   CEED_USE_POINTER,
348                                   &impl->edata[i][e*elemsize*size/dim]);
349         CeedChk(ierr);
350       }
351       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
352                             CEED_EVAL_GRAD, impl->evecsin[i],
353                             impl->qvecsin[i]); CeedChk(ierr);
354       break;
355     case CEED_EVAL_WEIGHT:
356       break;  // No action
357     case CEED_EVAL_DIV:
358     case CEED_EVAL_CURL: {
359       // LCOV_EXCL_START
360       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
361       CeedChk(ierr);
362       Ceed ceed;
363       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
364       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
365       // LCOV_EXCL_STOP
366       break; // Not implemented
367     }
368     }
369   }
370   return 0;
371 }
372 
373 // Output basis action
374 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q,
375     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
376     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
377     CeedOperator op, CeedVector outvec, CeedOperator_Opt *impl,
378     CeedRequest *request) {
379   CeedInt ierr;
380   CeedElemRestriction Erestrict;
381   CeedEvalMode emode;
382   CeedBasis basis;
383   CeedVector vec;
384   CeedTransposeMode lmode;
385 
386   for (CeedInt i=0; i<numoutputfields; i++) {
387     // Get elemsize, emode, size
388     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
389     CeedChk(ierr);
390     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
391     CeedChk(ierr);
392     // Basis action
393     switch(emode) {
394     case CEED_EVAL_NONE:
395       break; // No action
396     case CEED_EVAL_INTERP:
397       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
398       CeedChk(ierr);
399       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
400                             CEED_EVAL_INTERP, impl->qvecsout[i],
401                             impl->evecsout[i]); CeedChk(ierr);
402       break;
403     case CEED_EVAL_GRAD:
404       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
405       CeedChk(ierr);
406       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
407                             CEED_EVAL_GRAD, impl->qvecsout[i],
408                             impl->evecsout[i]); CeedChk(ierr);
409       break;
410     case CEED_EVAL_WEIGHT: {
411       // LCOV_EXCL_START
412       Ceed ceed;
413       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
414       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
415                        "evaluation mode");
416       // LCOV_EXCL_STOP
417       break; // Should not occur
418     }
419     case CEED_EVAL_DIV:
420     case CEED_EVAL_CURL: {
421       // LCOV_EXCL_START
422       Ceed ceed;
423       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
424       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
425       // LCOV_EXCL_STOP
426       break; // Not implemented
427     }
428     }
429     // Restrict output block
430     // Get output vector
431     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
432     if (vec == CEED_VECTOR_ACTIVE)
433       vec = outvec;
434     // Restrict
435     ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode);
436     CeedChk(ierr);
437     ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i+impl->numein],
438                                          e/blksize, CEED_TRANSPOSE,
439                                          lmode, impl->evecsout[i],
440                                          vec, request); CeedChk(ierr);
441   }
442   return 0;
443 }
444 
445 // Restore Inputs
446 static inline int CeedOperatorRestoreInputs_Opt(CeedInt numinputfields,
447     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
448     bool skipactive, CeedOperator_Opt *impl) {
449   CeedInt ierr;
450   CeedEvalMode emode;
451 
452   for (CeedInt i=0; i<numinputfields; i++) {
453     // Skip active inputs
454     if (skipactive) {
455       CeedVector vec;
456       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
457       if (vec == CEED_VECTOR_ACTIVE)
458         continue;
459     }
460     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
461     CeedChk(ierr);
462     if (emode == CEED_EVAL_WEIGHT) { // Skip
463     } else {
464       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
465                                         (const CeedScalar **) &impl->edata[i]);
466       CeedChk(ierr);
467     }
468   }
469   return 0;
470 }
471 
472 // Apply Ceed Operator
473 static int CeedOperatorApply_Opt(CeedOperator op, CeedVector invec,
474                                  CeedVector outvec, CeedRequest *request) {
475   int ierr;
476   Ceed ceed;
477   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
478   Ceed_Opt *ceedimpl;
479   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
480   CeedInt blksize = ceedimpl->blksize;
481   CeedOperator_Opt *impl;
482   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
483   CeedInt Q, numinputfields, numoutputfields, numelements;
484   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
485   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
486   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
487   CeedQFunction qf;
488   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
489   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
490   CeedChk(ierr);
491   CeedOperatorField *opinputfields, *opoutputfields;
492   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
493   CeedChk(ierr);
494   CeedQFunctionField *qfinputfields, *qfoutputfields;
495   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
496   CeedChk(ierr);
497   CeedEvalMode emode;
498   CeedVector vec;
499 
500   // Setup
501   ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr);
502 
503   // Input Evecs and Restriction
504   ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields,
505                                      opinputfields, invec, impl, request);
506   CeedChk(ierr);
507 
508   // Output Lvecs, Evecs, and Qvecs
509   for (CeedInt i=0; i<numoutputfields; i++) {
510     // Zero Lvecs
511     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
512     if (vec == CEED_VECTOR_ACTIVE) {
513       if (!impl->add) {
514         vec = outvec;
515         ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
516       }
517     } else {
518       ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
519     }
520     // Set Qvec if needed
521     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
522     CeedChk(ierr);
523     if (emode == CEED_EVAL_NONE) {
524       // Set qvec to single block evec
525       ierr = CeedVectorGetArray(impl->evecsout[i], CEED_MEM_HOST,
526                                 &impl->edata[i + numinputfields]);
527       CeedChk(ierr);
528       ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
529                                 CEED_USE_POINTER,
530                                 impl->edata[i + numinputfields]); CeedChk(ierr);
531       ierr = CeedVectorRestoreArray(impl->evecsout[i],
532                                     &impl->edata[i + numinputfields]);
533       CeedChk(ierr);
534     }
535   }
536   impl->add = false;
537 
538   // Loop through elements
539   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
540     // Input basis apply
541     ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields,
542                                       numinputfields, blksize, invec, false,
543                                       impl, request); CeedChk(ierr);
544 
545     // Q function
546     if (!impl->identityqf) {
547       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
548       CeedChk(ierr);
549     }
550 
551     // Output basis apply and restrict
552     ierr = CeedOperatorOutputBasis_Opt(e, Q, qfoutputfields, opoutputfields,
553                                        blksize, numinputfields, numoutputfields,
554                                        op, outvec, impl, request);
555     CeedChk(ierr);
556   }
557 
558   // Restore input arrays
559   ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields,
560                                        opinputfields, false, impl);
561   CeedChk(ierr);
562 
563   return 0;
564 }
565 
566 // Assemble Linear QFunction
567 static int CeedOperatorAssembleLinearQFunction_Opt(CeedOperator op,
568     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
569   int ierr;
570   Ceed ceed;
571   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
572   Ceed_Opt *ceedimpl;
573   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
574   const CeedInt blksize = ceedimpl->blksize;
575   CeedOperator_Opt *impl;
576   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
577   CeedInt Q, numinputfields, numoutputfields, numelements, size;
578   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
579   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
580   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
581   CeedQFunction qf;
582   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
583   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
584   CeedChk(ierr);
585   CeedOperatorField *opinputfields, *opoutputfields;
586   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
587   CeedChk(ierr);
588   CeedQFunctionField *qfinputfields, *qfoutputfields;
589   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
590   CeedChk(ierr);
591   CeedVector vec, lvec;
592   CeedInt numactivein = 0, numactiveout = 0;
593   CeedVector *activein = NULL;
594   CeedScalar *a, *tmp;
595 
596   // Setup
597   ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr);
598 
599   // Check for identity
600   if (impl->identityqf)
601     // LCOV_EXCL_START
602     return CeedError(ceed, 1, "Assembling identity qfunctions not supported");
603   // LCOV_EXCL_STOP
604 
605   // Input Evecs and Restriction
606   ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields,
607                                      opinputfields, NULL, impl, request);
608   CeedChk(ierr);
609 
610   // Count number of active input fields
611   for (CeedInt i=0; i<numinputfields; i++) {
612     // Get input vector
613     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
614     // Check if active input
615     if (vec == CEED_VECTOR_ACTIVE) {
616       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
617       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
618       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
619       CeedChk(ierr);
620       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
621       for (CeedInt field=0; field<size; field++) {
622         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
623         CeedChk(ierr);
624         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
625                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
626         CeedChk(ierr);
627       }
628       numactivein += size;
629       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
630     }
631   }
632 
633   // Count number of active output fields
634   for (CeedInt i=0; i<numoutputfields; i++) {
635     // Get output vector
636     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
637     // Check if active output
638     if (vec == CEED_VECTOR_ACTIVE) {
639       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
640       numactiveout += size;
641     }
642   }
643 
644   // Check sizes
645   if (!numactivein || !numactiveout)
646     // LCOV_EXCL_START
647     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
648                      "and outputs");
649   // LCOV_EXCL_STOP
650 
651   // Setup lvec
652   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
653                           &lvec); CeedChk(ierr);
654   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
655 
656   // Create output restriction
657   ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q,
658          numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr);
659   // Create assembled vector
660   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
661                           assembled); CeedChk(ierr);
662 
663   // Loop through elements
664   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
665     // Input basis apply
666     ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields,
667                                       numinputfields, blksize, NULL, true,
668                                       impl, request); CeedChk(ierr);
669 
670     // Assemble QFunction
671     for (CeedInt in=0; in<numactivein; in++) {
672       // Set Inputs
673       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
674       if (numactivein > 1) {
675         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
676                                   0.0); CeedChk(ierr);
677       }
678       // Set Outputs
679       for (CeedInt out=0; out<numoutputfields; out++) {
680         // Get output vector
681         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
682         CeedChk(ierr);
683         // Check if active output
684         if (vec == CEED_VECTOR_ACTIVE) {
685           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
686                              CEED_USE_POINTER, a); CeedChk(ierr);
687           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
688           CeedChk(ierr);
689           a += size*Q*blksize; // Advance the pointer by the size of the output
690         }
691       }
692       // Apply QFunction
693       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
694       CeedChk(ierr);
695     }
696   }
697 
698   // Un-set output Qvecs to prevent accidental overwrite of Assembled
699   for (CeedInt out=0; out<numoutputfields; out++) {
700     // Get output vector
701     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
702     CeedChk(ierr);
703     // Check if active output
704     if (vec == CEED_VECTOR_ACTIVE) {
705       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
706                          NULL); CeedChk(ierr);
707     }
708   }
709 
710   // Restore input arrays
711   ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields,
712                                        opinputfields, true, impl);
713   CeedChk(ierr);
714 
715   // Output blocked restriction
716   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
717   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
718   CeedElemRestriction blkrstr;
719   ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize,
720                                           numelements*Q,
721                                           numactivein*numactiveout,
722                                           CEED_MEM_HOST, CEED_COPY_VALUES,
723                                           NULL, &blkrstr); CeedChk(ierr);
724   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE,
725                                   lvec, *assembled, request); CeedChk(ierr);
726 
727   // Cleanup
728   for (CeedInt i=0; i<numactivein; i++) {
729     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
730   }
731   ierr = CeedFree(&activein); CeedChk(ierr);
732   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
733   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
734 
735   return 0;
736 }
737 
738 int CeedOperatorCreate_Opt(CeedOperator op) {
739   int ierr;
740   Ceed ceed;
741   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
742   Ceed_Opt *ceedimpl;
743   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
744   CeedInt blksize = ceedimpl->blksize;
745   CeedOperator_Opt *impl;
746 
747   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
748   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
749 
750   if (blksize == 1 || blksize == 8) {
751     ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction",
752                                   CeedOperatorAssembleLinearQFunction_Opt);
753     CeedChk(ierr);
754     ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply",
755                                   CeedOperatorApply_Opt); CeedChk(ierr);
756   } else {
757     // LCOV_EXCL_START
758     return CeedError(ceed, 1, "Opt backend cannot use blocksize: %d", blksize);
759     // LCOV_EXCL_STOP
760   }
761 
762   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
763                                 CeedOperatorDestroy_Opt); CeedChk(ierr);
764   return 0;
765 }
766