xref: /libCEED/backends/opt/ceed-opt-operator.c (revision 55e4cc5bfc35b2c27e98784af386df9728efb8f2) !
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <string.h>
18 #include "ceed-opt.h"
19 #include "../ref/ceed-ref.h"
20 
21 static int CeedOperatorDestroy_Opt(CeedOperator op) {
22   int ierr;
23   CeedOperator_Opt *impl;
24   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
25 
26   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
27     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
28     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
29   }
30   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
31   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
32   ierr = CeedFree(&impl->edata); CeedChk(ierr);
33   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
34 
35   for (CeedInt i=0; i<impl->numein; i++) {
36     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
37     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
38   }
39   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
40   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
41 
42   for (CeedInt i=0; i<impl->numeout; i++) {
43     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
44     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
45   }
46   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
47   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
48 
49   ierr = CeedFree(&impl); CeedChk(ierr);
50   return 0;
51 }
52 
53 /*
54   Setup infields or outfields
55  */
56 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op,
57                                        bool inOrOut, const CeedInt blksize,
58                                        CeedElemRestriction *blkrestr,
59                                        CeedVector *fullevecs, CeedVector *evecs,
60                                        CeedVector *qvecs, CeedInt starte,
61                                        CeedInt numfields, CeedInt Q) {
62   CeedInt dim, ierr, ncomp, size, P;
63   Ceed ceed;
64   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
65   CeedBasis basis;
66   CeedElemRestriction r;
67   CeedOperatorField *opfields;
68   CeedQFunctionField *qffields;
69   if (inOrOut) {
70     ierr = CeedOperatorGetFields(op, NULL, &opfields);
71     CeedChk(ierr);
72     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
73     CeedChk(ierr);
74   } else {
75     ierr = CeedOperatorGetFields(op, &opfields, NULL);
76     CeedChk(ierr);
77     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
78     CeedChk(ierr);
79   }
80 
81   // Loop over fields
82   for (CeedInt i=0; i<numfields; i++) {
83     CeedEvalMode emode;
84     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
85 
86     if (emode != CEED_EVAL_WEIGHT) {
87       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
88       CeedChk(ierr);
89       CeedElemRestriction_Ref *data;
90       ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr);
91       Ceed ceed;
92       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
93       CeedInt nelem, elemsize, nnodes;
94       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
95       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
96       ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
97       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
98       ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize,
99                                               blksize, nnodes, ncomp,
100                                               CEED_MEM_HOST, CEED_COPY_VALUES,
101                                               data->indices, &blkrestr[i+starte]);
102       CeedChk(ierr);
103       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
104                                              &fullevecs[i+starte]);
105       CeedChk(ierr);
106     }
107 
108     switch(emode) {
109     case CEED_EVAL_NONE:
110       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
111       ierr = CeedVectorCreate(ceed, Q*size*blksize, &evecs[i]); CeedChk(ierr);
112       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
113       break;
114     case CEED_EVAL_INTERP:
115       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
116       ierr = CeedElemRestrictionGetElementSize(r, &P);
117       CeedChk(ierr);
118       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
119       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
120       break;
121     case CEED_EVAL_GRAD:
122       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
123       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
124       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
125       ierr = CeedElemRestrictionGetElementSize(r, &P);
126       CeedChk(ierr);
127       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
128       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
129       break;
130     case CEED_EVAL_WEIGHT: // Only on input fields
131       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
132       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
133       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
134                             CEED_EVAL_WEIGHT, NULL, qvecs[i]); CeedChk(ierr);
135 
136       break;
137     case CEED_EVAL_DIV:
138       break; // Not implimented
139     case CEED_EVAL_CURL:
140       break; // Not implimented
141     }
142   }
143   return 0;
144 }
145 
146 /*
147   CeedOperator needs to connect all the named fields (be they active or passive)
148   to the named inputs and outputs of its CeedQFunction.
149  */
150 static int CeedOperatorSetup_Opt(CeedOperator op) {
151   int ierr;
152   bool setupdone;
153   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
154   if (setupdone) return 0;
155   Ceed ceed;
156   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
157   Ceed_Opt *ceedimpl;
158   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
159   const CeedInt blksize = ceedimpl->blksize;
160   CeedOperator_Opt *impl;
161   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
162   CeedQFunction qf;
163   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
164   CeedInt Q, numinputfields, numoutputfields;
165   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
166   ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr);
167   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
168   CeedChk(ierr);
169   CeedOperatorField *opinputfields, *opoutputfields;
170   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
171   CeedChk(ierr);
172   CeedQFunctionField *qfinputfields, *qfoutputfields;
173   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
174   CeedChk(ierr);
175 
176   // Allocate
177   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
178   CeedChk(ierr);
179   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
180   CeedChk(ierr);
181   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
182   CeedChk(ierr);
183 
184   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
185   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
186   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
187   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
188   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
189 
190   impl->numein = numinputfields; impl->numeout = numoutputfields;
191 
192   // Set up infield and outfield pointer arrays
193   // Infields
194   ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blksize, impl->blkrestr,
195                                      impl->evecs, impl->evecsin,
196                                      impl->qvecsin, 0,
197                                      numinputfields, Q);
198   CeedChk(ierr);
199   // Outfields
200   ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blksize, impl->blkrestr,
201                                      impl->evecs, impl->evecsout,
202                                      impl->qvecsout, numinputfields,
203                                      numoutputfields, Q);
204   CeedChk(ierr);
205 
206   // Identity QFunctions
207   if (impl->identityqf) {
208     CeedEvalMode inmode, outmode;
209     CeedQFunctionField *infields, *outfields;
210     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr);
211 
212     for (CeedInt i=0; i<numinputfields; i++) {
213       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
214       CeedChk(ierr);
215       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
216       CeedChk(ierr);
217 
218       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
219       impl->qvecsout[i] = impl->qvecsin[i];
220       ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr);
221     }
222   }
223 
224   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
225 
226   return 0;
227 }
228 
229 // Setup Input fields
230 static inline int CeedOperatorSetupInputs_Opt(CeedInt numinputfields,
231     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
232     CeedVector invec, CeedOperator_Opt *impl, CeedRequest *request) {
233   CeedInt ierr;
234   CeedEvalMode emode;
235   CeedVector vec;
236   CeedTransposeMode lmode;
237   uint64_t state;
238 
239   for (CeedInt i=0; i<numinputfields; i++) {
240     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
241     CeedChk(ierr);
242     if (emode == CEED_EVAL_WEIGHT) { // Skip
243     } else {
244       // Get input vector
245       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
246       if (vec != CEED_VECTOR_ACTIVE) {
247         // Restrict
248         ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
249         if (state != impl->inputstate[i]) {
250           ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode);
251           CeedChk(ierr);
252           ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
253                                           lmode, vec, impl->evecs[i], request);
254           CeedChk(ierr);
255           impl->inputstate[i] = state;
256         }
257       } else {
258         // Set Qvec for CEED_EVAL_NONE
259         if (emode == CEED_EVAL_NONE) {
260           ierr = CeedVectorGetArray(impl->evecsin[i], CEED_MEM_HOST,
261                                     &impl->edata[i]); CeedChk(ierr);
262           ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
263                                     CEED_USE_POINTER,
264                                     impl->edata[i]); CeedChk(ierr);
265           ierr = CeedVectorRestoreArray(impl->evecsin[i],
266                                         &impl->edata[i]); CeedChk(ierr);
267         }
268       }
269       // Get evec
270       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
271                                     (const CeedScalar **) &impl->edata[i]);
272       CeedChk(ierr);
273     }
274   }
275   return 0;
276 }
277 
278 // Input basis action
279 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q,
280     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
281     CeedInt numinputfields, CeedInt blksize, CeedVector invec, bool skipactive,
282     CeedOperator_Opt *impl, CeedRequest *request) {
283   CeedInt ierr;
284   CeedInt dim, elemsize, size;
285   CeedElemRestriction Erestrict;
286   CeedEvalMode emode;
287   CeedBasis basis;
288   CeedVector vec;
289   CeedTransposeMode lmode;
290 
291   for (CeedInt i=0; i<numinputfields; i++) {
292     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
293     // Skip active input
294     if (skipactive) {
295       if (vec == CEED_VECTOR_ACTIVE)
296         continue;
297     }
298 
299     CeedInt activein = 0;
300     // Get elemsize, emode, size
301     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
302     CeedChk(ierr);
303     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
304     CeedChk(ierr);
305     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
306     CeedChk(ierr);
307     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
308     // Restrict block active input
309     if (vec == CEED_VECTOR_ACTIVE) {
310       ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode);
311       CeedChk(ierr);
312       ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i], e/blksize,
313                                            CEED_NOTRANSPOSE, lmode, invec,
314                                            impl->evecsin[i], request);
315       CeedChk(ierr);
316       activein = 1;
317     }
318     // Basis action
319     switch(emode) {
320     case CEED_EVAL_NONE:
321       if (!activein) {
322         ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
323                                   CEED_USE_POINTER,
324                                   &impl->edata[i][e*Q*size]); CeedChk(ierr);
325       }
326       break;
327     case CEED_EVAL_INTERP:
328       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
329       CeedChk(ierr);
330       if (!activein) {
331         ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
332                                   CEED_USE_POINTER,
333                                   &impl->edata[i][e*elemsize*size]);
334         CeedChk(ierr);
335       }
336       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
337                             CEED_EVAL_INTERP, impl->evecsin[i],
338                             impl->qvecsin[i]); CeedChk(ierr);
339       break;
340     case CEED_EVAL_GRAD:
341       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
342       CeedChk(ierr);
343       if (!activein) {
344         ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
345         ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
346                                   CEED_USE_POINTER,
347                                   &impl->edata[i][e*elemsize*size/dim]);
348         CeedChk(ierr);
349       }
350       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
351                             CEED_EVAL_GRAD, impl->evecsin[i],
352                             impl->qvecsin[i]); CeedChk(ierr);
353       break;
354     case CEED_EVAL_WEIGHT:
355       break;  // No action
356     case CEED_EVAL_DIV:
357     case CEED_EVAL_CURL: {
358       // LCOV_EXCL_START
359       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
360       CeedChk(ierr);
361       Ceed ceed;
362       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
363       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
364       // LCOV_EXCL_STOP
365       break; // Not implemented
366     }
367     }
368   }
369   return 0;
370 }
371 
372 // Output basis action
373 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q,
374     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
375     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
376     CeedOperator op, CeedVector outvec, CeedOperator_Opt *impl,
377     CeedRequest *request) {
378   CeedInt ierr;
379   CeedElemRestriction Erestrict;
380   CeedEvalMode emode;
381   CeedBasis basis;
382   CeedVector vec;
383   CeedTransposeMode lmode;
384 
385   for (CeedInt i=0; i<numoutputfields; i++) {
386     // Get elemsize, emode, size
387     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
388     CeedChk(ierr);
389     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
390     CeedChk(ierr);
391     // Basis action
392     switch(emode) {
393     case CEED_EVAL_NONE:
394       break; // No action
395     case CEED_EVAL_INTERP:
396       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
397       CeedChk(ierr);
398       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
399                             CEED_EVAL_INTERP, impl->qvecsout[i],
400                             impl->evecsout[i]); CeedChk(ierr);
401       break;
402     case CEED_EVAL_GRAD:
403       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
404       CeedChk(ierr);
405       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
406                             CEED_EVAL_GRAD, impl->qvecsout[i],
407                             impl->evecsout[i]); CeedChk(ierr);
408       break;
409     case CEED_EVAL_WEIGHT: {
410       // LCOV_EXCL_START
411       Ceed ceed;
412       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
413       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
414                        "evaluation mode");
415       // LCOV_EXCL_STOP
416       break; // Should not occur
417     }
418     case CEED_EVAL_DIV:
419     case CEED_EVAL_CURL: {
420       // LCOV_EXCL_START
421       Ceed ceed;
422       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
423       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
424       // LCOV_EXCL_STOP
425       break; // Not implemented
426     }
427     }
428     // Restrict output block
429     // Get output vector
430     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
431     if (vec == CEED_VECTOR_ACTIVE)
432       vec = outvec;
433     // Restrict
434     ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode);
435     CeedChk(ierr);
436     ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i+impl->numein],
437                                          e/blksize, CEED_TRANSPOSE,
438                                          lmode, impl->evecsout[i],
439                                          vec, request); CeedChk(ierr);
440   }
441   return 0;
442 }
443 
444 // Restore Inputs
445 static inline int CeedOperatorRestoreInputs_Opt(CeedInt numinputfields,
446     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
447     bool skipactive, CeedOperator_Opt *impl) {
448   CeedInt ierr;
449   CeedEvalMode emode;
450 
451   for (CeedInt i=0; i<numinputfields; i++) {
452     // Skip active inputs
453     if (skipactive) {
454       CeedVector vec;
455       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
456       if (vec == CEED_VECTOR_ACTIVE)
457         continue;
458     }
459     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
460     CeedChk(ierr);
461     if (emode == CEED_EVAL_WEIGHT) { // Skip
462     } else {
463       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
464                                         (const CeedScalar **) &impl->edata[i]);
465       CeedChk(ierr);
466     }
467   }
468   return 0;
469 }
470 
471 // Apply Ceed Operator
472 static int CeedOperatorApply_Opt(CeedOperator op, CeedVector invec,
473                                  CeedVector outvec, CeedRequest *request) {
474   int ierr;
475   Ceed ceed;
476   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
477   Ceed_Opt *ceedimpl;
478   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
479   CeedInt blksize = ceedimpl->blksize;
480   CeedOperator_Opt *impl;
481   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
482   CeedInt Q, numinputfields, numoutputfields, numelements;
483   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
484   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
485   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
486   CeedQFunction qf;
487   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
488   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
489   CeedChk(ierr);
490   CeedOperatorField *opinputfields, *opoutputfields;
491   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
492   CeedChk(ierr);
493   CeedQFunctionField *qfinputfields, *qfoutputfields;
494   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
495   CeedChk(ierr);
496   CeedEvalMode emode;
497   CeedVector vec;
498 
499   // Setup
500   ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr);
501 
502   // Input Evecs and Restriction
503   ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields,
504                                      opinputfields, invec, impl, request);
505   CeedChk(ierr);
506 
507   // Output Lvecs, Evecs, and Qvecs
508   for (CeedInt i=0; i<numoutputfields; i++) {
509     // Zero Lvecs
510     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
511     if (vec == CEED_VECTOR_ACTIVE) {
512       if (!impl->add) {
513         vec = outvec;
514         ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
515       }
516     } else {
517       ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
518     }
519     // Set Qvec if needed
520     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
521     CeedChk(ierr);
522     if (emode == CEED_EVAL_NONE) {
523       // Set qvec to single block evec
524       ierr = CeedVectorGetArray(impl->evecsout[i], CEED_MEM_HOST,
525                                 &impl->edata[i + numinputfields]);
526       CeedChk(ierr);
527       ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
528                                 CEED_USE_POINTER,
529                                 impl->edata[i + numinputfields]); CeedChk(ierr);
530       ierr = CeedVectorRestoreArray(impl->evecsout[i],
531                                     &impl->edata[i + numinputfields]);
532       CeedChk(ierr);
533     }
534   }
535   impl->add = false;
536 
537   // Loop through elements
538   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
539     // Input basis apply
540     ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields,
541                                       numinputfields, blksize, invec, false,
542                                       impl, request); CeedChk(ierr);
543 
544     // Q function
545     if (!impl->identityqf) {
546       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
547       CeedChk(ierr);
548     }
549 
550     // Output basis apply and restrict
551     ierr = CeedOperatorOutputBasis_Opt(e, Q, qfoutputfields, opoutputfields,
552                                        blksize, numinputfields, numoutputfields,
553                                        op, outvec, impl, request);
554     CeedChk(ierr);
555   }
556 
557   // Restore input arrays
558   ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields,
559                                        opinputfields, false, impl);
560   CeedChk(ierr);
561 
562   return 0;
563 }
564 
565 // Assemble Linear QFunction
566 static int CeedOperatorAssembleLinearQFunction_Opt(CeedOperator op,
567     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
568   int ierr;
569   Ceed ceed;
570   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
571   Ceed_Opt *ceedimpl;
572   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
573   const CeedInt blksize = ceedimpl->blksize;
574   CeedOperator_Opt *impl;
575   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
576   CeedInt Q, numinputfields, numoutputfields, numelements, size;
577   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
578   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
579   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
580   CeedQFunction qf;
581   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
582   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
583   CeedChk(ierr);
584   CeedOperatorField *opinputfields, *opoutputfields;
585   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
586   CeedChk(ierr);
587   CeedQFunctionField *qfinputfields, *qfoutputfields;
588   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
589   CeedChk(ierr);
590   CeedVector vec, lvec;
591   CeedInt numactivein = 0, numactiveout = 0;
592   CeedVector *activein = NULL;
593   CeedScalar *a, *tmp;
594 
595   // Setup
596   ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr);
597 
598   // Check for identity
599   if (impl->identityqf)
600     // LCOV_EXCL_START
601     return CeedError(ceed, 1, "Assembling identity qfunctions not supported");
602   // LCOV_EXCL_STOP
603 
604   // Input Evecs and Restriction
605   ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields,
606                                      opinputfields, NULL, impl, request);
607   CeedChk(ierr);
608 
609   // Count number of active input fields
610   for (CeedInt i=0; i<numinputfields; i++) {
611     // Get input vector
612     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
613     // Check if active input
614     if (vec == CEED_VECTOR_ACTIVE) {
615       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
616       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
617       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
618       CeedChk(ierr);
619       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
620       for (CeedInt field=0; field<size; field++) {
621         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
622         CeedChk(ierr);
623         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
624                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
625         CeedChk(ierr);
626       }
627       numactivein += size;
628       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
629     }
630   }
631 
632   // Count number of active output fields
633   for (CeedInt i=0; i<numoutputfields; i++) {
634     // Get output vector
635     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
636     // Check if active output
637     if (vec == CEED_VECTOR_ACTIVE) {
638       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
639       numactiveout += size;
640     }
641   }
642 
643   // Check sizes
644   if (!numactivein || !numactiveout)
645     // LCOV_EXCL_START
646     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
647                      "and outputs");
648   // LCOV_EXCL_STOP
649 
650   // Setup lvec
651   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
652                           &lvec); CeedChk(ierr);
653   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
654 
655   // Create output restriction
656   ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q,
657          numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr);
658   // Create assembled vector
659   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
660                           assembled); CeedChk(ierr);
661 
662   // Loop through elements
663   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
664     // Input basis apply
665     ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields,
666                                       numinputfields, blksize, NULL, true,
667                                       impl, request); CeedChk(ierr);
668 
669     // Assemble QFunction
670     for (CeedInt in=0; in<numactivein; in++) {
671       // Set Inputs
672       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
673       if (numactivein > 1) {
674         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
675                                   0.0); CeedChk(ierr);
676       }
677       // Set Outputs
678       for (CeedInt out=0; out<numoutputfields; out++) {
679         // Get output vector
680         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
681         CeedChk(ierr);
682         // Check if active output
683         if (vec == CEED_VECTOR_ACTIVE) {
684           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
685                              CEED_USE_POINTER, a); CeedChk(ierr);
686           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
687           CeedChk(ierr);
688           a += size*Q*blksize; // Advance the pointer by the size of the output
689         }
690       }
691       // Apply QFunction
692       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
693       CeedChk(ierr);
694     }
695   }
696 
697   // Un-set output Qvecs to prevent accidental overwrite of Assembled
698   for (CeedInt out=0; out<numoutputfields; out++) {
699     // Get output vector
700     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
701     CeedChk(ierr);
702     // Check if active output
703     if (vec == CEED_VECTOR_ACTIVE) {
704       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
705                          NULL); CeedChk(ierr);
706     }
707   }
708 
709   // Restore input arrays
710   ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields,
711                                        opinputfields, true, impl);
712   CeedChk(ierr);
713 
714   // Output blocked restriction
715   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
716   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
717   CeedElemRestriction blkrstr;
718   ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize,
719                                           numelements*Q,
720                                           numactivein*numactiveout,
721                                           CEED_MEM_HOST, CEED_COPY_VALUES,
722                                           NULL, &blkrstr); CeedChk(ierr);
723   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE,
724                                   lvec, *assembled, request); CeedChk(ierr);
725 
726   // Cleanup
727   for (CeedInt i=0; i<numactivein; i++) {
728     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
729   }
730   ierr = CeedFree(&activein); CeedChk(ierr);
731   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
732   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
733 
734   return 0;
735 }
736 
737 int CeedOperatorCreate_Opt(CeedOperator op) {
738   int ierr;
739   Ceed ceed;
740   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
741   Ceed_Opt *ceedimpl;
742   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
743   CeedInt blksize = ceedimpl->blksize;
744   CeedOperator_Opt *impl;
745 
746   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
747   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
748 
749   if (blksize == 1 || blksize == 8) {
750     ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction",
751                                   CeedOperatorAssembleLinearQFunction_Opt);
752     CeedChk(ierr);
753     ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply",
754                                   CeedOperatorApply_Opt); CeedChk(ierr);
755   } else {
756     // LCOV_EXCL_START
757     return CeedError(ceed, 1, "Opt backend cannot use blocksize: %d", blksize);
758     // LCOV_EXCL_STOP
759   }
760 
761   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
762                                 CeedOperatorDestroy_Opt); CeedChk(ierr);
763   return 0;
764 }
765