xref: /libCEED/backends/opt/ceed-opt-operator.c (revision a8d322087fa8f150327cdc2bf14a171452b711ec)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <string.h>
18 #include "ceed-opt.h"
19 #include "../ref/ceed-ref.h"
20 
21 //------------------------------------------------------------------------------
22 // Setup Input/Output Fields
23 //------------------------------------------------------------------------------
24 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op,
25                                        bool inOrOut, const CeedInt blksize,
26                                        CeedElemRestriction *blkrestr,
27                                        CeedVector *fullevecs, CeedVector *evecs,
28                                        CeedVector *qvecs, CeedInt starte,
29                                        CeedInt numfields, CeedInt Q) {
30   CeedInt dim, ierr, ncomp, size, P;
31   Ceed ceed;
32   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
33   CeedBasis basis;
34   CeedElemRestriction r;
35   CeedOperatorField *opfields;
36   CeedQFunctionField *qffields;
37   if (inOrOut) {
38     ierr = CeedOperatorGetFields(op, NULL, &opfields);
39     CeedChk(ierr);
40     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
41     CeedChk(ierr);
42   } else {
43     ierr = CeedOperatorGetFields(op, &opfields, NULL);
44     CeedChk(ierr);
45     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
46     CeedChk(ierr);
47   }
48 
49   // Loop over fields
50   for (CeedInt i=0; i<numfields; i++) {
51     CeedEvalMode emode;
52     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
53 
54     if (emode != CEED_EVAL_WEIGHT) {
55       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
56       CeedChk(ierr);
57       CeedElemRestriction_Ref *data;
58       ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr);
59       Ceed ceed;
60       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
61       CeedInt nelem, elemsize, nnodes;
62       CeedTransposeMode lmode;
63       ierr = CeedElemRestrictionGetLMode(r, &lmode); CeedChk(ierr);
64       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
65       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
66       ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
67       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
68       ierr = CeedElemRestrictionCreateBlocked(ceed, lmode, nelem, elemsize,
69                                               blksize, nnodes, ncomp,
70                                               CEED_MEM_HOST, CEED_COPY_VALUES,
71                                               data->indices, &blkrestr[i+starte]);
72       CeedChk(ierr);
73       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
74                                              &fullevecs[i+starte]);
75       CeedChk(ierr);
76     }
77 
78     switch(emode) {
79     case CEED_EVAL_NONE:
80       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
81       ierr = CeedVectorCreate(ceed, Q*size*blksize, &evecs[i]); CeedChk(ierr);
82       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
83       break;
84     case CEED_EVAL_INTERP:
85       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
86       ierr = CeedElemRestrictionGetElementSize(r, &P);
87       CeedChk(ierr);
88       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
89       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
90       break;
91     case CEED_EVAL_GRAD:
92       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
93       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
94       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
95       ierr = CeedElemRestrictionGetElementSize(r, &P);
96       CeedChk(ierr);
97       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
98       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
99       break;
100     case CEED_EVAL_WEIGHT: // Only on input fields
101       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
102       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
103       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
104                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]);
105       CeedChk(ierr);
106 
107       break;
108     case CEED_EVAL_DIV:
109       break; // Not implimented
110     case CEED_EVAL_CURL:
111       break; // Not implimented
112     }
113   }
114   return 0;
115 }
116 
117 //------------------------------------------------------------------------------
118 // Setup Operator
119 //------------------------------------------------------------------------------
120 static int CeedOperatorSetup_Opt(CeedOperator op) {
121   int ierr;
122   bool setupdone;
123   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
124   if (setupdone) return 0;
125   Ceed ceed;
126   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
127   Ceed_Opt *ceedimpl;
128   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
129   const CeedInt blksize = ceedimpl->blksize;
130   CeedOperator_Opt *impl;
131   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
132   CeedQFunction qf;
133   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
134   CeedInt Q, numinputfields, numoutputfields;
135   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
136   ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr);
137   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
138   CeedChk(ierr);
139   CeedOperatorField *opinputfields, *opoutputfields;
140   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
141   CeedChk(ierr);
142   CeedQFunctionField *qfinputfields, *qfoutputfields;
143   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
144   CeedChk(ierr);
145 
146   // Allocate
147   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
148   CeedChk(ierr);
149   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
150   CeedChk(ierr);
151   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
152   CeedChk(ierr);
153 
154   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
155   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
156   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
157   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
158   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
159 
160   impl->numein = numinputfields; impl->numeout = numoutputfields;
161 
162   // Set up infield and outfield pointer arrays
163   // Infields
164   ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blksize, impl->blkrestr,
165                                      impl->evecs, impl->evecsin,
166                                      impl->qvecsin, 0,
167                                      numinputfields, Q);
168   CeedChk(ierr);
169   // Outfields
170   ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blksize, impl->blkrestr,
171                                      impl->evecs, impl->evecsout,
172                                      impl->qvecsout, numinputfields,
173                                      numoutputfields, Q);
174   CeedChk(ierr);
175 
176   // Identity QFunctions
177   if (impl->identityqf) {
178     CeedEvalMode inmode, outmode;
179     CeedQFunctionField *infields, *outfields;
180     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr);
181 
182     for (CeedInt i=0; i<numinputfields; i++) {
183       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
184       CeedChk(ierr);
185       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
186       CeedChk(ierr);
187 
188       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
189       impl->qvecsout[i] = impl->qvecsin[i];
190       ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr);
191     }
192   }
193 
194   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
195 
196   return 0;
197 }
198 
199 //------------------------------------------------------------------------------
200 // Setup Input Fields
201 //------------------------------------------------------------------------------
202 static inline int CeedOperatorSetupInputs_Opt(CeedInt numinputfields,
203     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
204     CeedVector invec, CeedOperator_Opt *impl, CeedRequest *request) {
205   CeedInt ierr;
206   CeedEvalMode emode;
207   CeedVector vec;
208   uint64_t state;
209 
210   for (CeedInt i=0; i<numinputfields; i++) {
211     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
212     CeedChk(ierr);
213     if (emode == CEED_EVAL_WEIGHT) { // Skip
214     } else {
215       // Get input vector
216       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
217       if (vec != CEED_VECTOR_ACTIVE) {
218         // Restrict
219         ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
220         if (state != impl->inputstate[i]) {
221           ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
222                                           vec, impl->evecs[i], request);
223           CeedChk(ierr);
224           impl->inputstate[i] = state;
225         }
226       } else {
227         // Set Qvec for CEED_EVAL_NONE
228         if (emode == CEED_EVAL_NONE) {
229           ierr = CeedVectorGetArray(impl->evecsin[i], CEED_MEM_HOST,
230                                     &impl->edata[i]); CeedChk(ierr);
231           ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
232                                     CEED_USE_POINTER,
233                                     impl->edata[i]); CeedChk(ierr);
234           ierr = CeedVectorRestoreArray(impl->evecsin[i],
235                                         &impl->edata[i]); CeedChk(ierr);
236         }
237       }
238       // Get evec
239       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
240                                     (const CeedScalar **) &impl->edata[i]);
241       CeedChk(ierr);
242     }
243   }
244   return 0;
245 }
246 
247 //------------------------------------------------------------------------------
248 // Input Basis Action
249 //------------------------------------------------------------------------------
250 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q,
251     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
252     CeedInt numinputfields, CeedInt blksize, CeedVector invec, bool skipactive,
253     CeedOperator_Opt *impl, CeedRequest *request) {
254   CeedInt ierr;
255   CeedInt dim, elemsize, size;
256   CeedElemRestriction Erestrict;
257   CeedEvalMode emode;
258   CeedBasis basis;
259   CeedVector vec;
260 
261   for (CeedInt i=0; i<numinputfields; i++) {
262     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
263     // Skip active input
264     if (skipactive) {
265       if (vec == CEED_VECTOR_ACTIVE)
266         continue;
267     }
268 
269     CeedInt activein = 0;
270     // Get elemsize, emode, size
271     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
272     CeedChk(ierr);
273     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
274     CeedChk(ierr);
275     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
276     CeedChk(ierr);
277     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
278     // Restrict block active input
279     if (vec == CEED_VECTOR_ACTIVE) {
280       ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i], e/blksize,
281                                            CEED_NOTRANSPOSE, invec,
282                                            impl->evecsin[i], request);
283       CeedChk(ierr);
284       activein = 1;
285     }
286     // Basis action
287     switch(emode) {
288     case CEED_EVAL_NONE:
289       if (!activein) {
290         ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
291                                   CEED_USE_POINTER,
292                                   &impl->edata[i][e*Q*size]); CeedChk(ierr);
293       }
294       break;
295     case CEED_EVAL_INTERP:
296       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
297       CeedChk(ierr);
298       if (!activein) {
299         ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
300                                   CEED_USE_POINTER,
301                                   &impl->edata[i][e*elemsize*size]);
302         CeedChk(ierr);
303       }
304       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
305                             CEED_EVAL_INTERP, impl->evecsin[i],
306                             impl->qvecsin[i]); CeedChk(ierr);
307       break;
308     case CEED_EVAL_GRAD:
309       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
310       CeedChk(ierr);
311       if (!activein) {
312         ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
313         ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
314                                   CEED_USE_POINTER,
315                                   &impl->edata[i][e*elemsize*size/dim]);
316         CeedChk(ierr);
317       }
318       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
319                             CEED_EVAL_GRAD, impl->evecsin[i],
320                             impl->qvecsin[i]); CeedChk(ierr);
321       break;
322     case CEED_EVAL_WEIGHT:
323       break;  // No action
324     case CEED_EVAL_DIV:
325     case CEED_EVAL_CURL: {
326       // LCOV_EXCL_START
327       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
328       CeedChk(ierr);
329       Ceed ceed;
330       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
331       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
332       // LCOV_EXCL_STOP
333       break; // Not implemented
334     }
335     }
336   }
337   return 0;
338 }
339 
340 //------------------------------------------------------------------------------
341 // Output Basis Action
342 //------------------------------------------------------------------------------
343 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q,
344     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
345     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
346     CeedOperator op, CeedVector outvec, CeedOperator_Opt *impl,
347     CeedRequest *request) {
348   CeedInt ierr;
349   CeedElemRestriction Erestrict;
350   CeedEvalMode emode;
351   CeedBasis basis;
352   CeedVector vec;
353 
354   for (CeedInt i=0; i<numoutputfields; i++) {
355     // Get elemsize, emode, size
356     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
357     CeedChk(ierr);
358     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
359     CeedChk(ierr);
360     // Basis action
361     switch(emode) {
362     case CEED_EVAL_NONE:
363       break; // No action
364     case CEED_EVAL_INTERP:
365       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
366       CeedChk(ierr);
367       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
368                             CEED_EVAL_INTERP, impl->qvecsout[i],
369                             impl->evecsout[i]); CeedChk(ierr);
370       break;
371     case CEED_EVAL_GRAD:
372       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
373       CeedChk(ierr);
374       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
375                             CEED_EVAL_GRAD, impl->qvecsout[i],
376                             impl->evecsout[i]); CeedChk(ierr);
377       break;
378     case CEED_EVAL_WEIGHT: {
379       // LCOV_EXCL_START
380       Ceed ceed;
381       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
382       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
383                        "evaluation mode");
384       // LCOV_EXCL_STOP
385       break; // Should not occur
386     }
387     case CEED_EVAL_DIV:
388     case CEED_EVAL_CURL: {
389       // LCOV_EXCL_START
390       Ceed ceed;
391       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
392       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
393       // LCOV_EXCL_STOP
394       break; // Not implemented
395     }
396     }
397     // Restrict output block
398     // Get output vector
399     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
400     if (vec == CEED_VECTOR_ACTIVE)
401       vec = outvec;
402     // Restrict
403     ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i+impl->numein],
404                                          e/blksize, CEED_TRANSPOSE,
405                                          impl->evecsout[i], vec, request);
406     CeedChk(ierr);
407   }
408   return 0;
409 }
410 
411 //------------------------------------------------------------------------------
412 // Restore Input Vectors
413 //------------------------------------------------------------------------------
414 static inline int CeedOperatorRestoreInputs_Opt(CeedInt numinputfields,
415     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
416     bool skipactive, CeedOperator_Opt *impl) {
417   CeedInt ierr;
418   CeedEvalMode emode;
419 
420   for (CeedInt i=0; i<numinputfields; i++) {
421     // Skip active inputs
422     if (skipactive) {
423       CeedVector vec;
424       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
425       if (vec == CEED_VECTOR_ACTIVE)
426         continue;
427     }
428     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
429     CeedChk(ierr);
430     if (emode == CEED_EVAL_WEIGHT) { // Skip
431     } else {
432       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
433                                         (const CeedScalar **) &impl->edata[i]);
434       CeedChk(ierr);
435     }
436   }
437   return 0;
438 }
439 
440 //------------------------------------------------------------------------------
441 // Operator Apply
442 //------------------------------------------------------------------------------
443 static int CeedOperatorApply_Opt(CeedOperator op, CeedVector invec,
444                                  CeedVector outvec, CeedRequest *request) {
445   int ierr;
446   Ceed ceed;
447   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
448   Ceed_Opt *ceedimpl;
449   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
450   CeedInt blksize = ceedimpl->blksize;
451   CeedOperator_Opt *impl;
452   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
453   CeedInt Q, numinputfields, numoutputfields, numelements;
454   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
455   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
456   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
457   CeedQFunction qf;
458   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
459   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
460   CeedChk(ierr);
461   CeedOperatorField *opinputfields, *opoutputfields;
462   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
463   CeedChk(ierr);
464   CeedQFunctionField *qfinputfields, *qfoutputfields;
465   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
466   CeedChk(ierr);
467   CeedEvalMode emode;
468 
469   // Setup
470   ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr);
471 
472   // Input Evecs and Restriction
473   ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields,
474                                      opinputfields, invec, impl, request);
475   CeedChk(ierr);
476 
477   // Output Lvecs, Evecs, and Qvecs
478   for (CeedInt i=0; i<numoutputfields; i++) {
479     // Set Qvec if needed
480     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
481     CeedChk(ierr);
482     if (emode == CEED_EVAL_NONE) {
483       // Set qvec to single block evec
484       ierr = CeedVectorGetArray(impl->evecsout[i], CEED_MEM_HOST,
485                                 &impl->edata[i + numinputfields]);
486       CeedChk(ierr);
487       ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
488                                 CEED_USE_POINTER,
489                                 impl->edata[i + numinputfields]); CeedChk(ierr);
490       ierr = CeedVectorRestoreArray(impl->evecsout[i],
491                                     &impl->edata[i + numinputfields]);
492       CeedChk(ierr);
493     }
494   }
495 
496   // Loop through elements
497   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
498     // Input basis apply
499     ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields,
500                                       numinputfields, blksize, invec, false,
501                                       impl, request); CeedChk(ierr);
502 
503     // Q function
504     if (!impl->identityqf) {
505       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
506       CeedChk(ierr);
507     }
508 
509     // Output basis apply and restrict
510     ierr = CeedOperatorOutputBasis_Opt(e, Q, qfoutputfields, opoutputfields,
511                                        blksize, numinputfields, numoutputfields,
512                                        op, outvec, impl, request);
513     CeedChk(ierr);
514   }
515 
516   // Restore input arrays
517   ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields,
518                                        opinputfields, false, impl);
519   CeedChk(ierr);
520 
521   return 0;
522 }
523 
524 //------------------------------------------------------------------------------
525 // Assemble Linear QFunction
526 //------------------------------------------------------------------------------
527 static int CeedOperatorAssembleLinearQFunction_Opt(CeedOperator op,
528     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
529   int ierr;
530   Ceed ceed;
531   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
532   Ceed_Opt *ceedimpl;
533   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
534   const CeedInt blksize = ceedimpl->blksize;
535   CeedOperator_Opt *impl;
536   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
537   CeedInt Q, numinputfields, numoutputfields, numelements, size;
538   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
539   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
540   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
541   CeedQFunction qf;
542   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
543   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
544   CeedChk(ierr);
545   CeedOperatorField *opinputfields, *opoutputfields;
546   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
547   CeedChk(ierr);
548   CeedQFunctionField *qfinputfields, *qfoutputfields;
549   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
550   CeedChk(ierr);
551   CeedVector vec, lvec;
552   CeedInt numactivein = 0, numactiveout = 0;
553   CeedVector *activein = NULL;
554   CeedScalar *a, *tmp;
555 
556   // Setup
557   ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr);
558 
559   // Check for identity
560   if (impl->identityqf)
561     // LCOV_EXCL_START
562     return CeedError(ceed, 1, "Assembling identity qfunctions not supported");
563   // LCOV_EXCL_STOP
564 
565   // Input Evecs and Restriction
566   ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields,
567                                      opinputfields, NULL, impl, request);
568   CeedChk(ierr);
569 
570   // Count number of active input fields
571   for (CeedInt i=0; i<numinputfields; i++) {
572     // Get input vector
573     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
574     // Check if active input
575     if (vec == CEED_VECTOR_ACTIVE) {
576       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
577       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
578       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
579       CeedChk(ierr);
580       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
581       for (CeedInt field=0; field<size; field++) {
582         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
583         CeedChk(ierr);
584         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
585                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
586         CeedChk(ierr);
587       }
588       numactivein += size;
589       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
590     }
591   }
592 
593   // Count number of active output fields
594   for (CeedInt i=0; i<numoutputfields; i++) {
595     // Get output vector
596     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
597     // Check if active output
598     if (vec == CEED_VECTOR_ACTIVE) {
599       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
600       numactiveout += size;
601     }
602   }
603 
604   // Check sizes
605   if (!numactivein || !numactiveout)
606     // LCOV_EXCL_START
607     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
608                      "and outputs");
609   // LCOV_EXCL_STOP
610 
611   // Setup lvec
612   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
613                           &lvec); CeedChk(ierr);
614   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
615 
616   // Create output restriction
617   CeedTransposeMode lmode = CEED_NOTRANSPOSE;
618   ierr = CeedElemRestrictionCreateIdentity(ceed, lmode, numelements, Q,
619          numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr);
620   // Create assembled vector
621   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
622                           assembled); CeedChk(ierr);
623 
624   // Loop through elements
625   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
626     // Input basis apply
627     ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields,
628                                       numinputfields, blksize, NULL, true,
629                                       impl, request); CeedChk(ierr);
630 
631     // Assemble QFunction
632     for (CeedInt in=0; in<numactivein; in++) {
633       // Set Inputs
634       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
635       if (numactivein > 1) {
636         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
637                                   0.0); CeedChk(ierr);
638       }
639       // Set Outputs
640       for (CeedInt out=0; out<numoutputfields; out++) {
641         // Get output vector
642         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
643         CeedChk(ierr);
644         // Check if active output
645         if (vec == CEED_VECTOR_ACTIVE) {
646           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
647                              CEED_USE_POINTER, a); CeedChk(ierr);
648           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
649           CeedChk(ierr);
650           a += size*Q*blksize; // Advance the pointer by the size of the output
651         }
652       }
653       // Apply QFunction
654       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
655       CeedChk(ierr);
656     }
657   }
658 
659   // Un-set output Qvecs to prevent accidental overwrite of Assembled
660   for (CeedInt out=0; out<numoutputfields; out++) {
661     // Get output vector
662     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
663     CeedChk(ierr);
664     // Check if active output
665     if (vec == CEED_VECTOR_ACTIVE) {
666       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
667                          NULL); CeedChk(ierr);
668     }
669   }
670 
671   // Restore input arrays
672   ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields,
673                                        opinputfields, true, impl);
674   CeedChk(ierr);
675 
676   // Output blocked restriction
677   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
678   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
679   CeedElemRestriction blkrstr;
680   ierr = CeedElemRestrictionCreateBlocked(ceed, lmode, numelements, Q, blksize,
681                                           numelements*Q,
682                                           numactivein*numactiveout,
683                                           CEED_MEM_HOST, CEED_COPY_VALUES,
684                                           NULL, &blkrstr); CeedChk(ierr);
685   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled,
686                                   request); CeedChk(ierr);
687 
688   // Cleanup
689   for (CeedInt i=0; i<numactivein; i++) {
690     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
691   }
692   ierr = CeedFree(&activein); CeedChk(ierr);
693   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
694   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
695 
696   return 0;
697 }
698 
699 //------------------------------------------------------------------------------
700 // Operator Destroy
701 //------------------------------------------------------------------------------
702 static int CeedOperatorDestroy_Opt(CeedOperator op) {
703   int ierr;
704   CeedOperator_Opt *impl;
705   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
706 
707   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
708     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
709     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
710   }
711   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
712   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
713   ierr = CeedFree(&impl->edata); CeedChk(ierr);
714   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
715 
716   for (CeedInt i=0; i<impl->numein; i++) {
717     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
718     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
719   }
720   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
721   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
722 
723   for (CeedInt i=0; i<impl->numeout; i++) {
724     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
725     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
726   }
727   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
728   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
729 
730   ierr = CeedFree(&impl); CeedChk(ierr);
731   return 0;
732 }
733 
734 //------------------------------------------------------------------------------
735 // Operator Create
736 //------------------------------------------------------------------------------
737 int CeedOperatorCreate_Opt(CeedOperator op) {
738   int ierr;
739   Ceed ceed;
740   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
741   Ceed_Opt *ceedimpl;
742   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
743   CeedInt blksize = ceedimpl->blksize;
744   CeedOperator_Opt *impl;
745 
746   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
747   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
748 
749   if (blksize != 1 && blksize != 8)
750     // LCOV_EXCL_START
751     return CeedError(ceed, 1, "Opt backend cannot use blocksize: %d", blksize);
752   // LCOV_EXCL_STOP
753 
754   ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction",
755                                 CeedOperatorAssembleLinearQFunction_Opt);
756   CeedChk(ierr);
757   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
758                                 CeedOperatorApply_Opt); CeedChk(ierr);
759   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
760                                 CeedOperatorDestroy_Opt); CeedChk(ierr);
761   return 0;
762 }
763 //------------------------------------------------------------------------------
764