xref: /libCEED/backends/opt/ceed-opt-operator.c (revision 014ec18d8ea934e4fc0f69a8c06ffce36a53ff2a)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <string.h>
18 #include "ceed-opt.h"
19 #include "../ref/ceed-ref.h"
20 
21 //------------------------------------------------------------------------------
22 // Setup Input/Output Fields
23 //------------------------------------------------------------------------------
24 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op,
25                                        bool inOrOut, const CeedInt blksize,
26                                        CeedElemRestriction *blkrestr,
27                                        CeedVector *fullevecs, CeedVector *evecs,
28                                        CeedVector *qvecs, CeedInt starte,
29                                        CeedInt numfields, CeedInt Q) {
30   CeedInt dim, ierr, ncomp, size, P;
31   Ceed ceed;
32   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
33   CeedBasis basis;
34   CeedElemRestriction r;
35   CeedOperatorField *opfields;
36   CeedQFunctionField *qffields;
37   if (inOrOut) {
38     ierr = CeedOperatorGetFields(op, NULL, &opfields);
39     CeedChk(ierr);
40     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
41     CeedChk(ierr);
42   } else {
43     ierr = CeedOperatorGetFields(op, &opfields, NULL);
44     CeedChk(ierr);
45     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
46     CeedChk(ierr);
47   }
48 
49   // Loop over fields
50   for (CeedInt i=0; i<numfields; i++) {
51     CeedEvalMode emode;
52     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
53 
54     if (emode != CEED_EVAL_WEIGHT) {
55       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
56       CeedChk(ierr);
57       CeedElemRestriction_Ref *data;
58       ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr);
59       Ceed ceed;
60       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
61       CeedInt nelem, elemsize, lsize, compstride;
62       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
63       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
64       ierr = CeedElemRestrictionGetLVectorSize(r, &lsize); CeedChk(ierr);
65       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
66       if (data->offsets) {
67         ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr);
68         ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize,
69                                                 blksize, ncomp, compstride,
70                                                 lsize, CEED_MEM_HOST,
71                                                 CEED_COPY_VALUES, data->offsets,
72                                                 &blkrestr[i+starte]);
73         CeedChk(ierr);
74       } else {
75         CeedInt strides[3];
76         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr);
77         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, nelem, elemsize,
78                blksize, ncomp, lsize, strides, &blkrestr[i+starte]);
79         CeedChk(ierr);
80       }
81       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
82                                              &fullevecs[i+starte]);
83       CeedChk(ierr);
84     }
85 
86     switch(emode) {
87     case CEED_EVAL_NONE:
88       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
89       ierr = CeedVectorCreate(ceed, Q*size*blksize, &evecs[i]); CeedChk(ierr);
90       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
91       break;
92     case CEED_EVAL_INTERP:
93       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
94       ierr = CeedElemRestrictionGetElementSize(r, &P);
95       CeedChk(ierr);
96       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
97       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
98       break;
99     case CEED_EVAL_GRAD:
100       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
101       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
102       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
103       ierr = CeedElemRestrictionGetElementSize(r, &P);
104       CeedChk(ierr);
105       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
106       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
107       break;
108     case CEED_EVAL_WEIGHT: // Only on input fields
109       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
110       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
111       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
112                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]);
113       CeedChk(ierr);
114 
115       break;
116     case CEED_EVAL_DIV:
117       break; // Not implimented
118     case CEED_EVAL_CURL:
119       break; // Not implimented
120     }
121   }
122   return 0;
123 }
124 
125 //------------------------------------------------------------------------------
126 // Setup Operator
127 //------------------------------------------------------------------------------
128 static int CeedOperatorSetup_Opt(CeedOperator op) {
129   int ierr;
130   bool setupdone;
131   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
132   if (setupdone) return 0;
133   Ceed ceed;
134   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
135   Ceed_Opt *ceedimpl;
136   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
137   const CeedInt blksize = ceedimpl->blksize;
138   CeedOperator_Opt *impl;
139   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
140   CeedQFunction qf;
141   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
142   CeedInt Q, numinputfields, numoutputfields;
143   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
144   ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr);
145   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
146   CeedChk(ierr);
147   CeedOperatorField *opinputfields, *opoutputfields;
148   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
149   CeedChk(ierr);
150   CeedQFunctionField *qfinputfields, *qfoutputfields;
151   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
152   CeedChk(ierr);
153 
154   // Allocate
155   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
156   CeedChk(ierr);
157   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
158   CeedChk(ierr);
159   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
160   CeedChk(ierr);
161 
162   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
163   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
164   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
165   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
166   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
167 
168   impl->numein = numinputfields; impl->numeout = numoutputfields;
169 
170   // Set up infield and outfield pointer arrays
171   // Infields
172   ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blksize, impl->blkrestr,
173                                      impl->evecs, impl->evecsin,
174                                      impl->qvecsin, 0,
175                                      numinputfields, Q);
176   CeedChk(ierr);
177   // Outfields
178   ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blksize, impl->blkrestr,
179                                      impl->evecs, impl->evecsout,
180                                      impl->qvecsout, numinputfields,
181                                      numoutputfields, Q);
182   CeedChk(ierr);
183 
184   // Identity QFunctions
185   if (impl->identityqf) {
186     CeedEvalMode inmode, outmode;
187     CeedQFunctionField *infields, *outfields;
188     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr);
189 
190     for (CeedInt i=0; i<numinputfields; i++) {
191       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
192       CeedChk(ierr);
193       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
194       CeedChk(ierr);
195 
196       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
197       impl->qvecsout[i] = impl->qvecsin[i];
198       ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr);
199     }
200   }
201 
202   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
203 
204   return 0;
205 }
206 
207 //------------------------------------------------------------------------------
208 // Setup Input Fields
209 //------------------------------------------------------------------------------
210 static inline int CeedOperatorSetupInputs_Opt(CeedInt numinputfields,
211     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
212     CeedVector invec, CeedOperator_Opt *impl, CeedRequest *request) {
213   CeedInt ierr;
214   CeedEvalMode emode;
215   CeedVector vec;
216   uint64_t state;
217 
218   for (CeedInt i=0; i<numinputfields; i++) {
219     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
220     CeedChk(ierr);
221     if (emode == CEED_EVAL_WEIGHT) { // Skip
222     } else {
223       // Get input vector
224       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
225       if (vec != CEED_VECTOR_ACTIVE) {
226         // Restrict
227         ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
228         if (state != impl->inputstate[i]) {
229           ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
230                                           vec, impl->evecs[i], request);
231           CeedChk(ierr);
232           impl->inputstate[i] = state;
233         }
234       } else {
235         // Set Qvec for CEED_EVAL_NONE
236         if (emode == CEED_EVAL_NONE) {
237           ierr = CeedVectorGetArray(impl->evecsin[i], CEED_MEM_HOST,
238                                     &impl->edata[i]); CeedChk(ierr);
239           ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
240                                     CEED_USE_POINTER,
241                                     impl->edata[i]); CeedChk(ierr);
242           ierr = CeedVectorRestoreArray(impl->evecsin[i],
243                                         &impl->edata[i]); CeedChk(ierr);
244         }
245       }
246       // Get evec
247       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
248                                     (const CeedScalar **) &impl->edata[i]);
249       CeedChk(ierr);
250     }
251   }
252   return 0;
253 }
254 
255 //------------------------------------------------------------------------------
256 // Input Basis Action
257 //------------------------------------------------------------------------------
258 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q,
259     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
260     CeedInt numinputfields, CeedInt blksize, CeedVector invec, bool skipactive,
261     CeedOperator_Opt *impl, CeedRequest *request) {
262   CeedInt ierr;
263   CeedInt dim, elemsize, size;
264   CeedElemRestriction Erestrict;
265   CeedEvalMode emode;
266   CeedBasis basis;
267   CeedVector vec;
268 
269   for (CeedInt i=0; i<numinputfields; i++) {
270     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
271     // Skip active input
272     if (skipactive) {
273       if (vec == CEED_VECTOR_ACTIVE)
274         continue;
275     }
276 
277     CeedInt activein = 0;
278     // Get elemsize, emode, size
279     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
280     CeedChk(ierr);
281     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
282     CeedChk(ierr);
283     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
284     CeedChk(ierr);
285     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
286     // Restrict block active input
287     if (vec == CEED_VECTOR_ACTIVE) {
288       ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i], e/blksize,
289                                            CEED_NOTRANSPOSE, invec,
290                                            impl->evecsin[i], request);
291       CeedChk(ierr);
292       activein = 1;
293     }
294     // Basis action
295     switch(emode) {
296     case CEED_EVAL_NONE:
297       if (!activein) {
298         ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
299                                   CEED_USE_POINTER,
300                                   &impl->edata[i][e*Q*size]); CeedChk(ierr);
301       }
302       break;
303     case CEED_EVAL_INTERP:
304       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
305       CeedChk(ierr);
306       if (!activein) {
307         ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
308                                   CEED_USE_POINTER,
309                                   &impl->edata[i][e*elemsize*size]);
310         CeedChk(ierr);
311       }
312       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
313                             CEED_EVAL_INTERP, impl->evecsin[i],
314                             impl->qvecsin[i]); CeedChk(ierr);
315       break;
316     case CEED_EVAL_GRAD:
317       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
318       CeedChk(ierr);
319       if (!activein) {
320         ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
321         ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
322                                   CEED_USE_POINTER,
323                                   &impl->edata[i][e*elemsize*size/dim]);
324         CeedChk(ierr);
325       }
326       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
327                             CEED_EVAL_GRAD, impl->evecsin[i],
328                             impl->qvecsin[i]); CeedChk(ierr);
329       break;
330     case CEED_EVAL_WEIGHT:
331       break;  // No action
332     // LCOV_EXCL_START
333     case CEED_EVAL_DIV:
334     case CEED_EVAL_CURL: {
335       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
336       CeedChk(ierr);
337       Ceed ceed;
338       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
339       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
340       // LCOV_EXCL_STOP
341     }
342     }
343   }
344   return 0;
345 }
346 
347 //------------------------------------------------------------------------------
348 // Output Basis Action
349 //------------------------------------------------------------------------------
350 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q,
351     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
352     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
353     CeedOperator op, CeedVector outvec, CeedOperator_Opt *impl,
354     CeedRequest *request) {
355   CeedInt ierr;
356   CeedElemRestriction Erestrict;
357   CeedEvalMode emode;
358   CeedBasis basis;
359   CeedVector vec;
360 
361   for (CeedInt i=0; i<numoutputfields; i++) {
362     // Get elemsize, emode, size
363     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
364     CeedChk(ierr);
365     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
366     CeedChk(ierr);
367     // Basis action
368     switch(emode) {
369     case CEED_EVAL_NONE:
370       break; // No action
371     case CEED_EVAL_INTERP:
372       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
373       CeedChk(ierr);
374       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
375                             CEED_EVAL_INTERP, impl->qvecsout[i],
376                             impl->evecsout[i]); CeedChk(ierr);
377       break;
378     case CEED_EVAL_GRAD:
379       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
380       CeedChk(ierr);
381       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
382                             CEED_EVAL_GRAD, impl->qvecsout[i],
383                             impl->evecsout[i]); CeedChk(ierr);
384       break;
385     // LCOV_EXCL_START
386     case CEED_EVAL_WEIGHT: {
387       Ceed ceed;
388       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
389       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
390                        "evaluation mode");
391     }
392     case CEED_EVAL_DIV:
393     case CEED_EVAL_CURL: {
394       Ceed ceed;
395       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
396       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
397       // LCOV_EXCL_STOP
398     }
399     }
400     // Restrict output block
401     // Get output vector
402     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
403     if (vec == CEED_VECTOR_ACTIVE)
404       vec = outvec;
405     // Restrict
406     ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i+impl->numein],
407                                          e/blksize, CEED_TRANSPOSE,
408                                          impl->evecsout[i], vec, request);
409     CeedChk(ierr);
410   }
411   return 0;
412 }
413 
414 //------------------------------------------------------------------------------
415 // Restore Input Vectors
416 //------------------------------------------------------------------------------
417 static inline int CeedOperatorRestoreInputs_Opt(CeedInt numinputfields,
418     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
419     bool skipactive, CeedOperator_Opt *impl) {
420   CeedInt ierr;
421   CeedEvalMode emode;
422 
423   for (CeedInt i=0; i<numinputfields; i++) {
424     // Skip active inputs
425     if (skipactive) {
426       CeedVector vec;
427       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
428       if (vec == CEED_VECTOR_ACTIVE)
429         continue;
430     }
431     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
432     CeedChk(ierr);
433     if (emode == CEED_EVAL_WEIGHT) { // Skip
434     } else {
435       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
436                                         (const CeedScalar **) &impl->edata[i]);
437       CeedChk(ierr);
438     }
439   }
440   return 0;
441 }
442 
443 //------------------------------------------------------------------------------
444 // Operator Apply
445 //------------------------------------------------------------------------------
446 static int CeedOperatorApply_Opt(CeedOperator op, CeedVector invec,
447                                  CeedVector outvec, CeedRequest *request) {
448   int ierr;
449   Ceed ceed;
450   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
451   Ceed_Opt *ceedimpl;
452   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
453   CeedInt blksize = ceedimpl->blksize;
454   CeedOperator_Opt *impl;
455   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
456   CeedInt Q, numinputfields, numoutputfields, numelements;
457   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
458   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
459   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
460   CeedQFunction qf;
461   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
462   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
463   CeedChk(ierr);
464   CeedOperatorField *opinputfields, *opoutputfields;
465   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
466   CeedChk(ierr);
467   CeedQFunctionField *qfinputfields, *qfoutputfields;
468   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
469   CeedChk(ierr);
470   CeedEvalMode emode;
471 
472   // Setup
473   ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr);
474 
475   // Input Evecs and Restriction
476   ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields,
477                                      opinputfields, invec, impl, request);
478   CeedChk(ierr);
479 
480   // Output Lvecs, Evecs, and Qvecs
481   for (CeedInt i=0; i<numoutputfields; i++) {
482     // Set Qvec if needed
483     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
484     CeedChk(ierr);
485     if (emode == CEED_EVAL_NONE) {
486       // Set qvec to single block evec
487       ierr = CeedVectorGetArray(impl->evecsout[i], CEED_MEM_HOST,
488                                 &impl->edata[i + numinputfields]);
489       CeedChk(ierr);
490       ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
491                                 CEED_USE_POINTER,
492                                 impl->edata[i + numinputfields]); CeedChk(ierr);
493       ierr = CeedVectorRestoreArray(impl->evecsout[i],
494                                     &impl->edata[i + numinputfields]);
495       CeedChk(ierr);
496     }
497   }
498 
499   // Loop through elements
500   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
501     // Input basis apply
502     ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields,
503                                       numinputfields, blksize, invec, false,
504                                       impl, request); CeedChk(ierr);
505 
506     // Q function
507     if (!impl->identityqf) {
508       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
509       CeedChk(ierr);
510     }
511 
512     // Output basis apply and restrict
513     ierr = CeedOperatorOutputBasis_Opt(e, Q, qfoutputfields, opoutputfields,
514                                        blksize, numinputfields, numoutputfields,
515                                        op, outvec, impl, request);
516     CeedChk(ierr);
517   }
518 
519   // Restore input arrays
520   ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields,
521                                        opinputfields, false, impl);
522   CeedChk(ierr);
523 
524   return 0;
525 }
526 
527 //------------------------------------------------------------------------------
528 // Assemble Linear QFunction
529 //------------------------------------------------------------------------------
530 static int CeedOperatorAssembleLinearQFunction_Opt(CeedOperator op,
531     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
532   int ierr;
533   Ceed ceed;
534   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
535   Ceed_Opt *ceedimpl;
536   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
537   const CeedInt blksize = ceedimpl->blksize;
538   CeedOperator_Opt *impl;
539   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
540   CeedInt Q, numinputfields, numoutputfields, numelements, size;
541   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
542   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
543   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
544   CeedQFunction qf;
545   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
546   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
547   CeedChk(ierr);
548   CeedOperatorField *opinputfields, *opoutputfields;
549   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
550   CeedChk(ierr);
551   CeedQFunctionField *qfinputfields, *qfoutputfields;
552   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
553   CeedChk(ierr);
554   CeedVector vec, lvec;
555   CeedInt numactivein = 0, numactiveout = 0;
556   CeedVector *activein = NULL;
557   CeedScalar *a, *tmp;
558 
559   // Setup
560   ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr);
561 
562   // Check for identity
563   if (impl->identityqf)
564     // LCOV_EXCL_START
565     return CeedError(ceed, 1, "Assembling identity qfunctions not supported");
566   // LCOV_EXCL_STOP
567 
568   // Input Evecs and Restriction
569   ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields,
570                                      opinputfields, NULL, impl, request);
571   CeedChk(ierr);
572 
573   // Count number of active input fields
574   for (CeedInt i=0; i<numinputfields; i++) {
575     // Get input vector
576     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
577     // Check if active input
578     if (vec == CEED_VECTOR_ACTIVE) {
579       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
580       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
581       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
582       CeedChk(ierr);
583       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
584       for (CeedInt field=0; field<size; field++) {
585         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
586         CeedChk(ierr);
587         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
588                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
589         CeedChk(ierr);
590       }
591       numactivein += size;
592       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
593     }
594   }
595 
596   // Count number of active output fields
597   for (CeedInt i=0; i<numoutputfields; i++) {
598     // Get output vector
599     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
600     // Check if active output
601     if (vec == CEED_VECTOR_ACTIVE) {
602       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
603       numactiveout += size;
604     }
605   }
606 
607   // Check sizes
608   if (!numactivein || !numactiveout)
609     // LCOV_EXCL_START
610     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
611                      "and outputs");
612   // LCOV_EXCL_STOP
613 
614   // Setup lvec
615   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
616                           &lvec); CeedChk(ierr);
617   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
618 
619   // Create output restriction
620   CeedInt strides[3] = {1, Q, numactivein *numactiveout*Q};
621   ierr = CeedElemRestrictionCreateStrided(ceed, numelements, Q,
622                                           numactivein*numactiveout,
623                                           numactivein*numactiveout*numelements*Q,
624                                           strides, rstr); CeedChk(ierr);
625   // Create assembled vector
626   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
627                           assembled); CeedChk(ierr);
628 
629   // Loop through elements
630   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
631     // Input basis apply
632     ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields,
633                                       numinputfields, blksize, NULL, true,
634                                       impl, request); CeedChk(ierr);
635 
636     // Assemble QFunction
637     for (CeedInt in=0; in<numactivein; in++) {
638       // Set Inputs
639       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
640       if (numactivein > 1) {
641         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
642                                   0.0); CeedChk(ierr);
643       }
644       // Set Outputs
645       for (CeedInt out=0; out<numoutputfields; out++) {
646         // Get output vector
647         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
648         CeedChk(ierr);
649         // Check if active output
650         if (vec == CEED_VECTOR_ACTIVE) {
651           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
652                              CEED_USE_POINTER, a); CeedChk(ierr);
653           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
654           CeedChk(ierr);
655           a += size*Q*blksize; // Advance the pointer by the size of the output
656         }
657       }
658       // Apply QFunction
659       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
660       CeedChk(ierr);
661     }
662   }
663 
664   // Un-set output Qvecs to prevent accidental overwrite of Assembled
665   for (CeedInt out=0; out<numoutputfields; out++) {
666     // Get output vector
667     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
668     CeedChk(ierr);
669     // Check if active output
670     if (vec == CEED_VECTOR_ACTIVE) {
671       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
672                          NULL); CeedChk(ierr);
673     }
674   }
675 
676   // Restore input arrays
677   ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields,
678                                        opinputfields, true, impl);
679   CeedChk(ierr);
680 
681   // Output blocked restriction
682   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
683   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
684   CeedElemRestriction blkrstr;
685   ierr = CeedElemRestrictionCreateBlockedStrided(ceed, numelements, Q, blksize,
686          numactivein*numactiveout, numactivein*numactiveout*numelements*Q,
687          strides, &blkrstr); CeedChk(ierr);
688   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled,
689                                   request); CeedChk(ierr);
690 
691   // Cleanup
692   for (CeedInt i=0; i<numactivein; i++) {
693     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
694   }
695   ierr = CeedFree(&activein); CeedChk(ierr);
696   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
697   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
698 
699   return 0;
700 }
701 
702 //------------------------------------------------------------------------------
703 // Operator Destroy
704 //------------------------------------------------------------------------------
705 static int CeedOperatorDestroy_Opt(CeedOperator op) {
706   int ierr;
707   CeedOperator_Opt *impl;
708   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
709 
710   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
711     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
712     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
713   }
714   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
715   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
716   ierr = CeedFree(&impl->edata); CeedChk(ierr);
717   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
718 
719   for (CeedInt i=0; i<impl->numein; i++) {
720     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
721     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
722   }
723   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
724   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
725 
726   for (CeedInt i=0; i<impl->numeout; i++) {
727     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
728     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
729   }
730   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
731   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
732 
733   ierr = CeedFree(&impl); CeedChk(ierr);
734   return 0;
735 }
736 
737 //------------------------------------------------------------------------------
738 // Operator Create
739 //------------------------------------------------------------------------------
740 int CeedOperatorCreate_Opt(CeedOperator op) {
741   int ierr;
742   Ceed ceed;
743   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
744   Ceed_Opt *ceedimpl;
745   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
746   CeedInt blksize = ceedimpl->blksize;
747   CeedOperator_Opt *impl;
748 
749   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
750   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
751 
752   if (blksize != 1 && blksize != 8)
753     // LCOV_EXCL_START
754     return CeedError(ceed, 1, "Opt backend cannot use blocksize: %d", blksize);
755   // LCOV_EXCL_STOP
756 
757   ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction",
758                                 CeedOperatorAssembleLinearQFunction_Opt);
759   CeedChk(ierr);
760   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
761                                 CeedOperatorApply_Opt); CeedChk(ierr);
762   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
763                                 CeedOperatorDestroy_Opt); CeedChk(ierr);
764   return 0;
765 }
766 //------------------------------------------------------------------------------
767