xref: /libCEED/backends/opt/ceed-opt-operator.c (revision 7b46028a0f84b223a3c73283d348b3ce0c32972b)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed.h>
18 #include <ceed-backend.h>
19 #include <stdbool.h>
20 #include <stdint.h>
21 #include <string.h>
22 #include "ceed-opt.h"
23 
24 //------------------------------------------------------------------------------
25 // Setup Input/Output Fields
26 //------------------------------------------------------------------------------
27 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op,
28                                        bool inOrOut, const CeedInt blksize,
29                                        CeedElemRestriction *blkrestr,
30                                        CeedVector *fullevecs, CeedVector *evecs,
31                                        CeedVector *qvecs, CeedInt starte,
32                                        CeedInt numfields, CeedInt Q) {
33   CeedInt dim, ierr, ncomp, size, P;
34   Ceed ceed;
35   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
36   CeedBasis basis;
37   CeedElemRestriction r;
38   CeedOperatorField *opfields;
39   CeedQFunctionField *qffields;
40   if (inOrOut) {
41     ierr = CeedOperatorGetFields(op, NULL, &opfields);
42     CeedChkBackend(ierr);
43     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
44     CeedChkBackend(ierr);
45   } else {
46     ierr = CeedOperatorGetFields(op, &opfields, NULL);
47     CeedChkBackend(ierr);
48     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
49     CeedChkBackend(ierr);
50   }
51 
52   // Loop over fields
53   for (CeedInt i=0; i<numfields; i++) {
54     CeedEvalMode emode;
55     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChkBackend(ierr);
56 
57     if (emode != CEED_EVAL_WEIGHT) {
58       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
59       CeedChkBackend(ierr);
60       Ceed ceed;
61       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
62       CeedInt nelem, elemsize, lsize, compstride;
63       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChkBackend(ierr);
64       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChkBackend(ierr);
65       ierr = CeedElemRestrictionGetLVectorSize(r, &lsize); CeedChkBackend(ierr);
66       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChkBackend(ierr);
67 
68       bool strided;
69       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
70       if (strided) {
71         CeedInt strides[3];
72         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
73         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, nelem, elemsize,
74                blksize, ncomp, lsize, strides, &blkrestr[i+starte]);
75         CeedChkBackend(ierr);
76       } else {
77         const CeedInt *offsets = NULL;
78         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
79         CeedChkBackend(ierr);
80         ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChkBackend(ierr);
81         ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize,
82                                                 blksize, ncomp, compstride,
83                                                 lsize, CEED_MEM_HOST,
84                                                 CEED_COPY_VALUES, offsets,
85                                                 &blkrestr[i+starte]);
86         CeedChkBackend(ierr);
87         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
88       }
89       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
90                                              &fullevecs[i+starte]);
91       CeedChkBackend(ierr);
92     }
93 
94     switch(emode) {
95     case CEED_EVAL_NONE:
96       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChkBackend(ierr);
97       ierr = CeedVectorCreate(ceed, Q*size*blksize, &evecs[i]); CeedChkBackend(ierr);
98       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChkBackend(ierr);
99       break;
100     case CEED_EVAL_INTERP:
101       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChkBackend(ierr);
102       ierr = CeedElemRestrictionGetElementSize(r, &P);
103       CeedChkBackend(ierr);
104       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChkBackend(ierr);
105       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChkBackend(ierr);
106       break;
107     case CEED_EVAL_GRAD:
108       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChkBackend(ierr);
109       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChkBackend(ierr);
110       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
111       ierr = CeedElemRestrictionGetElementSize(r, &P);
112       CeedChkBackend(ierr);
113       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]);
114       CeedChkBackend(ierr);
115       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChkBackend(ierr);
116       break;
117     case CEED_EVAL_WEIGHT: // Only on input fields
118       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChkBackend(ierr);
119       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChkBackend(ierr);
120       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
121                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]);
122       CeedChkBackend(ierr);
123 
124       break;
125     case CEED_EVAL_DIV:
126       break; // Not implemented
127     case CEED_EVAL_CURL:
128       break; // Not implemented
129     }
130   }
131   return CEED_ERROR_SUCCESS;
132 }
133 
134 //------------------------------------------------------------------------------
135 // Setup Operator
136 //------------------------------------------------------------------------------
137 static int CeedOperatorSetup_Opt(CeedOperator op) {
138   int ierr;
139   bool setupdone;
140   ierr = CeedOperatorIsSetupDone(op, &setupdone); CeedChkBackend(ierr);
141   if (setupdone) return CEED_ERROR_SUCCESS;
142   Ceed ceed;
143   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
144   Ceed_Opt *ceedimpl;
145   ierr = CeedGetData(ceed, &ceedimpl); CeedChkBackend(ierr);
146   const CeedInt blksize = ceedimpl->blksize;
147   CeedOperator_Opt *impl;
148   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
149   CeedQFunction qf;
150   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
151   CeedInt Q, numinputfields, numoutputfields;
152   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
153   ierr = CeedQFunctionIsIdentity(qf, &impl->identityqf); CeedChkBackend(ierr);
154   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
155   CeedChkBackend(ierr);
156   CeedOperatorField *opinputfields, *opoutputfields;
157   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
158   CeedChkBackend(ierr);
159   CeedQFunctionField *qfinputfields, *qfoutputfields;
160   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
161   CeedChkBackend(ierr);
162 
163   // Allocate
164   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
165   CeedChkBackend(ierr);
166   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
167   CeedChkBackend(ierr);
168   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
169   CeedChkBackend(ierr);
170 
171   ierr = CeedCalloc(16, &impl->inputstate); CeedChkBackend(ierr);
172   ierr = CeedCalloc(16, &impl->evecsin); CeedChkBackend(ierr);
173   ierr = CeedCalloc(16, &impl->evecsout); CeedChkBackend(ierr);
174   ierr = CeedCalloc(16, &impl->qvecsin); CeedChkBackend(ierr);
175   ierr = CeedCalloc(16, &impl->qvecsout); CeedChkBackend(ierr);
176 
177   impl->numein = numinputfields; impl->numeout = numoutputfields;
178 
179   // Set up infield and outfield pointer arrays
180   // Infields
181   ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blksize, impl->blkrestr,
182                                      impl->evecs, impl->evecsin,
183                                      impl->qvecsin, 0,
184                                      numinputfields, Q);
185   CeedChkBackend(ierr);
186   // Outfields
187   ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blksize, impl->blkrestr,
188                                      impl->evecs, impl->evecsout,
189                                      impl->qvecsout, numinputfields,
190                                      numoutputfields, Q);
191   CeedChkBackend(ierr);
192 
193   // Identity QFunctions
194   if (impl->identityqf) {
195     CeedEvalMode inmode, outmode;
196     CeedQFunctionField *infields, *outfields;
197     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChkBackend(ierr);
198 
199     for (CeedInt i=0; i<numinputfields; i++) {
200       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
201       CeedChkBackend(ierr);
202       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
203       CeedChkBackend(ierr);
204 
205       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChkBackend(ierr);
206       impl->qvecsout[i] = impl->qvecsin[i];
207       ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChkBackend(ierr);
208     }
209   }
210 
211   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
212 
213   return CEED_ERROR_SUCCESS;
214 }
215 
216 //------------------------------------------------------------------------------
217 // Setup Input Fields
218 //------------------------------------------------------------------------------
219 static inline int CeedOperatorSetupInputs_Opt(CeedInt numinputfields,
220     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
221     CeedVector invec, CeedOperator_Opt *impl, CeedRequest *request) {
222   CeedInt ierr;
223   CeedEvalMode emode;
224   CeedVector vec;
225   uint64_t state;
226 
227   for (CeedInt i=0; i<numinputfields; i++) {
228     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
229     CeedChkBackend(ierr);
230     if (emode == CEED_EVAL_WEIGHT) { // Skip
231     } else {
232       // Get input vector
233       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr);
234       if (vec != CEED_VECTOR_ACTIVE) {
235         // Restrict
236         ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
237         if (state != impl->inputstate[i]) {
238           ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
239                                           vec, impl->evecs[i], request);
240           CeedChkBackend(ierr);
241           impl->inputstate[i] = state;
242         }
243       } else {
244         // Set Qvec for CEED_EVAL_NONE
245         if (emode == CEED_EVAL_NONE) {
246           ierr = CeedVectorGetArray(impl->evecsin[i], CEED_MEM_HOST,
247                                     &impl->edata[i]); CeedChkBackend(ierr);
248           ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
249                                     CEED_USE_POINTER,
250                                     impl->edata[i]); CeedChkBackend(ierr);
251           ierr = CeedVectorRestoreArray(impl->evecsin[i],
252                                         &impl->edata[i]); CeedChkBackend(ierr);
253         }
254       }
255       // Get evec
256       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
257                                     (const CeedScalar **) &impl->edata[i]);
258       CeedChkBackend(ierr);
259     }
260   }
261   return CEED_ERROR_SUCCESS;
262 }
263 
264 //------------------------------------------------------------------------------
265 // Input Basis Action
266 //------------------------------------------------------------------------------
267 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q,
268     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
269     CeedInt numinputfields, CeedInt blksize, CeedVector invec, bool skipactive,
270     CeedOperator_Opt *impl, CeedRequest *request) {
271   CeedInt ierr;
272   CeedInt dim, elemsize, size;
273   CeedElemRestriction Erestrict;
274   CeedEvalMode emode;
275   CeedBasis basis;
276   CeedVector vec;
277 
278   for (CeedInt i=0; i<numinputfields; i++) {
279     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr);
280     // Skip active input
281     if (skipactive) {
282       if (vec == CEED_VECTOR_ACTIVE)
283         continue;
284     }
285 
286     CeedInt activein = 0;
287     // Get elemsize, emode, size
288     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
289     CeedChkBackend(ierr);
290     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
291     CeedChkBackend(ierr);
292     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
293     CeedChkBackend(ierr);
294     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChkBackend(ierr);
295     // Restrict block active input
296     if (vec == CEED_VECTOR_ACTIVE) {
297       ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i], e/blksize,
298                                            CEED_NOTRANSPOSE, invec,
299                                            impl->evecsin[i], request);
300       CeedChkBackend(ierr);
301       activein = 1;
302     }
303     // Basis action
304     switch(emode) {
305     case CEED_EVAL_NONE:
306       if (!activein) {
307         ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
308                                   CEED_USE_POINTER,
309                                   &impl->edata[i][e*Q*size]); CeedChkBackend(ierr);
310       }
311       break;
312     case CEED_EVAL_INTERP:
313       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
314       CeedChkBackend(ierr);
315       if (!activein) {
316         ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
317                                   CEED_USE_POINTER,
318                                   &impl->edata[i][e*elemsize*size]);
319         CeedChkBackend(ierr);
320       }
321       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
322                             CEED_EVAL_INTERP, impl->evecsin[i],
323                             impl->qvecsin[i]); CeedChkBackend(ierr);
324       break;
325     case CEED_EVAL_GRAD:
326       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
327       CeedChkBackend(ierr);
328       if (!activein) {
329         ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
330         ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
331                                   CEED_USE_POINTER,
332                                   &impl->edata[i][e*elemsize*size/dim]);
333         CeedChkBackend(ierr);
334       }
335       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
336                             CEED_EVAL_GRAD, impl->evecsin[i],
337                             impl->qvecsin[i]); CeedChkBackend(ierr);
338       break;
339     case CEED_EVAL_WEIGHT:
340       break;  // No action
341     // LCOV_EXCL_START
342     case CEED_EVAL_DIV:
343     case CEED_EVAL_CURL: {
344       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
345       CeedChkBackend(ierr);
346       Ceed ceed;
347       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
348       return CeedError(ceed, CEED_ERROR_BACKEND,
349                        "Ceed evaluation mode not implemented");
350       // LCOV_EXCL_STOP
351     }
352     }
353   }
354   return CEED_ERROR_SUCCESS;
355 }
356 
357 //------------------------------------------------------------------------------
358 // Output Basis Action
359 //------------------------------------------------------------------------------
360 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q,
361     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
362     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
363     CeedOperator op, CeedVector outvec, CeedOperator_Opt *impl,
364     CeedRequest *request) {
365   CeedInt ierr;
366   CeedElemRestriction Erestrict;
367   CeedEvalMode emode;
368   CeedBasis basis;
369   CeedVector vec;
370 
371   for (CeedInt i=0; i<numoutputfields; i++) {
372     // Get elemsize, emode, size
373     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
374     CeedChkBackend(ierr);
375     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
376     CeedChkBackend(ierr);
377     // Basis action
378     switch(emode) {
379     case CEED_EVAL_NONE:
380       break; // No action
381     case CEED_EVAL_INTERP:
382       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
383       CeedChkBackend(ierr);
384       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
385                             CEED_EVAL_INTERP, impl->qvecsout[i],
386                             impl->evecsout[i]); CeedChkBackend(ierr);
387       break;
388     case CEED_EVAL_GRAD:
389       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
390       CeedChkBackend(ierr);
391       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
392                             CEED_EVAL_GRAD, impl->qvecsout[i],
393                             impl->evecsout[i]); CeedChkBackend(ierr);
394       break;
395     // LCOV_EXCL_START
396     case CEED_EVAL_WEIGHT: {
397       Ceed ceed;
398       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
399       return CeedError(ceed, CEED_ERROR_BACKEND,
400                        "CEED_EVAL_WEIGHT cannot be an output "
401                        "evaluation mode");
402     }
403     case CEED_EVAL_DIV:
404     case CEED_EVAL_CURL: {
405       Ceed ceed;
406       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
407       return CeedError(ceed, CEED_ERROR_BACKEND,
408                        "Ceed evaluation mode not implemented");
409       // LCOV_EXCL_STOP
410     }
411     }
412     // Restrict output block
413     // Get output vector
414     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec);
415     CeedChkBackend(ierr);
416     if (vec == CEED_VECTOR_ACTIVE)
417       vec = outvec;
418     // Restrict
419     ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i+impl->numein],
420                                          e/blksize, CEED_TRANSPOSE,
421                                          impl->evecsout[i], vec, request);
422     CeedChkBackend(ierr);
423   }
424   return CEED_ERROR_SUCCESS;
425 }
426 
427 //------------------------------------------------------------------------------
428 // Restore Input Vectors
429 //------------------------------------------------------------------------------
430 static inline int CeedOperatorRestoreInputs_Opt(CeedInt numinputfields,
431     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
432     CeedOperator_Opt *impl) {
433   CeedInt ierr;
434   CeedEvalMode emode;
435 
436   for (CeedInt i=0; i<numinputfields; i++) {
437     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
438     CeedChkBackend(ierr);
439     if (emode == CEED_EVAL_WEIGHT) { // Skip
440     } else {
441       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
442                                         (const CeedScalar **) &impl->edata[i]);
443       CeedChkBackend(ierr);
444     }
445   }
446   return CEED_ERROR_SUCCESS;
447 }
448 
449 //------------------------------------------------------------------------------
450 // Operator Apply
451 //------------------------------------------------------------------------------
452 static int CeedOperatorApplyAdd_Opt(CeedOperator op, CeedVector invec,
453                                     CeedVector outvec, CeedRequest *request) {
454   int ierr;
455   Ceed ceed;
456   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
457   Ceed_Opt *ceedimpl;
458   ierr = CeedGetData(ceed, &ceedimpl); CeedChkBackend(ierr);
459   CeedInt blksize = ceedimpl->blksize;
460   CeedOperator_Opt *impl;
461   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
462   CeedInt Q, numinputfields, numoutputfields, numelements;
463   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChkBackend(ierr);
464   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
465   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
466   CeedQFunction qf;
467   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
468   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
469   CeedChkBackend(ierr);
470   CeedOperatorField *opinputfields, *opoutputfields;
471   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
472   CeedChkBackend(ierr);
473   CeedQFunctionField *qfinputfields, *qfoutputfields;
474   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
475   CeedChkBackend(ierr);
476   CeedEvalMode emode;
477 
478   // Setup
479   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
480 
481   // Input Evecs and Restriction
482   ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields,
483                                      opinputfields, invec, impl, request);
484   CeedChkBackend(ierr);
485 
486   // Output Lvecs, Evecs, and Qvecs
487   for (CeedInt i=0; i<numoutputfields; i++) {
488     // Set Qvec if needed
489     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
490     CeedChkBackend(ierr);
491     if (emode == CEED_EVAL_NONE) {
492       // Set qvec to single block evec
493       ierr = CeedVectorGetArray(impl->evecsout[i], CEED_MEM_HOST,
494                                 &impl->edata[i + numinputfields]);
495       CeedChkBackend(ierr);
496       ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
497                                 CEED_USE_POINTER,
498                                 impl->edata[i + numinputfields]); CeedChkBackend(ierr);
499       ierr = CeedVectorRestoreArray(impl->evecsout[i],
500                                     &impl->edata[i + numinputfields]);
501       CeedChkBackend(ierr);
502     }
503   }
504 
505   // Loop through elements
506   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
507     // Input basis apply
508     ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields,
509                                       numinputfields, blksize, invec, false,
510                                       impl, request); CeedChkBackend(ierr);
511 
512     // Q function
513     if (!impl->identityqf) {
514       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
515       CeedChkBackend(ierr);
516     }
517 
518     // Output basis apply and restrict
519     ierr = CeedOperatorOutputBasis_Opt(e, Q, qfoutputfields, opoutputfields,
520                                        blksize, numinputfields, numoutputfields,
521                                        op, outvec, impl, request);
522     CeedChkBackend(ierr);
523   }
524 
525   // Restore input arrays
526   ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields,
527                                        opinputfields, impl);
528   CeedChkBackend(ierr);
529 
530   return CEED_ERROR_SUCCESS;
531 }
532 
533 //------------------------------------------------------------------------------
534 // Assemble Linear QFunction
535 //------------------------------------------------------------------------------
536 static int CeedOperatorLinearAssembleQFunction_Opt(CeedOperator op,
537     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
538   int ierr;
539   Ceed ceed;
540   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
541   Ceed_Opt *ceedimpl;
542   ierr = CeedGetData(ceed, &ceedimpl); CeedChkBackend(ierr);
543   const CeedInt blksize = ceedimpl->blksize;
544   CeedOperator_Opt *impl;
545   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
546   CeedInt Q, numinputfields, numoutputfields, numelements, size;
547   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChkBackend(ierr);
548   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
549   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
550   CeedQFunction qf;
551   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
552   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
553   CeedChkBackend(ierr);
554   CeedOperatorField *opinputfields, *opoutputfields;
555   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
556   CeedChkBackend(ierr);
557   CeedQFunctionField *qfinputfields, *qfoutputfields;
558   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
559   CeedChkBackend(ierr);
560   CeedVector vec, lvec;
561   CeedInt numactivein = 0, numactiveout = 0;
562   CeedVector *activein = NULL;
563   CeedScalar *a, *tmp;
564 
565   // Setup
566   ierr = CeedOperatorSetup_Opt(op); CeedChkBackend(ierr);
567 
568   // Check for identity
569   if (impl->identityqf)
570     // LCOV_EXCL_START
571     return CeedError(ceed, CEED_ERROR_BACKEND,
572                      "Assembling identity qfunctions not supported");
573   // LCOV_EXCL_STOP
574 
575   // Input Evecs and Restriction
576   ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields,
577                                      opinputfields, NULL, impl, request);
578   CeedChkBackend(ierr);
579 
580   // Count number of active input fields
581   for (CeedInt i=0; i<numinputfields; i++) {
582     // Get input vector
583     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr);
584     // Check if active input
585     if (vec == CEED_VECTOR_ACTIVE) {
586       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChkBackend(ierr);
587       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChkBackend(ierr);
588       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
589       CeedChkBackend(ierr);
590       ierr = CeedRealloc(numactivein + size, &activein); CeedChkBackend(ierr);
591       for (CeedInt field=0; field<size; field++) {
592         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
593         CeedChkBackend(ierr);
594         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
595                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
596         CeedChkBackend(ierr);
597       }
598       numactivein += size;
599       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChkBackend(ierr);
600     }
601   }
602 
603   // Count number of active output fields
604   for (CeedInt i=0; i<numoutputfields; i++) {
605     // Get output vector
606     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec);
607     CeedChkBackend(ierr);
608     // Check if active output
609     if (vec == CEED_VECTOR_ACTIVE) {
610       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size);
611       CeedChkBackend(ierr);
612       numactiveout += size;
613     }
614   }
615 
616   // Check sizes
617   if (!numactivein || !numactiveout)
618     // LCOV_EXCL_START
619     return CeedError(ceed, CEED_ERROR_BACKEND,
620                      "Cannot assemble QFunction without active inputs "
621                      "and outputs");
622   // LCOV_EXCL_STOP
623 
624   // Setup lvec
625   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
626                           &lvec); CeedChkBackend(ierr);
627   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
628 
629   // Create output restriction
630   CeedInt strides[3] = {1, Q, numactivein *numactiveout*Q};
631   ierr = CeedElemRestrictionCreateStrided(ceed, numelements, Q,
632                                           numactivein*numactiveout,
633                                           numactivein*numactiveout*numelements*Q,
634                                           strides, rstr); CeedChkBackend(ierr);
635   // Create assembled vector
636   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
637                           assembled); CeedChkBackend(ierr);
638 
639   // Loop through elements
640   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
641     // Input basis apply
642     ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields,
643                                       numinputfields, blksize, NULL, true,
644                                       impl, request); CeedChkBackend(ierr);
645 
646     // Assemble QFunction
647     for (CeedInt in=0; in<numactivein; in++) {
648       // Set Inputs
649       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChkBackend(ierr);
650       if (numactivein > 1) {
651         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
652                                   0.0); CeedChkBackend(ierr);
653       }
654       // Set Outputs
655       for (CeedInt out=0; out<numoutputfields; out++) {
656         // Get output vector
657         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
658         CeedChkBackend(ierr);
659         // Check if active output
660         if (vec == CEED_VECTOR_ACTIVE) {
661           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
662                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
663           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
664           CeedChkBackend(ierr);
665           a += size*Q*blksize; // Advance the pointer by the size of the output
666         }
667       }
668       // Apply QFunction
669       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
670       CeedChkBackend(ierr);
671     }
672   }
673 
674   // Un-set output Qvecs to prevent accidental overwrite of Assembled
675   for (CeedInt out=0; out<numoutputfields; out++) {
676     // Get output vector
677     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
678     CeedChkBackend(ierr);
679     // Check if active output
680     if (vec == CEED_VECTOR_ACTIVE) {
681       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
682                          NULL); CeedChkBackend(ierr);
683     }
684   }
685 
686   // Restore input arrays
687   ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields,
688                                        opinputfields, impl);
689   CeedChkBackend(ierr);
690 
691   // Output blocked restriction
692   ierr = CeedVectorRestoreArray(lvec, &a); CeedChkBackend(ierr);
693   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
694   CeedElemRestriction blkrstr;
695   ierr = CeedElemRestrictionCreateBlockedStrided(ceed, numelements, Q, blksize,
696          numactivein*numactiveout, numactivein*numactiveout*numelements*Q,
697          strides, &blkrstr); CeedChkBackend(ierr);
698   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled,
699                                   request); CeedChkBackend(ierr);
700 
701   // Cleanup
702   for (CeedInt i=0; i<numactivein; i++) {
703     ierr = CeedVectorDestroy(&activein[i]); CeedChkBackend(ierr);
704   }
705   ierr = CeedFree(&activein); CeedChkBackend(ierr);
706   ierr = CeedVectorDestroy(&lvec); CeedChkBackend(ierr);
707   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChkBackend(ierr);
708 
709   return CEED_ERROR_SUCCESS;
710 }
711 
712 //------------------------------------------------------------------------------
713 // Operator Destroy
714 //------------------------------------------------------------------------------
715 static int CeedOperatorDestroy_Opt(CeedOperator op) {
716   int ierr;
717   CeedOperator_Opt *impl;
718   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
719 
720   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
721     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChkBackend(ierr);
722     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChkBackend(ierr);
723   }
724   ierr = CeedFree(&impl->blkrestr); CeedChkBackend(ierr);
725   ierr = CeedFree(&impl->evecs); CeedChkBackend(ierr);
726   ierr = CeedFree(&impl->edata); CeedChkBackend(ierr);
727   ierr = CeedFree(&impl->inputstate); CeedChkBackend(ierr);
728 
729   for (CeedInt i=0; i<impl->numein; i++) {
730     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChkBackend(ierr);
731     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChkBackend(ierr);
732   }
733   ierr = CeedFree(&impl->evecsin); CeedChkBackend(ierr);
734   ierr = CeedFree(&impl->qvecsin); CeedChkBackend(ierr);
735 
736   for (CeedInt i=0; i<impl->numeout; i++) {
737     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChkBackend(ierr);
738     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChkBackend(ierr);
739   }
740   ierr = CeedFree(&impl->evecsout); CeedChkBackend(ierr);
741   ierr = CeedFree(&impl->qvecsout); CeedChkBackend(ierr);
742 
743   ierr = CeedFree(&impl); CeedChkBackend(ierr);
744   return CEED_ERROR_SUCCESS;
745 }
746 
747 //------------------------------------------------------------------------------
748 // Operator Create
749 //------------------------------------------------------------------------------
750 int CeedOperatorCreate_Opt(CeedOperator op) {
751   int ierr;
752   Ceed ceed;
753   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
754   Ceed_Opt *ceedimpl;
755   ierr = CeedGetData(ceed, &ceedimpl); CeedChkBackend(ierr);
756   CeedInt blksize = ceedimpl->blksize;
757   CeedOperator_Opt *impl;
758 
759   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
760   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
761 
762   if (blksize != 1 && blksize != 8)
763     // LCOV_EXCL_START
764     return CeedError(ceed, CEED_ERROR_BACKEND,
765                      "Opt backend cannot use blocksize: %d", blksize);
766   // LCOV_EXCL_STOP
767 
768   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
769                                 CeedOperatorLinearAssembleQFunction_Opt);
770   CeedChkBackend(ierr);
771   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
772                                 CeedOperatorApplyAdd_Opt); CeedChkBackend(ierr);
773   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
774                                 CeedOperatorDestroy_Opt); CeedChkBackend(ierr);
775   return CEED_ERROR_SUCCESS;
776 }
777 //------------------------------------------------------------------------------
778