xref: /libCEED/backends/opt/ceed-opt-operator.c (revision 16911fdad6ca4b1dd37f9d3206958ee664667dbe)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <string.h>
18 #include "ceed-opt.h"
19 #include "../ref/ceed-ref.h"
20 
21 static int CeedOperatorDestroy_Opt(CeedOperator op) {
22   int ierr;
23   CeedOperator_Opt *impl;
24   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
25 
26   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
27     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
28     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
29   }
30   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
31   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
32   ierr = CeedFree(&impl->edata); CeedChk(ierr);
33   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
34 
35   for (CeedInt i=0; i<impl->numein; i++) {
36     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
37     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
38   }
39   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
40   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
41 
42   for (CeedInt i=0; i<impl->numeout; i++) {
43     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
44     if (!impl->identityqf) {
45       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
46     }
47   }
48   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
49   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
50 
51   ierr = CeedFree(&impl); CeedChk(ierr);
52   return 0;
53 }
54 
55 /*
56   Setup infields or outfields
57  */
58 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op,
59                                        bool inOrOut, const CeedInt blksize,
60                                        CeedElemRestriction *blkrestr,
61                                        CeedVector *fullevecs, CeedVector *evecs,
62                                        CeedVector *qvecs, CeedInt starte,
63                                        CeedInt numfields, CeedInt Q) {
64   CeedInt dim, ierr, ncomp, size, P;
65   Ceed ceed;
66   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
67   CeedBasis basis;
68   CeedElemRestriction r;
69   CeedOperatorField *opfields;
70   CeedQFunctionField *qffields;
71   if (inOrOut) {
72     ierr = CeedOperatorGetFields(op, NULL, &opfields);
73     CeedChk(ierr);
74     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
75     CeedChk(ierr);
76   } else {
77     ierr = CeedOperatorGetFields(op, &opfields, NULL);
78     CeedChk(ierr);
79     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
80     CeedChk(ierr);
81   }
82 
83   // Loop over fields
84   for (CeedInt i=0; i<numfields; i++) {
85     CeedEvalMode emode;
86     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
87 
88     if (emode != CEED_EVAL_WEIGHT) {
89       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
90       CeedChk(ierr);
91       CeedElemRestriction_Ref *data;
92       ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr);
93       Ceed ceed;
94       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
95       CeedInt nelem, elemsize, nnodes;
96       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
97       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
98       ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
99       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
100       ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize,
101                                               blksize, nnodes, ncomp,
102                                               CEED_MEM_HOST, CEED_COPY_VALUES,
103                                               data->indices, &blkrestr[i+starte]);
104       CeedChk(ierr);
105       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
106                                              &fullevecs[i+starte]);
107       CeedChk(ierr);
108     }
109 
110     switch(emode) {
111     case CEED_EVAL_NONE:
112       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
113       ierr = CeedVectorCreate(ceed, Q*size*blksize, &evecs[i]); CeedChk(ierr);
114       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
115       break;
116     case CEED_EVAL_INTERP:
117       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
118       ierr = CeedElemRestrictionGetElementSize(r, &P);
119       CeedChk(ierr);
120       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
121       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
122       break;
123     case CEED_EVAL_GRAD:
124       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
125       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
126       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
127       ierr = CeedElemRestrictionGetElementSize(r, &P);
128       CeedChk(ierr);
129       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
130       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
131       break;
132     case CEED_EVAL_WEIGHT: // Only on input fields
133       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
134       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
135       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
136                             CEED_EVAL_WEIGHT, NULL, qvecs[i]); CeedChk(ierr);
137 
138       break;
139     case CEED_EVAL_DIV:
140       break; // Not implimented
141     case CEED_EVAL_CURL:
142       break; // Not implimented
143     }
144   }
145   return 0;
146 }
147 
148 /*
149   CeedOperator needs to connect all the named fields (be they active or passive)
150   to the named inputs and outputs of its CeedQFunction.
151  */
152 static int CeedOperatorSetup_Opt(CeedOperator op) {
153   int ierr;
154   bool setupdone;
155   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
156   if (setupdone) return 0;
157   Ceed ceed;
158   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
159   Ceed_Opt *ceedimpl;
160   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
161   const CeedInt blksize = ceedimpl->blksize;
162   CeedOperator_Opt *impl;
163   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
164   CeedQFunction qf;
165   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
166   CeedInt Q, numinputfields, numoutputfields;
167   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
168   ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr);
169   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
170   CeedChk(ierr);
171   CeedOperatorField *opinputfields, *opoutputfields;
172   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
173   CeedChk(ierr);
174   CeedQFunctionField *qfinputfields, *qfoutputfields;
175   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
176   CeedChk(ierr);
177 
178   // Allocate
179   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
180   CeedChk(ierr);
181   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
182   CeedChk(ierr);
183   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
184   CeedChk(ierr);
185 
186   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
187   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
188   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
189   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
190   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
191 
192   impl->numein = numinputfields; impl->numeout = numoutputfields;
193 
194   // Set up infield and outfield pointer arrays
195   // Infields
196   ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blksize, impl->blkrestr,
197                                      impl->evecs, impl->evecsin,
198                                      impl->qvecsin, 0,
199                                      numinputfields, Q);
200   CeedChk(ierr);
201   // Outfields
202   ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blksize, impl->blkrestr,
203                                      impl->evecs, impl->evecsout,
204                                      impl->qvecsout, numinputfields,
205                                      numoutputfields, Q);
206   CeedChk(ierr);
207 
208   // Identity QFunctions
209   if (impl->identityqf) {
210     CeedEvalMode inmode, outmode;
211     CeedQFunctionField *infields, *outfields;
212     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr);
213 
214     for (CeedInt i=0; i<numinputfields; i++) {
215       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
216       CeedChk(ierr);
217       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
218       CeedChk(ierr);
219 
220       if (inmode == CEED_EVAL_NONE && outmode == CEED_EVAL_NONE)
221         // LCOV_EXCL_START
222         return CeedError(ceed, 1, "CEED_EVAL_NONE for a matching input and "
223                          "output does not make sense with identity QFunction");
224        // LCOV_EXCL_STOP
225 
226       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
227       impl->qvecsout[i] = impl->qvecsin[i];
228     }
229   }
230 
231   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
232 
233   return 0;
234 }
235 
236 // Setup Input fields
237 static inline int CeedOperatorSetupInputs_Opt(CeedInt numinputfields,
238     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
239     CeedVector invec, CeedOperator_Opt *impl, CeedRequest *request) {
240   CeedInt ierr;
241   CeedEvalMode emode;
242   CeedVector vec;
243   CeedTransposeMode lmode;
244   uint64_t state;
245 
246   for (CeedInt i=0; i<numinputfields; i++) {
247     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
248     CeedChk(ierr);
249     if (emode == CEED_EVAL_WEIGHT) { // Skip
250     } else {
251       // Get input vector
252       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
253       if (vec != CEED_VECTOR_ACTIVE) {
254         // Restrict
255         ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
256         if (state != impl->inputstate[i]) {
257           ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode);
258           CeedChk(ierr);
259           ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
260                                           lmode, vec, impl->evecs[i], request);
261           CeedChk(ierr);
262           impl->inputstate[i] = state;
263         }
264       } else {
265         // Set Qvec for CEED_EVAL_NONE
266         if (emode == CEED_EVAL_NONE) {
267           ierr = CeedVectorGetArray(impl->evecsin[i], CEED_MEM_HOST,
268                                     &impl->edata[i]); CeedChk(ierr);
269           ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
270                                     CEED_USE_POINTER,
271                                     impl->edata[i]); CeedChk(ierr);
272           ierr = CeedVectorRestoreArray(impl->evecsin[i],
273                                         &impl->edata[i]); CeedChk(ierr);
274         }
275       }
276       // Get evec
277       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
278                                     (const CeedScalar **) &impl->edata[i]);
279       CeedChk(ierr);
280     }
281   }
282   return 0;
283 }
284 
285 // Input basis action
286 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q,
287     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
288     CeedInt numinputfields, CeedInt blksize, CeedVector invec, bool skipactive,
289     CeedOperator_Opt *impl, CeedRequest *request) {
290   CeedInt ierr;
291   CeedInt dim, elemsize, size;
292   CeedElemRestriction Erestrict;
293   CeedEvalMode emode;
294   CeedBasis basis;
295   CeedVector vec;
296   CeedTransposeMode lmode;
297 
298   for (CeedInt i=0; i<numinputfields; i++) {
299     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
300     // Skip active input
301     if (skipactive) {
302       if (vec == CEED_VECTOR_ACTIVE)
303         continue;
304     }
305 
306     CeedInt activein = 0;
307     // Get elemsize, emode, size
308     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
309     CeedChk(ierr);
310     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
311     CeedChk(ierr);
312     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
313     CeedChk(ierr);
314     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
315     // Restrict block active input
316     if (vec == CEED_VECTOR_ACTIVE) {
317       ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode);
318       CeedChk(ierr);
319       ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i], e/blksize,
320                                            CEED_NOTRANSPOSE, lmode, invec,
321                                            impl->evecsin[i], request);
322       CeedChk(ierr);
323       activein = 1;
324     }
325     // Basis action
326     switch(emode) {
327     case CEED_EVAL_NONE:
328       if (!activein) {
329         ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
330                                   CEED_USE_POINTER,
331                                   &impl->edata[i][e*Q*size]); CeedChk(ierr);
332       }
333       break;
334     case CEED_EVAL_INTERP:
335       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
336       CeedChk(ierr);
337       if (!activein) {
338         ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
339                                   CEED_USE_POINTER,
340                                   &impl->edata[i][e*elemsize*size]);
341         CeedChk(ierr);
342       }
343       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
344                             CEED_EVAL_INTERP, impl->evecsin[i],
345                             impl->qvecsin[i]); CeedChk(ierr);
346       break;
347     case CEED_EVAL_GRAD:
348       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
349       CeedChk(ierr);
350       if (!activein) {
351         ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
352         ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
353                                   CEED_USE_POINTER,
354                                   &impl->edata[i][e*elemsize*size/dim]);
355         CeedChk(ierr);
356       }
357       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
358                             CEED_EVAL_GRAD, impl->evecsin[i],
359                             impl->qvecsin[i]); CeedChk(ierr);
360       break;
361     case CEED_EVAL_WEIGHT:
362       break;  // No action
363     case CEED_EVAL_DIV:
364     case CEED_EVAL_CURL: {
365       // LCOV_EXCL_START
366       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
367       CeedChk(ierr);
368       Ceed ceed;
369       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
370       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
371       // LCOV_EXCL_STOP
372       break; // Not implemented
373     }
374     }
375   }
376   return 0;
377 }
378 
379 // Output basis action
380 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q,
381     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
382     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
383     CeedOperator op, CeedVector outvec, CeedOperator_Opt *impl,
384     CeedRequest *request) {
385   CeedInt ierr;
386   CeedElemRestriction Erestrict;
387   CeedEvalMode emode;
388   CeedBasis basis;
389   CeedVector vec;
390   CeedTransposeMode lmode;
391 
392   for (CeedInt i=0; i<numoutputfields; i++) {
393     // Get elemsize, emode, size
394     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
395     CeedChk(ierr);
396     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
397     CeedChk(ierr);
398     // Basis action
399     switch(emode) {
400     case CEED_EVAL_NONE:
401       break; // No action
402     case CEED_EVAL_INTERP:
403       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
404       CeedChk(ierr);
405       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
406                             CEED_EVAL_INTERP, impl->qvecsout[i],
407                             impl->evecsout[i]); CeedChk(ierr);
408       break;
409     case CEED_EVAL_GRAD:
410       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
411       CeedChk(ierr);
412       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
413                             CEED_EVAL_GRAD, impl->qvecsout[i],
414                             impl->evecsout[i]); CeedChk(ierr);
415       break;
416     case CEED_EVAL_WEIGHT: {
417       // LCOV_EXCL_START
418       Ceed ceed;
419       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
420       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
421                        "evaluation mode");
422       // LCOV_EXCL_STOP
423       break; // Should not occur
424     }
425     case CEED_EVAL_DIV:
426     case CEED_EVAL_CURL: {
427       // LCOV_EXCL_START
428       Ceed ceed;
429       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
430       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
431       // LCOV_EXCL_STOP
432       break; // Not implemented
433     }
434     }
435     // Restrict output block
436     // Get output vector
437     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
438     if (vec == CEED_VECTOR_ACTIVE)
439       vec = outvec;
440     // Restrict
441     ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode);
442     CeedChk(ierr);
443     ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i+impl->numein],
444                                          e/blksize, CEED_TRANSPOSE,
445                                          lmode, impl->evecsout[i],
446                                          vec, request); CeedChk(ierr);
447   }
448   return 0;
449 }
450 
451 // Restore Inputs
452 static inline int CeedOperatorRestoreInputs_Opt(CeedInt numinputfields,
453     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
454     bool skipactive, CeedOperator_Opt *impl) {
455   CeedInt ierr;
456   CeedEvalMode emode;
457 
458   for (CeedInt i=0; i<numinputfields; i++) {
459     // Skip active inputs
460     if (skipactive) {
461       CeedVector vec;
462       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
463       if (vec == CEED_VECTOR_ACTIVE)
464         continue;
465     }
466     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
467     CeedChk(ierr);
468     if (emode == CEED_EVAL_WEIGHT) { // Skip
469     } else {
470       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
471                                         (const CeedScalar **) &impl->edata[i]);
472       CeedChk(ierr);
473     }
474   }
475   return 0;
476 }
477 
478 // Apply Ceed Operator
479 static int CeedOperatorApply_Opt(CeedOperator op, CeedVector invec,
480                                  CeedVector outvec, CeedRequest *request) {
481   int ierr;
482   Ceed ceed;
483   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
484   Ceed_Opt *ceedimpl;
485   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
486   CeedInt blksize = ceedimpl->blksize;
487   CeedOperator_Opt *impl;
488   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
489   CeedInt Q, numinputfields, numoutputfields, numelements;
490   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
491   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
492   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
493   CeedQFunction qf;
494   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
495   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
496   CeedChk(ierr);
497   CeedOperatorField *opinputfields, *opoutputfields;
498   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
499   CeedChk(ierr);
500   CeedQFunctionField *qfinputfields, *qfoutputfields;
501   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
502   CeedChk(ierr);
503   CeedEvalMode emode;
504   CeedVector vec;
505 
506   // Setup
507   ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr);
508 
509   // Input Evecs and Restriction
510   ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields,
511                                      opinputfields, invec, impl, request);
512   CeedChk(ierr);
513 
514   // Output Lvecs, Evecs, and Qvecs
515   for (CeedInt i=0; i<numoutputfields; i++) {
516     // Zero Lvecs
517     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
518     if (vec == CEED_VECTOR_ACTIVE) {
519       if (!impl->add) {
520         vec = outvec;
521         ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
522       }
523     } else {
524       ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
525     }
526     // Set Qvec if needed
527     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
528     CeedChk(ierr);
529     if (emode == CEED_EVAL_NONE) {
530       // Set qvec to single block evec
531       ierr = CeedVectorGetArray(impl->evecsout[i], CEED_MEM_HOST,
532                                 &impl->edata[i + numinputfields]);
533       CeedChk(ierr);
534       ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
535                                 CEED_USE_POINTER,
536                                 impl->edata[i + numinputfields]); CeedChk(ierr);
537       ierr = CeedVectorRestoreArray(impl->evecsout[i],
538                                     &impl->edata[i + numinputfields]);
539       CeedChk(ierr);
540     }
541   }
542   impl->add = false;
543 
544   // Loop through elements
545   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
546     // Input basis apply
547     ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields,
548                                       numinputfields, blksize, invec, false,
549                                       impl, request); CeedChk(ierr);
550 
551     // Q function
552     if (!impl->identityqf) {
553       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
554       CeedChk(ierr);
555    }
556 
557     // Output basis apply and restrict
558     ierr = CeedOperatorOutputBasis_Opt(e, Q, qfoutputfields, opoutputfields,
559                                        blksize, numinputfields, numoutputfields,
560                                        op, outvec, impl, request);
561     CeedChk(ierr);
562   }
563 
564   // Restore input arrays
565   ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields,
566                                        opinputfields, false, impl);
567   CeedChk(ierr);
568 
569   return 0;
570 }
571 
572 // Assemble Linear QFunction
573 static int CeedOperatorAssembleLinearQFunction_Opt(CeedOperator op,
574     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
575   int ierr;
576   Ceed ceed;
577   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
578   Ceed_Opt *ceedimpl;
579   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
580   const CeedInt blksize = ceedimpl->blksize;
581   CeedOperator_Opt *impl;
582   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
583   CeedInt Q, numinputfields, numoutputfields, numelements, size;
584   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
585   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
586   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
587   CeedQFunction qf;
588   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
589   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
590   CeedChk(ierr);
591   CeedOperatorField *opinputfields, *opoutputfields;
592   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
593   CeedChk(ierr);
594   CeedQFunctionField *qfinputfields, *qfoutputfields;
595   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
596   CeedChk(ierr);
597   CeedVector vec, lvec;
598   CeedInt numactivein = 0, numactiveout = 0;
599   CeedVector *activein = NULL;
600   CeedScalar *a, *tmp;
601 
602   // Setup
603   ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr);
604 
605   // Check for identity
606   if (impl->identityqf)
607    // LCOV_EXCL_START
608     return CeedError(ceed, 1, "Assembling identity qfunction does not make sense");
609   // LCOV_EXCL_STOP
610 
611   // Input Evecs and Restriction
612   ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields,
613                                      opinputfields, NULL, impl, request);
614   CeedChk(ierr);
615 
616   // Count number of active input fields
617   for (CeedInt i=0; i<numinputfields; i++) {
618     // Get input vector
619     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
620     // Check if active input
621     if (vec == CEED_VECTOR_ACTIVE) {
622       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
623       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
624       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
625       CeedChk(ierr);
626       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
627       for (CeedInt field=0; field<size; field++) {
628         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
629         CeedChk(ierr);
630         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
631                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
632         CeedChk(ierr);
633       }
634       numactivein += size;
635       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
636     }
637   }
638 
639   // Count number of active output fields
640   for (CeedInt i=0; i<numoutputfields; i++) {
641     // Get output vector
642     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
643     // Check if active output
644     if (vec == CEED_VECTOR_ACTIVE) {
645       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
646       numactiveout += size;
647     }
648   }
649 
650   // Check sizes
651   if (!numactivein || !numactiveout)
652     // LCOV_EXCL_START
653     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
654                      "and outputs");
655   // LCOV_EXCL_STOP
656 
657   // Setup lvec
658   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
659                           &lvec); CeedChk(ierr);
660   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
661 
662   // Create output restriction
663   ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q,
664          numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr);
665   // Create assembled vector
666   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
667                           assembled); CeedChk(ierr);
668 
669   // Loop through elements
670   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
671     // Input basis apply
672     ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields,
673                                       numinputfields, blksize, NULL, true,
674                                       impl, request); CeedChk(ierr);
675 
676     // Assemble QFunction
677     for (CeedInt in=0; in<numactivein; in++) {
678       // Set Inputs
679       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
680       if (numactivein > 1) {
681         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
682                                   0.0); CeedChk(ierr);
683       }
684       // Set Outputs
685       for (CeedInt out=0; out<numoutputfields; out++) {
686         // Get output vector
687         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
688         CeedChk(ierr);
689         // Check if active output
690         if (vec == CEED_VECTOR_ACTIVE) {
691           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
692                              CEED_USE_POINTER, a); CeedChk(ierr);
693           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
694           CeedChk(ierr);
695           a += size*Q*blksize; // Advance the pointer by the size of the output
696         }
697       }
698       // Apply QFunction
699       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
700       CeedChk(ierr);
701     }
702   }
703 
704   // Un-set output Qvecs to prevent accidental overwrite of Assembled
705   for (CeedInt out=0; out<numoutputfields; out++) {
706     // Get output vector
707     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
708     CeedChk(ierr);
709     // Check if active output
710     if (vec == CEED_VECTOR_ACTIVE) {
711       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
712                          NULL); CeedChk(ierr);
713     }
714   }
715 
716   // Restore input arrays
717   ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields,
718                                        opinputfields, true, impl);
719   CeedChk(ierr);
720 
721   // Output blocked restriction
722   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
723   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
724   CeedElemRestriction blkrstr;
725   ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize,
726                                           numelements*Q,
727                                           numactivein*numactiveout,
728                                           CEED_MEM_HOST, CEED_COPY_VALUES,
729                                           NULL, &blkrstr); CeedChk(ierr);
730   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE,
731                                   lvec, *assembled, request); CeedChk(ierr);
732 
733   // Cleanup
734   for (CeedInt i=0; i<numactivein; i++) {
735     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
736   }
737   ierr = CeedFree(&activein); CeedChk(ierr);
738   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
739   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
740 
741   return 0;
742 }
743 
744 int CeedOperatorCreate_Opt(CeedOperator op) {
745   int ierr;
746   Ceed ceed;
747   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
748   Ceed_Opt *ceedimpl;
749   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
750   CeedInt blksize = ceedimpl->blksize;
751   CeedOperator_Opt *impl;
752 
753   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
754   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
755 
756   if (blksize == 1 || blksize == 8) {
757     ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction",
758                                   CeedOperatorAssembleLinearQFunction_Opt);
759     CeedChk(ierr);
760     ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply",
761                                   CeedOperatorApply_Opt); CeedChk(ierr);
762   } else {
763     // LCOV_EXCL_START
764     return CeedError(ceed, 1, "Opt backend cannot use blocksize: %d", blksize);
765     // LCOV_EXCL_STOP
766   }
767 
768   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
769                                 CeedOperatorDestroy_Opt); CeedChk(ierr);
770   return 0;
771 }
772