xref: /libCEED/backends/opt/ceed-opt-operator.c (revision 5afe071892733ea0734472b79463b1948e1538d8)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <string.h>
18 #include "ceed-opt.h"
19 #include "../ref/ceed-ref.h"
20 
21 static int CeedOperatorDestroy_Opt(CeedOperator op) {
22   int ierr;
23   CeedOperator_Opt *impl;
24   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
25 
26   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
27     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
28     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
29   }
30   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
31   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
32   ierr = CeedFree(&impl->edata); CeedChk(ierr);
33   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
34 
35   for (CeedInt i=0; i<impl->numein; i++) {
36     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
37     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
38   }
39   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
40   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
41 
42   for (CeedInt i=0; i<impl->numeout; i++) {
43     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
44     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
45   }
46   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
47   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
48 
49   ierr = CeedFree(&impl); CeedChk(ierr);
50   return 0;
51 }
52 
53 /*
54   Setup infields or outfields
55  */
56 static int CeedOperatorSetupFields_Opt(CeedQFunction qf, CeedOperator op,
57                                        bool inOrOut, const CeedInt blksize,
58                                        CeedElemRestriction *blkrestr,
59                                        CeedVector *fullevecs, CeedVector *evecs,
60                                        CeedVector *qvecs, CeedInt starte,
61                                        CeedInt numfields, CeedInt Q) {
62   CeedInt dim, ierr, ncomp, size, P;
63   Ceed ceed;
64   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
65   CeedBasis basis;
66   CeedElemRestriction r;
67   CeedOperatorField *opfields;
68   CeedQFunctionField *qffields;
69   if (inOrOut) {
70     ierr = CeedOperatorGetFields(op, NULL, &opfields);
71     CeedChk(ierr);
72     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
73     CeedChk(ierr);
74   } else {
75     ierr = CeedOperatorGetFields(op, &opfields, NULL);
76     CeedChk(ierr);
77     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
78     CeedChk(ierr);
79   }
80 
81   // Loop over fields
82   for (CeedInt i=0; i<numfields; i++) {
83     CeedEvalMode emode;
84     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
85 
86     if (emode != CEED_EVAL_WEIGHT) {
87       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
88       CeedChk(ierr);
89       CeedElemRestriction_Ref *data;
90       ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr);
91       Ceed ceed;
92       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
93       CeedInt nelem, elemsize, nnodes;
94       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
95       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
96       ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
97       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
98       ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize,
99                                               blksize, nnodes, ncomp,
100                                               CEED_MEM_HOST, CEED_COPY_VALUES,
101                                               data->indices, &blkrestr[i+starte]);
102       CeedChk(ierr);
103       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
104                                              &fullevecs[i+starte]);
105       CeedChk(ierr);
106     }
107 
108     switch(emode) {
109     case CEED_EVAL_NONE:
110       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
111       ierr = CeedVectorCreate(ceed, Q*size*blksize, &evecs[i]); CeedChk(ierr);
112       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
113       break;
114     case CEED_EVAL_INTERP:
115       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
116       ierr = CeedElemRestrictionGetElementSize(r, &P);
117       CeedChk(ierr);
118       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
119       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
120       break;
121     case CEED_EVAL_GRAD:
122       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
123       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
124       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
125       ierr = CeedElemRestrictionGetElementSize(r, &P);
126       CeedChk(ierr);
127       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
128       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
129       break;
130     case CEED_EVAL_WEIGHT: // Only on input fields
131       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
132       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
133       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
134                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]);
135       CeedChk(ierr);
136 
137       break;
138     case CEED_EVAL_DIV:
139       break; // Not implimented
140     case CEED_EVAL_CURL:
141       break; // Not implimented
142     }
143   }
144   return 0;
145 }
146 
147 /*
148   CeedOperator needs to connect all the named fields (be they active or passive)
149   to the named inputs and outputs of its CeedQFunction.
150  */
151 static int CeedOperatorSetup_Opt(CeedOperator op) {
152   int ierr;
153   bool setupdone;
154   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
155   if (setupdone) return 0;
156   Ceed ceed;
157   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
158   Ceed_Opt *ceedimpl;
159   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
160   const CeedInt blksize = ceedimpl->blksize;
161   CeedOperator_Opt *impl;
162   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
163   CeedQFunction qf;
164   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
165   CeedInt Q, numinputfields, numoutputfields;
166   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
167   ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr);
168   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
169   CeedChk(ierr);
170   CeedOperatorField *opinputfields, *opoutputfields;
171   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
172   CeedChk(ierr);
173   CeedQFunctionField *qfinputfields, *qfoutputfields;
174   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
175   CeedChk(ierr);
176 
177   // Allocate
178   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
179   CeedChk(ierr);
180   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
181   CeedChk(ierr);
182   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
183   CeedChk(ierr);
184 
185   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
186   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
187   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
188   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
189   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
190 
191   impl->numein = numinputfields; impl->numeout = numoutputfields;
192 
193   // Set up infield and outfield pointer arrays
194   // Infields
195   ierr = CeedOperatorSetupFields_Opt(qf, op, 0, blksize, impl->blkrestr,
196                                      impl->evecs, impl->evecsin,
197                                      impl->qvecsin, 0,
198                                      numinputfields, Q);
199   CeedChk(ierr);
200   // Outfields
201   ierr = CeedOperatorSetupFields_Opt(qf, op, 1, blksize, impl->blkrestr,
202                                      impl->evecs, impl->evecsout,
203                                      impl->qvecsout, numinputfields,
204                                      numoutputfields, Q);
205   CeedChk(ierr);
206 
207   // Identity QFunctions
208   if (impl->identityqf) {
209     CeedEvalMode inmode, outmode;
210     CeedQFunctionField *infields, *outfields;
211     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr);
212 
213     for (CeedInt i=0; i<numinputfields; i++) {
214       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
215       CeedChk(ierr);
216       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
217       CeedChk(ierr);
218 
219       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
220       impl->qvecsout[i] = impl->qvecsin[i];
221       ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr);
222     }
223   }
224 
225   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
226 
227   return 0;
228 }
229 
230 // Setup Input fields
231 static inline int CeedOperatorSetupInputs_Opt(CeedInt numinputfields,
232     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
233     CeedVector invec, CeedOperator_Opt *impl, CeedRequest *request) {
234   CeedInt ierr;
235   CeedEvalMode emode;
236   CeedVector vec;
237   CeedTransposeMode lmode;
238   uint64_t state;
239 
240   for (CeedInt i=0; i<numinputfields; i++) {
241     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
242     CeedChk(ierr);
243     if (emode == CEED_EVAL_WEIGHT) { // Skip
244     } else {
245       // Get input vector
246       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
247       if (vec != CEED_VECTOR_ACTIVE) {
248         // Restrict
249         ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
250         if (state != impl->inputstate[i]) {
251           ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode);
252           CeedChk(ierr);
253           ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
254                                           lmode, vec, impl->evecs[i], request);
255           CeedChk(ierr);
256           impl->inputstate[i] = state;
257         }
258       } else {
259         // Set Qvec for CEED_EVAL_NONE
260         if (emode == CEED_EVAL_NONE) {
261           ierr = CeedVectorGetArray(impl->evecsin[i], CEED_MEM_HOST,
262                                     &impl->edata[i]); CeedChk(ierr);
263           ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
264                                     CEED_USE_POINTER,
265                                     impl->edata[i]); CeedChk(ierr);
266           ierr = CeedVectorRestoreArray(impl->evecsin[i],
267                                         &impl->edata[i]); CeedChk(ierr);
268         }
269       }
270       // Get evec
271       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
272                                     (const CeedScalar **) &impl->edata[i]);
273       CeedChk(ierr);
274     }
275   }
276   return 0;
277 }
278 
279 // Input basis action
280 static inline int CeedOperatorInputBasis_Opt(CeedInt e, CeedInt Q,
281     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
282     CeedInt numinputfields, CeedInt blksize, CeedVector invec, bool skipactive,
283     CeedOperator_Opt *impl, CeedRequest *request) {
284   CeedInt ierr;
285   CeedInt dim, elemsize, size;
286   CeedElemRestriction Erestrict;
287   CeedEvalMode emode;
288   CeedBasis basis;
289   CeedVector vec;
290   CeedTransposeMode lmode;
291 
292   for (CeedInt i=0; i<numinputfields; i++) {
293     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
294     // Skip active input
295     if (skipactive) {
296       if (vec == CEED_VECTOR_ACTIVE)
297         continue;
298     }
299 
300     CeedInt activein = 0;
301     // Get elemsize, emode, size
302     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
303     CeedChk(ierr);
304     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
305     CeedChk(ierr);
306     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
307     CeedChk(ierr);
308     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
309     // Restrict block active input
310     if (vec == CEED_VECTOR_ACTIVE) {
311       ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode);
312       CeedChk(ierr);
313       ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i], e/blksize,
314                                            CEED_NOTRANSPOSE, lmode, invec,
315                                            impl->evecsin[i], request);
316       CeedChk(ierr);
317       activein = 1;
318     }
319     // Basis action
320     switch(emode) {
321     case CEED_EVAL_NONE:
322       if (!activein) {
323         ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
324                                   CEED_USE_POINTER,
325                                   &impl->edata[i][e*Q*size]); CeedChk(ierr);
326       }
327       break;
328     case CEED_EVAL_INTERP:
329       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
330       CeedChk(ierr);
331       if (!activein) {
332         ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
333                                   CEED_USE_POINTER,
334                                   &impl->edata[i][e*elemsize*size]);
335         CeedChk(ierr);
336       }
337       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
338                             CEED_EVAL_INTERP, impl->evecsin[i],
339                             impl->qvecsin[i]); CeedChk(ierr);
340       break;
341     case CEED_EVAL_GRAD:
342       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
343       CeedChk(ierr);
344       if (!activein) {
345         ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
346         ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
347                                   CEED_USE_POINTER,
348                                   &impl->edata[i][e*elemsize*size/dim]);
349         CeedChk(ierr);
350       }
351       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
352                             CEED_EVAL_GRAD, impl->evecsin[i],
353                             impl->qvecsin[i]); CeedChk(ierr);
354       break;
355     case CEED_EVAL_WEIGHT:
356       break;  // No action
357     case CEED_EVAL_DIV:
358     case CEED_EVAL_CURL: {
359       // LCOV_EXCL_START
360       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
361       CeedChk(ierr);
362       Ceed ceed;
363       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
364       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
365       // LCOV_EXCL_STOP
366       break; // Not implemented
367     }
368     }
369   }
370   return 0;
371 }
372 
373 // Output basis action
374 static inline int CeedOperatorOutputBasis_Opt(CeedInt e, CeedInt Q,
375     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
376     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
377     CeedOperator op, CeedVector outvec, CeedOperator_Opt *impl,
378     CeedRequest *request) {
379   CeedInt ierr;
380   CeedElemRestriction Erestrict;
381   CeedEvalMode emode;
382   CeedBasis basis;
383   CeedVector vec;
384   CeedTransposeMode lmode;
385 
386   for (CeedInt i=0; i<numoutputfields; i++) {
387     // Get elemsize, emode, size
388     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
389     CeedChk(ierr);
390     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
391     CeedChk(ierr);
392     // Basis action
393     switch(emode) {
394     case CEED_EVAL_NONE:
395       break; // No action
396     case CEED_EVAL_INTERP:
397       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
398       CeedChk(ierr);
399       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
400                             CEED_EVAL_INTERP, impl->qvecsout[i],
401                             impl->evecsout[i]); CeedChk(ierr);
402       break;
403     case CEED_EVAL_GRAD:
404       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
405       CeedChk(ierr);
406       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
407                             CEED_EVAL_GRAD, impl->qvecsout[i],
408                             impl->evecsout[i]); CeedChk(ierr);
409       break;
410     case CEED_EVAL_WEIGHT: {
411       // LCOV_EXCL_START
412       Ceed ceed;
413       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
414       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
415                        "evaluation mode");
416       // LCOV_EXCL_STOP
417       break; // Should not occur
418     }
419     case CEED_EVAL_DIV:
420     case CEED_EVAL_CURL: {
421       // LCOV_EXCL_START
422       Ceed ceed;
423       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
424       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
425       // LCOV_EXCL_STOP
426       break; // Not implemented
427     }
428     }
429     // Restrict output block
430     // Get output vector
431     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
432     if (vec == CEED_VECTOR_ACTIVE)
433       vec = outvec;
434     // Restrict
435     ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode);
436     CeedChk(ierr);
437     ierr = CeedElemRestrictionApplyBlock(impl->blkrestr[i+impl->numein],
438                                          e/blksize, CEED_TRANSPOSE,
439                                          lmode, impl->evecsout[i],
440                                          vec, request); CeedChk(ierr);
441   }
442   return 0;
443 }
444 
445 // Restore Inputs
446 static inline int CeedOperatorRestoreInputs_Opt(CeedInt numinputfields,
447     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
448     bool skipactive, CeedOperator_Opt *impl) {
449   CeedInt ierr;
450   CeedEvalMode emode;
451 
452   for (CeedInt i=0; i<numinputfields; i++) {
453     // Skip active inputs
454     if (skipactive) {
455       CeedVector vec;
456       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
457       if (vec == CEED_VECTOR_ACTIVE)
458         continue;
459     }
460     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
461     CeedChk(ierr);
462     if (emode == CEED_EVAL_WEIGHT) { // Skip
463     } else {
464       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
465                                         (const CeedScalar **) &impl->edata[i]);
466       CeedChk(ierr);
467     }
468   }
469   return 0;
470 }
471 
472 // Apply Ceed Operator
473 static int CeedOperatorApply_Opt(CeedOperator op, CeedVector invec,
474                                  CeedVector outvec, CeedRequest *request) {
475   int ierr;
476   Ceed ceed;
477   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
478   Ceed_Opt *ceedimpl;
479   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
480   CeedInt blksize = ceedimpl->blksize;
481   CeedOperator_Opt *impl;
482   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
483   CeedInt Q, numinputfields, numoutputfields, numelements;
484   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
485   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
486   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
487   CeedQFunction qf;
488   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
489   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
490   CeedChk(ierr);
491   CeedOperatorField *opinputfields, *opoutputfields;
492   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
493   CeedChk(ierr);
494   CeedQFunctionField *qfinputfields, *qfoutputfields;
495   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
496   CeedChk(ierr);
497   CeedEvalMode emode;
498 
499   // Setup
500   ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr);
501 
502   // Input Evecs and Restriction
503   ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields,
504                                      opinputfields, invec, impl, request);
505   CeedChk(ierr);
506 
507   // Output Lvecs, Evecs, and Qvecs
508   for (CeedInt i=0; i<numoutputfields; i++) {
509     // Set Qvec if needed
510     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
511     CeedChk(ierr);
512     if (emode == CEED_EVAL_NONE) {
513       // Set qvec to single block evec
514       ierr = CeedVectorGetArray(impl->evecsout[i], CEED_MEM_HOST,
515                                 &impl->edata[i + numinputfields]);
516       CeedChk(ierr);
517       ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
518                                 CEED_USE_POINTER,
519                                 impl->edata[i + numinputfields]); CeedChk(ierr);
520       ierr = CeedVectorRestoreArray(impl->evecsout[i],
521                                     &impl->edata[i + numinputfields]);
522       CeedChk(ierr);
523     }
524   }
525 
526   // Loop through elements
527   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
528     // Input basis apply
529     ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields,
530                                       numinputfields, blksize, invec, false,
531                                       impl, request); CeedChk(ierr);
532 
533     // Q function
534     if (!impl->identityqf) {
535       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
536       CeedChk(ierr);
537     }
538 
539     // Output basis apply and restrict
540     ierr = CeedOperatorOutputBasis_Opt(e, Q, qfoutputfields, opoutputfields,
541                                        blksize, numinputfields, numoutputfields,
542                                        op, outvec, impl, request);
543     CeedChk(ierr);
544   }
545 
546   // Restore input arrays
547   ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields,
548                                        opinputfields, false, impl);
549   CeedChk(ierr);
550 
551   return 0;
552 }
553 
554 // Assemble Linear QFunction
555 static int CeedOperatorAssembleLinearQFunction_Opt(CeedOperator op,
556     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
557   int ierr;
558   Ceed ceed;
559   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
560   Ceed_Opt *ceedimpl;
561   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
562   const CeedInt blksize = ceedimpl->blksize;
563   CeedOperator_Opt *impl;
564   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
565   CeedInt Q, numinputfields, numoutputfields, numelements, size;
566   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
567   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
568   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
569   CeedQFunction qf;
570   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
571   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
572   CeedChk(ierr);
573   CeedOperatorField *opinputfields, *opoutputfields;
574   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
575   CeedChk(ierr);
576   CeedQFunctionField *qfinputfields, *qfoutputfields;
577   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
578   CeedChk(ierr);
579   CeedVector vec, lvec;
580   CeedInt numactivein = 0, numactiveout = 0;
581   CeedVector *activein = NULL;
582   CeedScalar *a, *tmp;
583 
584   // Setup
585   ierr = CeedOperatorSetup_Opt(op); CeedChk(ierr);
586 
587   // Check for identity
588   if (impl->identityqf)
589     // LCOV_EXCL_START
590     return CeedError(ceed, 1, "Assembling identity qfunctions not supported");
591   // LCOV_EXCL_STOP
592 
593   // Input Evecs and Restriction
594   ierr = CeedOperatorSetupInputs_Opt(numinputfields, qfinputfields,
595                                      opinputfields, NULL, impl, request);
596   CeedChk(ierr);
597 
598   // Count number of active input fields
599   for (CeedInt i=0; i<numinputfields; i++) {
600     // Get input vector
601     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
602     // Check if active input
603     if (vec == CEED_VECTOR_ACTIVE) {
604       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
605       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
606       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
607       CeedChk(ierr);
608       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
609       for (CeedInt field=0; field<size; field++) {
610         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
611         CeedChk(ierr);
612         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
613                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
614         CeedChk(ierr);
615       }
616       numactivein += size;
617       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
618     }
619   }
620 
621   // Count number of active output fields
622   for (CeedInt i=0; i<numoutputfields; i++) {
623     // Get output vector
624     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
625     // Check if active output
626     if (vec == CEED_VECTOR_ACTIVE) {
627       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
628       numactiveout += size;
629     }
630   }
631 
632   // Check sizes
633   if (!numactivein || !numactiveout)
634     // LCOV_EXCL_START
635     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
636                      "and outputs");
637   // LCOV_EXCL_STOP
638 
639   // Setup lvec
640   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
641                           &lvec); CeedChk(ierr);
642   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
643 
644   // Create output restriction
645   ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q,
646          numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr);
647   // Create assembled vector
648   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
649                           assembled); CeedChk(ierr);
650 
651   // Loop through elements
652   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
653     // Input basis apply
654     ierr = CeedOperatorInputBasis_Opt(e, Q, qfinputfields, opinputfields,
655                                       numinputfields, blksize, NULL, true,
656                                       impl, request); CeedChk(ierr);
657 
658     // Assemble QFunction
659     for (CeedInt in=0; in<numactivein; in++) {
660       // Set Inputs
661       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
662       if (numactivein > 1) {
663         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
664                                   0.0); CeedChk(ierr);
665       }
666       // Set Outputs
667       for (CeedInt out=0; out<numoutputfields; out++) {
668         // Get output vector
669         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
670         CeedChk(ierr);
671         // Check if active output
672         if (vec == CEED_VECTOR_ACTIVE) {
673           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
674                              CEED_USE_POINTER, a); CeedChk(ierr);
675           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
676           CeedChk(ierr);
677           a += size*Q*blksize; // Advance the pointer by the size of the output
678         }
679       }
680       // Apply QFunction
681       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
682       CeedChk(ierr);
683     }
684   }
685 
686   // Un-set output Qvecs to prevent accidental overwrite of Assembled
687   for (CeedInt out=0; out<numoutputfields; out++) {
688     // Get output vector
689     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
690     CeedChk(ierr);
691     // Check if active output
692     if (vec == CEED_VECTOR_ACTIVE) {
693       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
694                          NULL); CeedChk(ierr);
695     }
696   }
697 
698   // Restore input arrays
699   ierr = CeedOperatorRestoreInputs_Opt(numinputfields, qfinputfields,
700                                        opinputfields, true, impl);
701   CeedChk(ierr);
702 
703   // Output blocked restriction
704   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
705   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
706   CeedElemRestriction blkrstr;
707   ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize,
708                                           numelements*Q,
709                                           numactivein*numactiveout,
710                                           CEED_MEM_HOST, CEED_COPY_VALUES,
711                                           NULL, &blkrstr); CeedChk(ierr);
712   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE,
713                                   lvec, *assembled, request); CeedChk(ierr);
714 
715   // Cleanup
716   for (CeedInt i=0; i<numactivein; i++) {
717     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
718   }
719   ierr = CeedFree(&activein); CeedChk(ierr);
720   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
721   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
722 
723   return 0;
724 }
725 
726 int CeedOperatorCreate_Opt(CeedOperator op) {
727   int ierr;
728   Ceed ceed;
729   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
730   Ceed_Opt *ceedimpl;
731   ierr = CeedGetData(ceed, (void *)&ceedimpl); CeedChk(ierr);
732   CeedInt blksize = ceedimpl->blksize;
733   CeedOperator_Opt *impl;
734 
735   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
736   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
737 
738   if (blksize == 1 || blksize == 8) {
739     ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction",
740                                   CeedOperatorAssembleLinearQFunction_Opt);
741     CeedChk(ierr);
742     ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
743                                   CeedOperatorApply_Opt); CeedChk(ierr);
744   } else {
745     // LCOV_EXCL_START
746     return CeedError(ceed, 1, "Opt backend cannot use blocksize: %d", blksize);
747     // LCOV_EXCL_STOP
748   }
749 
750   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
751                                 CeedOperatorDestroy_Opt); CeedChk(ierr);
752   return 0;
753 }
754