xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision 55e4cc5bfc35b2c27e98784af386df9728efb8f2)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include "ceed-blocked.h"
18 #include "../ref/ceed-ref.h"
19 
20 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
21   int ierr;
22   CeedOperator_Blocked *impl;
23   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
24 
25   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
26     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
27     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
28   }
29   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
30   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
31   ierr = CeedFree(&impl->edata); CeedChk(ierr);
32   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
33 
34   for (CeedInt i=0; i<impl->numein; i++) {
35     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
36     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
37   }
38   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
39   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
40 
41   for (CeedInt i=0; i<impl->numeout; i++) {
42     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
43     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
44   }
45   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
46   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
47 
48   ierr = CeedFree(&impl); CeedChk(ierr);
49   return 0;
50 }
51 
52 /*
53   Setup infields or outfields
54  */
55 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
56     CeedOperator op, bool inOrOut,
57     CeedElemRestriction *blkrestr,
58     CeedVector *fullevecs, CeedVector *evecs,
59     CeedVector *qvecs, CeedInt starte,
60     CeedInt numfields, CeedInt Q) {
61   CeedInt dim, ierr, ncomp, size, P;
62   Ceed ceed;
63   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
64   CeedBasis basis;
65   CeedElemRestriction r;
66   CeedOperatorField *opfields;
67   CeedQFunctionField *qffields;
68   if (inOrOut) {
69     ierr = CeedOperatorGetFields(op, NULL, &opfields);
70     CeedChk(ierr);
71     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
72     CeedChk(ierr);
73   } else {
74     ierr = CeedOperatorGetFields(op, &opfields, NULL);
75     CeedChk(ierr);
76     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
77     CeedChk(ierr);
78   }
79   const CeedInt blksize = 8;
80 
81   // Loop over fields
82   for (CeedInt i=0; i<numfields; i++) {
83     CeedEvalMode emode;
84     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
85 
86     if (emode != CEED_EVAL_WEIGHT) {
87       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
88       CeedChk(ierr);
89       CeedElemRestriction_Ref *data;
90       ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr);
91       Ceed ceed;
92       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
93       CeedInt nelem, elemsize, nnodes;
94       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
95       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
96       ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
97       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
98       ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize,
99                                               blksize, nnodes, ncomp,
100                                               CEED_MEM_HOST, CEED_COPY_VALUES,
101                                               data->indices, &blkrestr[i+starte]);
102       CeedChk(ierr);
103       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
104                                              &fullevecs[i+starte]);
105       CeedChk(ierr);
106     }
107 
108     switch(emode) {
109     case CEED_EVAL_NONE:
110       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
111       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
112       break;
113     case CEED_EVAL_INTERP:
114       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
115       ierr = CeedElemRestrictionGetElementSize(r, &P);
116       CeedChk(ierr);
117       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
118       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
119       break;
120     case CEED_EVAL_GRAD:
121       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
122       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
123       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
124       ierr = CeedElemRestrictionGetElementSize(r, &P);
125       CeedChk(ierr);
126       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
127       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
128       break;
129     case CEED_EVAL_WEIGHT: // Only on input fields
130       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
131       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
132       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
133                             CEED_EVAL_WEIGHT, NULL, qvecs[i]); CeedChk(ierr);
134 
135       break;
136     case CEED_EVAL_DIV:
137       break; // Not implemented
138     case CEED_EVAL_CURL:
139       break; // Not implemented
140     }
141   }
142   return 0;
143 }
144 
145 /*
146   CeedOperator needs to connect all the named fields (be they active or passive)
147   to the named inputs and outputs of its CeedQFunction.
148  */
149 static int CeedOperatorSetup_Blocked(CeedOperator op) {
150   int ierr;
151   bool setupdone;
152   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
153   if (setupdone) return 0;
154   Ceed ceed;
155   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
156   CeedOperator_Blocked *impl;
157   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
158   CeedQFunction qf;
159   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
160   CeedInt Q, numinputfields, numoutputfields;
161   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
162   ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr);
163   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
164   CeedChk(ierr);
165   CeedOperatorField *opinputfields, *opoutputfields;
166   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
167   CeedChk(ierr);
168   CeedQFunctionField *qfinputfields, *qfoutputfields;
169   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
170   CeedChk(ierr);
171 
172   // Allocate
173   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
174   CeedChk(ierr);
175   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
176   CeedChk(ierr);
177   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
178   CeedChk(ierr);
179 
180   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
181   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
182   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
183   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
184   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
185 
186   impl->numein = numinputfields; impl->numeout = numoutputfields;
187 
188   // Set up infield and outfield pointer arrays
189   // Infields
190   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr,
191                                          impl->evecs, impl->evecsin,
192                                          impl->qvecsin, 0,
193                                          numinputfields, Q);
194   CeedChk(ierr);
195   // Outfields
196   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr,
197                                          impl->evecs, impl->evecsout,
198                                          impl->qvecsout, numinputfields,
199                                          numoutputfields, Q);
200   CeedChk(ierr);
201 
202   // Identity QFunctions
203   if (impl->identityqf) {
204     CeedEvalMode inmode, outmode;
205     CeedQFunctionField *infields, *outfields;
206     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr);
207 
208     for (CeedInt i=0; i<numinputfields; i++) {
209       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
210       CeedChk(ierr);
211       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
212       CeedChk(ierr);
213 
214       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
215       impl->qvecsout[i] = impl->qvecsin[i];
216       ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr);
217     }
218   }
219 
220   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
221 
222   return 0;
223 }
224 
225 // Setup Input fields
226 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields,
227     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
228     CeedVector invec, bool skipactive, CeedOperator_Blocked *impl,
229     CeedRequest *request) {
230   CeedInt ierr;
231   CeedEvalMode emode;
232   CeedVector vec;
233   CeedTransposeMode lmode;
234   uint64_t state;
235 
236   for (CeedInt i=0; i<numinputfields; i++) {
237     // Get input vector
238     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
239     if (vec == CEED_VECTOR_ACTIVE) {
240       if (skipactive)
241         continue;
242       else
243         vec = invec;
244     }
245 
246     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
247     CeedChk(ierr);
248     if (emode == CEED_EVAL_WEIGHT) { // Skip
249     } else {
250       // Restrict
251       ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
252       if (state != impl->inputstate[i] || vec == invec) {
253         ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode);
254         CeedChk(ierr);
255         ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
256                                         lmode, vec, impl->evecs[i],
257                                         request); CeedChk(ierr); CeedChk(ierr);
258         impl->inputstate[i] = state;
259       }
260       // Get evec
261       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
262                                     (const CeedScalar **) &impl->edata[i]);
263       CeedChk(ierr);
264     }
265   }
266   return 0;
267 }
268 
269 // Input basis action
270 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
271     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
272     CeedInt numinputfields, CeedInt blksize, bool skipactive,
273     CeedOperator_Blocked *impl) {
274   CeedInt ierr;
275   CeedInt dim, elemsize, size;
276   CeedElemRestriction Erestrict;
277   CeedEvalMode emode;
278   CeedBasis basis;
279 
280   for (CeedInt i=0; i<numinputfields; i++) {
281     // Skip active input
282     if (skipactive) {
283       CeedVector vec;
284       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
285       if (vec == CEED_VECTOR_ACTIVE)
286         continue;
287     }
288 
289     // Get elemsize, emode, size
290     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
291     CeedChk(ierr);
292     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
293     CeedChk(ierr);
294     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
295     CeedChk(ierr);
296     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
297     // Basis action
298     switch(emode) {
299     case CEED_EVAL_NONE:
300       ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
301                                 CEED_USE_POINTER,
302                                 &impl->edata[i][e*Q*size]); CeedChk(ierr);
303       break;
304     case CEED_EVAL_INTERP:
305       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
306       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
307                                 CEED_USE_POINTER,
308                                 &impl->edata[i][e*elemsize*size]);
309       CeedChk(ierr);
310       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
311                             CEED_EVAL_INTERP, impl->evecsin[i],
312                             impl->qvecsin[i]); CeedChk(ierr);
313       break;
314     case CEED_EVAL_GRAD:
315       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
316       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
317       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
318                                 CEED_USE_POINTER,
319                                 &impl->edata[i][e*elemsize*size/dim]);
320       CeedChk(ierr);
321       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
322                             CEED_EVAL_GRAD, impl->evecsin[i],
323                             impl->qvecsin[i]); CeedChk(ierr);
324       break;
325     case CEED_EVAL_WEIGHT:
326       break;  // No action
327     case CEED_EVAL_DIV:
328     case CEED_EVAL_CURL: {
329       // LCOV_EXCL_START
330       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
331       CeedChk(ierr);
332       Ceed ceed;
333       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
334       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
335       // LCOV_EXCL_STOP
336       break; // Not implemented
337     }
338     }
339   }
340   return 0;
341 }
342 
343 // Output basis action
344 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
345     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
346     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
347     CeedOperator op, CeedOperator_Blocked *impl) {
348   CeedInt ierr;
349   CeedInt dim, elemsize, size;
350   CeedElemRestriction Erestrict;
351   CeedEvalMode emode;
352   CeedBasis basis;
353 
354   for (CeedInt i=0; i<numoutputfields; i++) {
355     // Get elemsize, emode, size
356     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
357     CeedChk(ierr);
358     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
359     CeedChk(ierr);
360     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
361     CeedChk(ierr);
362     ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
363     // Basis action
364     switch(emode) {
365     case CEED_EVAL_NONE:
366       break; // No action
367     case CEED_EVAL_INTERP:
368       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
369       CeedChk(ierr);
370       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
371                                 CEED_USE_POINTER,
372                                 &impl->edata[i + numinputfields][e*elemsize*size]);
373       CeedChk(ierr);
374       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
375                             CEED_EVAL_INTERP, impl->qvecsout[i],
376                             impl->evecsout[i]); CeedChk(ierr);
377       break;
378     case CEED_EVAL_GRAD:
379       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
380       CeedChk(ierr);
381       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
382       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
383                                 CEED_USE_POINTER,
384                                 &impl->edata[i + numinputfields][e*elemsize*size/dim]);
385       CeedChk(ierr);
386       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
387                             CEED_EVAL_GRAD, impl->qvecsout[i],
388                             impl->evecsout[i]); CeedChk(ierr);
389       break;
390     case CEED_EVAL_WEIGHT: {
391       // LCOV_EXCL_START
392       Ceed ceed;
393       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
394       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
395                        "evaluation mode");
396       // LCOV_EXCL_STOP
397       break; // Should not occur
398     }
399     case CEED_EVAL_DIV:
400     case CEED_EVAL_CURL: {
401       // LCOV_EXCL_START
402       Ceed ceed;
403       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
404       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
405       // LCOV_EXCL_STOP
406       break; // Not implemented
407     }
408     }
409   }
410   return 0;
411 }
412 
413 // Restore Inputs
414 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields,
415     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
416     bool skipactive, CeedOperator_Blocked *impl) {
417   CeedInt ierr;
418   CeedEvalMode emode;
419 
420   for (CeedInt i=0; i<numinputfields; i++) {
421     // Skip active inputs
422     if (skipactive) {
423       CeedVector vec;
424       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
425       if (vec == CEED_VECTOR_ACTIVE)
426         continue;
427     }
428     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
429     CeedChk(ierr);
430     if (emode == CEED_EVAL_WEIGHT) { // Skip
431     } else {
432       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
433                                         (const CeedScalar **) &impl->edata[i]);
434       CeedChk(ierr);
435     }
436   }
437   return 0;
438 }
439 
440 // Apply Ceed Operator
441 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec,
442                                      CeedVector outvec,
443                                      CeedRequest *request) {
444   int ierr;
445   CeedOperator_Blocked *impl;
446   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
447   const CeedInt blksize = 8;
448   CeedInt Q, numinputfields, numoutputfields, numelements, size;
449   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
450   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
451   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
452   CeedQFunction qf;
453   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
454   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
455   CeedChk(ierr);
456   CeedTransposeMode lmode;
457   CeedOperatorField *opinputfields, *opoutputfields;
458   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
459   CeedChk(ierr);
460   CeedQFunctionField *qfinputfields, *qfoutputfields;
461   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
462   CeedChk(ierr);
463   CeedEvalMode emode;
464   CeedVector vec;
465 
466   // Setup
467   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
468 
469   // Input Evecs and Restriction
470   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
471                                          opinputfields, invec, false, impl,
472                                          request); CeedChk(ierr);
473 
474   // Output Evecs
475   for (CeedInt i=0; i<numoutputfields; i++) {
476     ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST,
477                               &impl->edata[i + numinputfields]); CeedChk(ierr);
478   }
479 
480   // Loop through elements
481   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
482     // Output pointers
483     for (CeedInt i=0; i<numoutputfields; i++) {
484       ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
485       CeedChk(ierr);
486       if (emode == CEED_EVAL_NONE) {
487         ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size);
488         CeedChk(ierr);
489         ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
490                                   CEED_USE_POINTER,
491                                   &impl->edata[i + numinputfields][e*Q*size]);
492         CeedChk(ierr);
493       }
494     }
495 
496     // Input basis apply
497     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
498                                           numinputfields, blksize, false, impl);
499     CeedChk(ierr);
500 
501     // Q function
502     if (!impl->identityqf) {
503       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
504       CeedChk(ierr);
505     }
506 
507     // Output basis apply
508     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields,
509                                            blksize, numinputfields,
510                                            numoutputfields, op, impl);
511     CeedChk(ierr);
512   }
513 
514   // Zero lvecs
515   for (CeedInt i=0; i<numoutputfields; i++) {
516     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
517     if (vec == CEED_VECTOR_ACTIVE) {
518       if (!impl->add) {
519         vec = outvec;
520         ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
521       }
522     } else {
523       ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
524     }
525   }
526   impl->add = false;
527 
528   // Output restriction
529   for (CeedInt i=0; i<numoutputfields; i++) {
530     // Restore evec
531     ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein],
532                                   &impl->edata[i + numinputfields]); CeedChk(ierr);
533     // Get output vector
534     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
535     // Active
536     if (vec == CEED_VECTOR_ACTIVE)
537       vec = outvec;
538     // Restrict
539     ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); CeedChk(ierr);
540     ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE,
541                                     lmode, impl->evecs[i+impl->numein], vec,
542                                     request); CeedChk(ierr);
543 
544   }
545 
546   // Restore input arrays
547   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
548          opinputfields, false, impl); CeedChk(ierr);
549 
550   return 0;
551 }
552 
553 // Assemble Linear QFunction
554 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op,
555     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
556   int ierr;
557   CeedOperator_Blocked *impl;
558   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
559   const CeedInt blksize = 8;
560   CeedInt Q, numinputfields, numoutputfields, numelements, size;
561   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
562   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
563   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
564   CeedQFunction qf;
565   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
566   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
567   CeedChk(ierr);
568   CeedOperatorField *opinputfields, *opoutputfields;
569   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
570   CeedChk(ierr);
571   CeedQFunctionField *qfinputfields, *qfoutputfields;
572   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
573   CeedChk(ierr);
574   CeedVector vec, lvec;
575   CeedInt numactivein = 0, numactiveout = 0;
576   CeedVector *activein = NULL;
577   CeedScalar *a, *tmp;
578   Ceed ceed;
579   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
580 
581   // Setup
582   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
583 
584   // Check for identity
585   if (impl->identityqf)
586     // LCOV_EXCL_START
587     return CeedError(ceed, 1, "Assembling identity qfunctions not supported");
588   // LCOV_EXCL_STOP
589 
590   // Input Evecs and Restriction
591   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
592                                          opinputfields, NULL, true, impl,
593                                          request); CeedChk(ierr);
594 
595   // Count number of active input fields
596   for (CeedInt i=0; i<numinputfields; i++) {
597     // Get input vector
598     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
599     // Check if active input
600     if (vec == CEED_VECTOR_ACTIVE) {
601       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
602       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
603       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
604       CeedChk(ierr);
605       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
606       for (CeedInt field=0; field<size; field++) {
607         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
608         CeedChk(ierr);
609         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
610                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
611         CeedChk(ierr);
612       }
613       numactivein += size;
614       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
615     }
616   }
617 
618   // Count number of active output fields
619   for (CeedInt i=0; i<numoutputfields; i++) {
620     // Get output vector
621     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
622     // Check if active output
623     if (vec == CEED_VECTOR_ACTIVE) {
624       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
625       numactiveout += size;
626     }
627   }
628 
629   // Check sizes
630   if (!numactivein || !numactiveout)
631     // LCOV_EXCL_START
632     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
633                      "and outputs");
634   // LCOV_EXCL_STOP
635 
636   // Setup lvec
637   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
638                           &lvec); CeedChk(ierr);
639   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
640 
641   // Create output restriction
642   ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q,
643          numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr);
644   // Create assembled vector
645   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
646                           assembled); CeedChk(ierr);
647 
648   // Loop through elements
649   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
650     // Input basis apply
651     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
652                                           numinputfields, blksize, true, impl);
653     CeedChk(ierr);
654 
655     // Assemble QFunction
656     for (CeedInt in=0; in<numactivein; in++) {
657       // Set Inputs
658       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
659       if (numactivein > 1) {
660         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
661                                   0.0); CeedChk(ierr);
662       }
663       // Set Outputs
664       for (CeedInt out=0; out<numoutputfields; out++) {
665         // Get output vector
666         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
667         CeedChk(ierr);
668         // Check if active output
669         if (vec == CEED_VECTOR_ACTIVE) {
670           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
671                              CEED_USE_POINTER, a); CeedChk(ierr);
672           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
673           CeedChk(ierr);
674           a += size*Q*blksize; // Advance the pointer by the size of the output
675         }
676       }
677       // Apply QFunction
678       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
679       CeedChk(ierr);
680     }
681   }
682 
683   // Un-set output Qvecs to prevent accidental overwrite of Assembled
684   for (CeedInt out=0; out<numoutputfields; out++) {
685     // Get output vector
686     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
687     CeedChk(ierr);
688     // Check if active output
689     if (vec == CEED_VECTOR_ACTIVE) {
690       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
691                          NULL); CeedChk(ierr);
692     }
693   }
694 
695   // Restore input arrays
696   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
697          opinputfields, true, impl); CeedChk(ierr);
698 
699   // Output blocked restriction
700   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
701   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
702   CeedElemRestriction blkrstr;
703   ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize,
704                                           numelements*Q,
705                                           numactivein*numactiveout,
706                                           CEED_MEM_HOST, CEED_COPY_VALUES,
707                                           NULL, &blkrstr); CeedChk(ierr);
708   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE,
709                                   lvec, *assembled, request); CeedChk(ierr);
710 
711   // Cleanup
712   for (CeedInt i=0; i<numactivein; i++) {
713     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
714   }
715   ierr = CeedFree(&activein); CeedChk(ierr);
716   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
717   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
718 
719   return 0;
720 }
721 
722 int CeedOperatorCreate_Blocked(CeedOperator op) {
723   int ierr;
724   Ceed ceed;
725   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
726   CeedOperator_Blocked *impl;
727 
728   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
729   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
730 
731   ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction",
732                                 CeedOperatorAssembleLinearQFunction_Blocked);
733   CeedChk(ierr);
734   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply",
735                                 CeedOperatorApply_Blocked); CeedChk(ierr);
736   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
737                                 CeedOperatorDestroy_Blocked); CeedChk(ierr);
738   return 0;
739 }
740