xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision 42ea3801ea98a61a6a231cc894cdbc2b3c33a731) !
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include "ceed-blocked.h"
18 #include "../ref/ceed-ref.h"
19 
20 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
21   int ierr;
22   CeedOperator_Blocked *impl;
23   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
24 
25   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
26     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
27     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
28   }
29   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
30   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
31   ierr = CeedFree(&impl->edata); CeedChk(ierr);
32   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
33 
34   for (CeedInt i=0; i<impl->numein; i++) {
35     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
36     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
37   }
38   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
39   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
40 
41   for (CeedInt i=0; i<impl->numeout; i++) {
42     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
43     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
44   }
45   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
46   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
47 
48   ierr = CeedFree(&impl); CeedChk(ierr);
49   return 0;
50 }
51 
52 /*
53   Setup infields or outfields
54  */
55 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
56     CeedOperator op, bool inOrOut,
57     CeedElemRestriction *blkrestr,
58     CeedVector *fullevecs, CeedVector *evecs,
59     CeedVector *qvecs, CeedInt starte,
60     CeedInt numfields, CeedInt Q) {
61   CeedInt dim, ierr, ncomp, size, P;
62   Ceed ceed;
63   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
64   CeedBasis basis;
65   CeedElemRestriction r;
66   CeedOperatorField *opfields;
67   CeedQFunctionField *qffields;
68   if (inOrOut) {
69     ierr = CeedOperatorGetFields(op, NULL, &opfields);
70     CeedChk(ierr);
71     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
72     CeedChk(ierr);
73   } else {
74     ierr = CeedOperatorGetFields(op, &opfields, NULL);
75     CeedChk(ierr);
76     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
77     CeedChk(ierr);
78   }
79   const CeedInt blksize = 8;
80 
81   // Loop over fields
82   for (CeedInt i=0; i<numfields; i++) {
83     CeedEvalMode emode;
84     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
85 
86     if (emode != CEED_EVAL_WEIGHT) {
87       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
88       CeedChk(ierr);
89       CeedElemRestriction_Ref *data;
90       ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr);
91       Ceed ceed;
92       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
93       CeedInt nelem, elemsize, nnodes;
94       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
95       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
96       ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
97       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
98       ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize,
99                                               blksize, nnodes, ncomp,
100                                               CEED_MEM_HOST, CEED_COPY_VALUES,
101                                               data->indices, &blkrestr[i+starte]);
102       CeedChk(ierr);
103       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
104                                              &fullevecs[i+starte]);
105       CeedChk(ierr);
106     }
107 
108     switch(emode) {
109     case CEED_EVAL_NONE:
110       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
111       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
112       break;
113     case CEED_EVAL_INTERP:
114       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
115       ierr = CeedElemRestrictionGetElementSize(r, &P);
116       CeedChk(ierr);
117       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
118       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
119       break;
120     case CEED_EVAL_GRAD:
121       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
122       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
123       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
124       ierr = CeedElemRestrictionGetElementSize(r, &P);
125       CeedChk(ierr);
126       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
127       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
128       break;
129     case CEED_EVAL_WEIGHT: // Only on input fields
130       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
131       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
132       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
133                             CEED_EVAL_WEIGHT, NULL, qvecs[i]); CeedChk(ierr);
134 
135       break;
136     case CEED_EVAL_DIV:
137       break; // Not implemented
138     case CEED_EVAL_CURL:
139       break; // Not implemented
140     }
141   }
142   return 0;
143 }
144 
145 /*
146   CeedOperator needs to connect all the named fields (be they active or passive)
147   to the named inputs and outputs of its CeedQFunction.
148  */
149 static int CeedOperatorSetup_Blocked(CeedOperator op) {
150   int ierr;
151   bool setupdone;
152   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
153   if (setupdone) return 0;
154   Ceed ceed;
155   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
156   CeedOperator_Blocked *impl;
157   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
158   CeedQFunction qf;
159   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
160   CeedInt Q, numinputfields, numoutputfields;
161   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
162   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
163   CeedChk(ierr);
164   CeedOperatorField *opinputfields, *opoutputfields;
165   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
166   CeedChk(ierr);
167   CeedQFunctionField *qfinputfields, *qfoutputfields;
168   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
169   CeedChk(ierr);
170 
171   // Allocate
172   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
173   CeedChk(ierr);
174   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
175   CeedChk(ierr);
176   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
177   CeedChk(ierr);
178 
179   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
180   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
181   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
182   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
183   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
184 
185   impl->numein = numinputfields; impl->numeout = numoutputfields;
186 
187   // Set up infield and outfield pointer arrays
188   // Infields
189   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr,
190                                          impl->evecs, impl->evecsin,
191                                          impl->qvecsin, 0,
192                                          numinputfields, Q);
193   CeedChk(ierr);
194   // Outfields
195   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr,
196                                          impl->evecs, impl->evecsout,
197                                          impl->qvecsout, numinputfields,
198                                          numoutputfields, Q);
199   CeedChk(ierr);
200 
201   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
202 
203   return 0;
204 }
205 
206 
207 // Setup Input fields
208 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields,
209     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
210     CeedVector invec, bool skipactive, CeedOperator_Blocked *impl,
211     CeedRequest *request) {
212   CeedInt ierr;
213   CeedEvalMode emode;
214   CeedVector vec;
215   CeedTransposeMode lmode;
216   uint64_t state;
217 
218   for (CeedInt i=0; i<numinputfields; i++) {
219     // Get input vector
220     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
221     if (vec == CEED_VECTOR_ACTIVE) {
222       if (skipactive)
223         continue;
224       else
225         vec = invec;
226     }
227 
228     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
229     CeedChk(ierr);
230     if (emode == CEED_EVAL_WEIGHT) { // Skip
231     } else {
232       // Restrict
233       ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
234       if (state != impl->inputstate[i] || vec == invec) {
235         ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode);
236         CeedChk(ierr);
237         ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
238                                         lmode, vec, impl->evecs[i],
239                                         request); CeedChk(ierr); CeedChk(ierr);
240         impl->inputstate[i] = state;
241       }
242       // Get evec
243       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
244                                     (const CeedScalar **) &impl->edata[i]);
245       CeedChk(ierr);
246     }
247   }
248   return 0;
249 }
250 
251 // Input basis action
252 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
253     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
254     CeedInt numinputfields, CeedInt blksize, bool skipactive,
255     CeedOperator_Blocked *impl) {
256   CeedInt ierr;
257   CeedInt dim, elemsize, size;
258   CeedElemRestriction Erestrict;
259   CeedEvalMode emode;
260   CeedBasis basis;
261 
262   for (CeedInt i=0; i<numinputfields; i++) {
263     // Skip active input
264     if (skipactive) {
265       CeedVector vec;
266       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
267       if (vec == CEED_VECTOR_ACTIVE)
268         continue;
269     }
270 
271     // Get elemsize, emode, size
272     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
273     CeedChk(ierr);
274     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
275     CeedChk(ierr);
276     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
277     CeedChk(ierr);
278     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
279     // Basis action
280     switch(emode) {
281     case CEED_EVAL_NONE:
282       ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
283                                 CEED_USE_POINTER,
284                                 &impl->edata[i][e*Q*size]); CeedChk(ierr);
285       break;
286     case CEED_EVAL_INTERP:
287       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
288       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
289                                 CEED_USE_POINTER,
290                                 &impl->edata[i][e*elemsize*size]);
291       CeedChk(ierr);
292       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
293                             CEED_EVAL_INTERP, impl->evecsin[i],
294                             impl->qvecsin[i]); CeedChk(ierr);
295       break;
296     case CEED_EVAL_GRAD:
297       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
298       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
299       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
300                                 CEED_USE_POINTER,
301                                 &impl->edata[i][e*elemsize*size/dim]);
302       CeedChk(ierr);
303       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
304                             CEED_EVAL_GRAD, impl->evecsin[i],
305                             impl->qvecsin[i]); CeedChk(ierr);
306       break;
307     case CEED_EVAL_WEIGHT:
308       break;  // No action
309     case CEED_EVAL_DIV:
310     case CEED_EVAL_CURL: {
311       // LCOV_EXCL_START
312       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
313       CeedChk(ierr);
314       Ceed ceed;
315       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
316       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
317       // LCOV_EXCL_STOP
318       break; // Not implemented
319     }
320     }
321   }
322   return 0;
323 }
324 
325 // Output basis action
326 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
327     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
328     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
329     CeedOperator op, CeedOperator_Blocked *impl) {
330   CeedInt ierr;
331   CeedInt dim, elemsize, size;
332   CeedElemRestriction Erestrict;
333   CeedEvalMode emode;
334   CeedBasis basis;
335 
336   for (CeedInt i=0; i<numoutputfields; i++) {
337     // Get elemsize, emode, size
338     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
339     CeedChk(ierr);
340     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
341     CeedChk(ierr);
342     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
343     CeedChk(ierr);
344     ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
345     // Basis action
346     switch(emode) {
347     case CEED_EVAL_NONE:
348       break; // No action
349     case CEED_EVAL_INTERP:
350       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
351       CeedChk(ierr);
352       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
353                                 CEED_USE_POINTER,
354                                 &impl->edata[i + numinputfields][e*elemsize*size]);
355       CeedChk(ierr);
356       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
357                             CEED_EVAL_INTERP, impl->qvecsout[i],
358                             impl->evecsout[i]); CeedChk(ierr);
359       break;
360     case CEED_EVAL_GRAD:
361       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
362       CeedChk(ierr);
363       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
364       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
365                                 CEED_USE_POINTER,
366                                 &impl->edata[i + numinputfields][e*elemsize*size/dim]);
367       CeedChk(ierr);
368       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
369                             CEED_EVAL_GRAD, impl->qvecsout[i],
370                             impl->evecsout[i]); CeedChk(ierr);
371       break;
372     case CEED_EVAL_WEIGHT: {
373       // LCOV_EXCL_START
374       Ceed ceed;
375       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
376       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
377                        "evaluation mode");
378       // LCOV_EXCL_STOP
379       break; // Should not occur
380     }
381     case CEED_EVAL_DIV:
382     case CEED_EVAL_CURL: {
383       // LCOV_EXCL_START
384       Ceed ceed;
385       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
386       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
387       // LCOV_EXCL_STOP
388       break; // Not implemented
389     }
390     }
391   }
392   return 0;
393 }
394 
395 // Restore Inputs
396 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields,
397     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
398     bool skipactive, CeedOperator_Blocked *impl) {
399   CeedInt ierr;
400   CeedEvalMode emode;
401 
402   for (CeedInt i=0; i<numinputfields; i++) {
403     // Skip active inputs
404     if (skipactive) {
405       CeedVector vec;
406       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
407       if (vec == CEED_VECTOR_ACTIVE)
408         continue;
409     }
410     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
411     CeedChk(ierr);
412     if (emode == CEED_EVAL_WEIGHT) { // Skip
413     } else {
414       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
415                                         (const CeedScalar **) &impl->edata[i]);
416       CeedChk(ierr);
417     }
418   }
419   return 0;
420 }
421 
422 // Apply Ceed Operator
423 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec,
424                                      CeedVector outvec,
425                                      CeedRequest *request) {
426   int ierr;
427   CeedOperator_Blocked *impl;
428   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
429   const CeedInt blksize = 8;
430   CeedInt Q, numinputfields, numoutputfields, numelements, size;
431   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
432   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
433   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
434   CeedQFunction qf;
435   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
436   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
437   CeedChk(ierr);
438   CeedTransposeMode lmode;
439   CeedOperatorField *opinputfields, *opoutputfields;
440   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
441   CeedChk(ierr);
442   CeedQFunctionField *qfinputfields, *qfoutputfields;
443   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
444   CeedChk(ierr);
445   CeedEvalMode emode;
446   CeedVector vec;
447 
448   // Setup
449   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
450 
451   // Input Evecs and Restriction
452   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
453     opinputfields, invec, false, impl, request); CeedChk(ierr);
454 
455   // Output Evecs
456   for (CeedInt i=0; i<numoutputfields; i++) {
457     ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST,
458                               &impl->edata[i + numinputfields]); CeedChk(ierr);
459   }
460 
461   // Loop through elements
462   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
463     // Input basis apply
464     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
465                                           numinputfields, blksize, false, impl);
466     CeedChk(ierr);
467 
468     // Output pointers
469     for (CeedInt i=0; i<numoutputfields; i++) {
470       ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
471       CeedChk(ierr);
472       if (emode == CEED_EVAL_NONE) {
473         ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size);
474         CeedChk(ierr);
475         ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
476                                   CEED_USE_POINTER,
477                                   &impl->edata[i + numinputfields][e*Q*size]);
478         CeedChk(ierr);
479       }
480     }
481 
482     // Q function
483     ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
484     CeedChk(ierr);
485 
486     // Output basis apply
487     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields,
488              blksize, numinputfields, numoutputfields, op, impl);
489     CeedChk(ierr);
490   }
491 
492   // Zero lvecs
493   for (CeedInt i=0; i<numoutputfields; i++) {
494     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
495     if (vec == CEED_VECTOR_ACTIVE) {
496       if (!impl->add) {
497         vec = outvec;
498         ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
499       }
500     } else {
501       ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
502     }
503   }
504   impl->add = false;
505 
506   // Output restriction
507   for (CeedInt i=0; i<numoutputfields; i++) {
508     // Restore evec
509     ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein],
510                                   &impl->edata[i + numinputfields]); CeedChk(ierr);
511     // Get output vector
512     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
513     // Active
514     if (vec == CEED_VECTOR_ACTIVE)
515       vec = outvec;
516     // Restrict
517     ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); CeedChk(ierr);
518     ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE,
519                                     lmode, impl->evecs[i+impl->numein], vec,
520                                     request); CeedChk(ierr);
521 
522   }
523 
524   // Restore input arrays
525   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
526                                            opinputfields, false, impl);
527   CeedChk(ierr);
528 
529   return 0;
530 }
531 
532 // Assemble Linear QFunction
533 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op,
534     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
535   int ierr;
536   CeedOperator_Blocked *impl;
537   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
538   const CeedInt blksize = 8;
539   CeedInt Q, numinputfields, numoutputfields, numelements, size;
540   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
541   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
542   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
543   CeedQFunction qf;
544   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
545   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
546   CeedChk(ierr);
547   CeedOperatorField *opinputfields, *opoutputfields;
548   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
549   CeedChk(ierr);
550   CeedQFunctionField *qfinputfields, *qfoutputfields;
551   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
552   CeedChk(ierr);
553   CeedVector vec, lvec;
554   CeedInt numactivein = 0, numactiveout = 0;
555   CeedVector *activein = NULL;
556   CeedScalar *a, *tmp;
557   Ceed ceed;
558   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
559 
560   // Setup
561   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
562 
563   // Input Evecs and Restriction
564   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
565                                          opinputfields, NULL, true, impl,
566                                          request); CeedChk(ierr);
567 
568   // Count number of active input fields
569   for (CeedInt i=0; i<numinputfields; i++) {
570     // Get input vector
571     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
572     // Check if active input
573     if (vec == CEED_VECTOR_ACTIVE) {
574       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
575       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
576       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
577       CeedChk(ierr);
578       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
579       for (CeedInt field=0; field<size; field++) {
580         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
581         CeedChk(ierr);
582         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
583                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
584       }
585       numactivein += size;
586       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
587     }
588   }
589 
590   // Count number of active output fields
591   for (CeedInt i=0; i<numoutputfields; i++) {
592     // Get output vector
593     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
594     // Check if active output
595     if (vec == CEED_VECTOR_ACTIVE) {
596       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
597       numactiveout += size;
598     }
599   }
600 
601   // Check sizes
602   if (!numactivein || !numactiveout)
603     // LCOV_EXCL_START
604     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
605                      "and outputs");
606     // LCOV_EXCL_STOP
607 
608   // Setup lvec
609   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
610                           &lvec); CeedChk(ierr);
611   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
612 
613   // Create output restriction
614   ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q,
615                                            numelements*Q,
616                                            numactivein*numactiveout, rstr);
617   CeedChk(ierr);
618   // Create assembled vector
619   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
620                           assembled); CeedChk(ierr);
621 
622   // Loop through elements
623   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
624     // Input basis apply
625     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
626                                           numinputfields, blksize, true, impl);
627     CeedChk(ierr);
628 
629     // Assemble QFunction
630     for (CeedInt in=0; in<numactivein; in++) {
631       // Set Inputs
632       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
633       if (numactivein > 1) {
634         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
635                                   0.0); CeedChk(ierr);
636       }
637       // Set Outputs
638       for (CeedInt out=0; out<numoutputfields; out++) {
639         // Get output vector
640         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
641         CeedChk(ierr);
642         // Check if active output
643         if (vec == CEED_VECTOR_ACTIVE) {
644           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
645                              CEED_USE_POINTER, a); CeedChk(ierr);
646           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
647           CeedChk(ierr);
648           a += size*Q*blksize; // Advance the pointer by the size of the output
649         }
650       }
651       // Apply QFunction
652       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
653       CeedChk(ierr);
654     }
655   }
656 
657   // Un-set output Qvecs to prevent accidental overwrite of Assembled
658   for (CeedInt out=0; out<numoutputfields; out++) {
659     // Get output vector
660     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
661     CeedChk(ierr);
662     // Check if active output
663     if (vec == CEED_VECTOR_ACTIVE) {
664       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
665                          NULL); CeedChk(ierr);
666     }
667   }
668 
669   // Restore input arrays
670   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
671                                            opinputfields, true, impl);
672   CeedChk(ierr);
673 
674   // Output blocked restriction
675   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
676   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
677   CeedElemRestriction blkrstr;
678   ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize,
679                                           numelements*Q,
680                                           numactivein*numactiveout,
681                                           CEED_MEM_HOST, CEED_COPY_VALUES,
682                                           NULL, &blkrstr); CeedChk(ierr);
683   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE,
684                                   lvec, *assembled, request); CeedChk(ierr);
685 
686   // Cleanup
687   for (CeedInt i=0; i<numactivein; i++) {
688     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
689   }
690   ierr = CeedFree(&activein); CeedChk(ierr);
691   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
692   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
693 
694   return 0;
695 }
696 
697 int CeedOperatorCreate_Blocked(CeedOperator op) {
698   int ierr;
699   Ceed ceed;
700   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
701   CeedOperator_Blocked *impl;
702 
703   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
704   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
705 
706 
707   ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction",
708                                 CeedOperatorAssembleLinearQFunction_Blocked);
709   CeedChk(ierr);
710   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply",
711                                 CeedOperatorApply_Blocked); CeedChk(ierr);
712   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
713                                 CeedOperatorDestroy_Blocked); CeedChk(ierr);
714   return 0;
715 }
716