xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision e6a04bf5cb06fd03ee882d1ea55149c09771081b)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include "ceed-blocked.h"
18 #include "../ref/ceed-ref.h"
19 
20 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
21   int ierr;
22   CeedOperator_Blocked *impl;
23   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
24 
25   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
26     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
27     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
28   }
29   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
30   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
31   ierr = CeedFree(&impl->edata); CeedChk(ierr);
32   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
33 
34   for (CeedInt i=0; i<impl->numein; i++) {
35     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
36     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
37   }
38   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
39   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
40 
41   for (CeedInt i=0; i<impl->numeout; i++) {
42     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
43     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
44   }
45   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
46   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
47 
48   ierr = CeedFree(&impl); CeedChk(ierr);
49   return 0;
50 }
51 
52 /*
53   Setup infields or outfields
54  */
55 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
56     CeedOperator op, bool inOrOut,
57     CeedElemRestriction *blkrestr,
58     CeedVector *fullevecs, CeedVector *evecs,
59     CeedVector *qvecs, CeedInt starte,
60     CeedInt numfields, CeedInt Q) {
61   CeedInt dim, ierr, ncomp, size, P;
62   Ceed ceed;
63   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
64   CeedBasis basis;
65   CeedElemRestriction r;
66   CeedOperatorField *opfields;
67   CeedQFunctionField *qffields;
68   if (inOrOut) {
69     ierr = CeedOperatorGetFields(op, NULL, &opfields);
70     CeedChk(ierr);
71     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
72     CeedChk(ierr);
73   } else {
74     ierr = CeedOperatorGetFields(op, &opfields, NULL);
75     CeedChk(ierr);
76     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
77     CeedChk(ierr);
78   }
79   const CeedInt blksize = 8;
80 
81   // Loop over fields
82   for (CeedInt i=0; i<numfields; i++) {
83     CeedEvalMode emode;
84     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
85 
86     if (emode != CEED_EVAL_WEIGHT) {
87       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
88       CeedChk(ierr);
89       CeedElemRestriction_Ref *data;
90       ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr);
91       Ceed ceed;
92       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
93       CeedInt nelem, elemsize, nnodes;
94       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
95       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
96       ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
97       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
98       ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize,
99                                               blksize, nnodes, ncomp,
100                                               CEED_MEM_HOST, CEED_COPY_VALUES,
101                                               data->indices, &blkrestr[i+starte]);
102       CeedChk(ierr);
103       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
104                                              &fullevecs[i+starte]);
105       CeedChk(ierr);
106     }
107 
108     switch(emode) {
109     case CEED_EVAL_NONE:
110       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
111       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
112       break;
113     case CEED_EVAL_INTERP:
114       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
115       ierr = CeedElemRestrictionGetElementSize(r, &P);
116       CeedChk(ierr);
117       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
118       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
119       break;
120     case CEED_EVAL_GRAD:
121       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
122       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
123       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
124       ierr = CeedElemRestrictionGetElementSize(r, &P);
125       CeedChk(ierr);
126       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
127       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
128       break;
129     case CEED_EVAL_WEIGHT: // Only on input fields
130       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
131       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
132       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
133                             CEED_EVAL_WEIGHT, NULL, qvecs[i]); CeedChk(ierr);
134 
135       break;
136     case CEED_EVAL_DIV:
137       break; // Not implemented
138     case CEED_EVAL_CURL:
139       break; // Not implemented
140     }
141   }
142   return 0;
143 }
144 
145 /*
146   CeedOperator needs to connect all the named fields (be they active or passive)
147   to the named inputs and outputs of its CeedQFunction.
148  */
149 static int CeedOperatorSetup_Blocked(CeedOperator op) {
150   int ierr;
151   bool setupdone;
152   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
153   if (setupdone) return 0;
154   Ceed ceed;
155   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
156   CeedOperator_Blocked *impl;
157   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
158   CeedQFunction qf;
159   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
160   CeedInt Q, numinputfields, numoutputfields;
161   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
162   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
163   CeedChk(ierr);
164   CeedOperatorField *opinputfields, *opoutputfields;
165   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
166   CeedChk(ierr);
167   CeedQFunctionField *qfinputfields, *qfoutputfields;
168   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
169   CeedChk(ierr);
170 
171   // Allocate
172   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
173   CeedChk(ierr);
174   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
175   CeedChk(ierr);
176   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
177   CeedChk(ierr);
178 
179   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
180   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
181   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
182   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
183   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
184 
185   impl->numein = numinputfields; impl->numeout = numoutputfields;
186 
187   // Set up infield and outfield pointer arrays
188   // Infields
189   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr,
190                                          impl->evecs, impl->evecsin,
191                                          impl->qvecsin, 0,
192                                          numinputfields, Q);
193   CeedChk(ierr);
194   // Outfields
195   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr,
196                                          impl->evecs, impl->evecsout,
197                                          impl->qvecsout, numinputfields,
198                                          numoutputfields, Q);
199   CeedChk(ierr);
200 
201   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
202 
203   return 0;
204 }
205 
206 
207 // Setup Input fields
208 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields,
209     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
210     CeedVector invec, bool skipactive, CeedOperator_Blocked *impl,
211     CeedRequest *request) {
212   CeedInt ierr;
213   CeedEvalMode emode;
214   CeedVector vec;
215   CeedTransposeMode lmode;
216   uint64_t state;
217 
218   for (CeedInt i=0; i<numinputfields; i++) {
219     // Get input vector
220     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
221     if (vec == CEED_VECTOR_ACTIVE) {
222       if (skipactive)
223         continue;
224       else
225         vec = invec;
226     }
227 
228     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
229     CeedChk(ierr);
230     if (emode == CEED_EVAL_WEIGHT) { // Skip
231     } else {
232       // Restrict
233       ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
234       if (state != impl->inputstate[i] || vec == invec) {
235         ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode);
236         CeedChk(ierr);
237         ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
238                                         lmode, vec, impl->evecs[i],
239                                         request); CeedChk(ierr); CeedChk(ierr);
240         impl->inputstate[i] = state;
241       }
242       // Get evec
243       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
244                                     (const CeedScalar **) &impl->edata[i]);
245       CeedChk(ierr);
246     }
247   }
248   return 0;
249 }
250 
251 // Input basis action
252 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
253     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
254     CeedInt numinputfields, CeedInt blksize, bool skipactive,
255     CeedOperator_Blocked *impl) {
256   CeedInt ierr;
257   CeedInt dim, elemsize, size;
258   CeedElemRestriction Erestrict;
259   CeedEvalMode emode;
260   CeedBasis basis;
261 
262   for (CeedInt i=0; i<numinputfields; i++) {
263     // Skip active input
264     if (skipactive) {
265       CeedVector vec;
266       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
267       if (vec == CEED_VECTOR_ACTIVE)
268         continue;
269     }
270 
271     // Get elemsize, emode, size
272     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
273     CeedChk(ierr);
274     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
275     CeedChk(ierr);
276     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
277     CeedChk(ierr);
278     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
279     // Basis action
280     switch(emode) {
281     case CEED_EVAL_NONE:
282       ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
283                                 CEED_USE_POINTER,
284                                 &impl->edata[i][e*Q*size]); CeedChk(ierr);
285       break;
286     case CEED_EVAL_INTERP:
287       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
288       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
289                                 CEED_USE_POINTER,
290                                 &impl->edata[i][e*elemsize*size]);
291       CeedChk(ierr);
292       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
293                             CEED_EVAL_INTERP, impl->evecsin[i],
294                             impl->qvecsin[i]); CeedChk(ierr);
295       break;
296     case CEED_EVAL_GRAD:
297       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
298       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
299       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
300                                 CEED_USE_POINTER,
301                                 &impl->edata[i][e*elemsize*size/dim]);
302       CeedChk(ierr);
303       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
304                             CEED_EVAL_GRAD, impl->evecsin[i],
305                             impl->qvecsin[i]); CeedChk(ierr);
306       break;
307     case CEED_EVAL_WEIGHT:
308       break;  // No action
309     case CEED_EVAL_DIV:
310     case CEED_EVAL_CURL: {
311       // LCOV_EXCL_START
312       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
313       CeedChk(ierr);
314       Ceed ceed;
315       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
316       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
317       // LCOV_EXCL_STOP
318       break; // Not implemented
319     }
320     }
321   }
322   return 0;
323 }
324 
325 // Output basis action
326 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
327     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
328     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
329     CeedOperator op, CeedOperator_Blocked *impl) {
330   CeedInt ierr;
331   CeedInt dim, elemsize, size;
332   CeedElemRestriction Erestrict;
333   CeedEvalMode emode;
334   CeedBasis basis;
335 
336   for (CeedInt i=0; i<numoutputfields; i++) {
337     // Get elemsize, emode, size
338     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
339     CeedChk(ierr);
340     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
341     CeedChk(ierr);
342     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
343     CeedChk(ierr);
344     ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
345     // Basis action
346     switch(emode) {
347     case CEED_EVAL_NONE:
348       break; // No action
349     case CEED_EVAL_INTERP:
350       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
351       CeedChk(ierr);
352       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
353                                 CEED_USE_POINTER,
354                                 &impl->edata[i + numinputfields][e*elemsize*size]);
355       CeedChk(ierr);
356       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
357                             CEED_EVAL_INTERP, impl->qvecsout[i],
358                             impl->evecsout[i]); CeedChk(ierr);
359       break;
360     case CEED_EVAL_GRAD:
361       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
362       CeedChk(ierr);
363       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
364       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
365                                 CEED_USE_POINTER,
366                                 &impl->edata[i + numinputfields][e*elemsize*size/dim]);
367       CeedChk(ierr);
368       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
369                             CEED_EVAL_GRAD, impl->qvecsout[i],
370                             impl->evecsout[i]); CeedChk(ierr);
371       break;
372     case CEED_EVAL_WEIGHT: {
373       // LCOV_EXCL_START
374       Ceed ceed;
375       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
376       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
377                        "evaluation mode");
378       // LCOV_EXCL_STOP
379       break; // Should not occur
380     }
381     case CEED_EVAL_DIV:
382     case CEED_EVAL_CURL: {
383       // LCOV_EXCL_START
384       Ceed ceed;
385       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
386       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
387       // LCOV_EXCL_STOP
388       break; // Not implemented
389     }
390     }
391   }
392   return 0;
393 }
394 
395 // Restore Inputs
396 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields,
397     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
398     bool skipactive, CeedOperator_Blocked *impl) {
399   CeedInt ierr;
400   CeedEvalMode emode;
401 
402   for (CeedInt i=0; i<numinputfields; i++) {
403     // Skip active inputs
404     if (skipactive) {
405       CeedVector vec;
406       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
407       if (vec == CEED_VECTOR_ACTIVE)
408         continue;
409     }
410     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
411     CeedChk(ierr);
412     if (emode == CEED_EVAL_WEIGHT) { // Skip
413     } else {
414       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
415                                         (const CeedScalar **) &impl->edata[i]);
416       CeedChk(ierr);
417     }
418   }
419   return 0;
420 }
421 
422 // Apply Ceed Operator
423 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec,
424                                      CeedVector outvec,
425                                      CeedRequest *request) {
426   int ierr;
427   CeedOperator_Blocked *impl;
428   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
429   const CeedInt blksize = 8;
430   CeedInt Q, numinputfields, numoutputfields, numelements, size;
431   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
432   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
433   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
434   CeedQFunction qf;
435   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
436   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
437   CeedChk(ierr);
438   CeedTransposeMode lmode;
439   CeedOperatorField *opinputfields, *opoutputfields;
440   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
441   CeedChk(ierr);
442   CeedQFunctionField *qfinputfields, *qfoutputfields;
443   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
444   CeedChk(ierr);
445   CeedEvalMode emode;
446   CeedVector vec;
447 
448   // Setup
449   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
450 
451   // Input Evecs and Restriction
452   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
453                                          opinputfields, invec, false, impl,
454                                          request); CeedChk(ierr);
455 
456   // Output Evecs
457   for (CeedInt i=0; i<numoutputfields; i++) {
458     ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST,
459                               &impl->edata[i + numinputfields]); CeedChk(ierr);
460   }
461 
462   // Loop through elements
463   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
464     // Input basis apply
465     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
466                                           numinputfields, blksize, false, impl);
467     CeedChk(ierr);
468 
469     // Output pointers
470     for (CeedInt i=0; i<numoutputfields; i++) {
471       ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
472       CeedChk(ierr);
473       if (emode == CEED_EVAL_NONE) {
474         ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size);
475         CeedChk(ierr);
476         ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
477                                   CEED_USE_POINTER,
478                                   &impl->edata[i + numinputfields][e*Q*size]);
479         CeedChk(ierr);
480       }
481     }
482 
483     // Q function
484     ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
485     CeedChk(ierr);
486 
487     // Output basis apply
488     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields,
489                                            blksize, numinputfields,
490                                            numoutputfields, op, impl);
491     CeedChk(ierr);
492   }
493 
494   // Zero lvecs
495   for (CeedInt i=0; i<numoutputfields; i++) {
496     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
497     if (vec == CEED_VECTOR_ACTIVE) {
498       if (!impl->add) {
499         vec = outvec;
500         ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
501       }
502     } else {
503       ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
504     }
505   }
506   impl->add = false;
507 
508   // Output restriction
509   for (CeedInt i=0; i<numoutputfields; i++) {
510     // Restore evec
511     ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein],
512                                   &impl->edata[i + numinputfields]); CeedChk(ierr);
513     // Get output vector
514     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
515     // Active
516     if (vec == CEED_VECTOR_ACTIVE)
517       vec = outvec;
518     // Restrict
519     ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); CeedChk(ierr);
520     ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE,
521                                     lmode, impl->evecs[i+impl->numein], vec,
522                                     request); CeedChk(ierr);
523 
524   }
525 
526   // Restore input arrays
527   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
528          opinputfields, false, impl); CeedChk(ierr);
529 
530   return 0;
531 }
532 
533 // Assemble Linear QFunction
534 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op,
535     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
536   int ierr;
537   CeedOperator_Blocked *impl;
538   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
539   const CeedInt blksize = 8;
540   CeedInt Q, numinputfields, numoutputfields, numelements, size;
541   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
542   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
543   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
544   CeedQFunction qf;
545   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
546   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
547   CeedChk(ierr);
548   CeedOperatorField *opinputfields, *opoutputfields;
549   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
550   CeedChk(ierr);
551   CeedQFunctionField *qfinputfields, *qfoutputfields;
552   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
553   CeedChk(ierr);
554   CeedVector vec, lvec;
555   CeedInt numactivein = 0, numactiveout = 0;
556   CeedVector *activein = NULL;
557   CeedScalar *a, *tmp;
558   Ceed ceed;
559   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
560 
561   // Setup
562   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
563 
564   // Input Evecs and Restriction
565   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
566                                          opinputfields, NULL, true, impl,
567                                          request); CeedChk(ierr);
568 
569   // Count number of active input fields
570   for (CeedInt i=0; i<numinputfields; i++) {
571     // Get input vector
572     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
573     // Check if active input
574     if (vec == CEED_VECTOR_ACTIVE) {
575       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
576       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
577       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
578       CeedChk(ierr);
579       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
580       for (CeedInt field=0; field<size; field++) {
581         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
582         CeedChk(ierr);
583         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
584                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
585         CeedChk(ierr);
586       }
587       numactivein += size;
588       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
589     }
590   }
591 
592   // Count number of active output fields
593   for (CeedInt i=0; i<numoutputfields; i++) {
594     // Get output vector
595     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
596     // Check if active output
597     if (vec == CEED_VECTOR_ACTIVE) {
598       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
599       numactiveout += size;
600     }
601   }
602 
603   // Check sizes
604   if (!numactivein || !numactiveout)
605     // LCOV_EXCL_START
606     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
607                      "and outputs");
608   // LCOV_EXCL_STOP
609 
610   // Setup lvec
611   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
612                           &lvec); CeedChk(ierr);
613   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
614 
615   // Create output restriction
616   ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q,
617          numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr);
618   // Create assembled vector
619   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
620                           assembled); CeedChk(ierr);
621 
622   // Loop through elements
623   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
624     // Input basis apply
625     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
626                                           numinputfields, blksize, true, impl);
627     CeedChk(ierr);
628 
629     // Assemble QFunction
630     for (CeedInt in=0; in<numactivein; in++) {
631       // Set Inputs
632       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
633       if (numactivein > 1) {
634         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
635                                   0.0); CeedChk(ierr);
636       }
637       // Set Outputs
638       for (CeedInt out=0; out<numoutputfields; out++) {
639         // Get output vector
640         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
641         CeedChk(ierr);
642         // Check if active output
643         if (vec == CEED_VECTOR_ACTIVE) {
644           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
645                              CEED_USE_POINTER, a); CeedChk(ierr);
646           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
647           CeedChk(ierr);
648           a += size*Q*blksize; // Advance the pointer by the size of the output
649         }
650       }
651       // Apply QFunction
652       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
653       CeedChk(ierr);
654     }
655   }
656 
657   // Un-set output Qvecs to prevent accidental overwrite of Assembled
658   for (CeedInt out=0; out<numoutputfields; out++) {
659     // Get output vector
660     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
661     CeedChk(ierr);
662     // Check if active output
663     if (vec == CEED_VECTOR_ACTIVE) {
664       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
665                          NULL); CeedChk(ierr);
666     }
667   }
668 
669   // Restore input arrays
670   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
671          opinputfields, true, impl); CeedChk(ierr);
672 
673   // Output blocked restriction
674   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
675   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
676   CeedElemRestriction blkrstr;
677   ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize,
678                                           numelements*Q,
679                                           numactivein*numactiveout,
680                                           CEED_MEM_HOST, CEED_COPY_VALUES,
681                                           NULL, &blkrstr); CeedChk(ierr);
682   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE,
683                                   lvec, *assembled, request); CeedChk(ierr);
684 
685   // Cleanup
686   for (CeedInt i=0; i<numactivein; i++) {
687     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
688   }
689   ierr = CeedFree(&activein); CeedChk(ierr);
690   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
691   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
692 
693   return 0;
694 }
695 
696 int CeedOperatorCreate_Blocked(CeedOperator op) {
697   int ierr;
698   Ceed ceed;
699   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
700   CeedOperator_Blocked *impl;
701 
702   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
703   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
704 
705 
706   ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction",
707                                 CeedOperatorAssembleLinearQFunction_Blocked);
708   CeedChk(ierr);
709   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply",
710                                 CeedOperatorApply_Blocked); CeedChk(ierr);
711   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
712                                 CeedOperatorDestroy_Blocked); CeedChk(ierr);
713   return 0;
714 }
715