xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision 014ec18d8ea934e4fc0f69a8c06ffce36a53ff2a)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include "ceed-blocked.h"
18 #include "../ref/ceed-ref.h"
19 
20 //------------------------------------------------------------------------------
21 // Setup Input/Output Fields
22 //------------------------------------------------------------------------------
23 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
24     CeedOperator op, bool inOrOut,
25     CeedElemRestriction *blkrestr,
26     CeedVector *fullevecs, CeedVector *evecs,
27     CeedVector *qvecs, CeedInt starte,
28     CeedInt numfields, CeedInt Q) {
29   CeedInt dim, ierr, ncomp, size, P;
30   Ceed ceed;
31   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
32   CeedBasis basis;
33   CeedElemRestriction r;
34   CeedOperatorField *opfields;
35   CeedQFunctionField *qffields;
36   if (inOrOut) {
37     ierr = CeedOperatorGetFields(op, NULL, &opfields);
38     CeedChk(ierr);
39     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
40     CeedChk(ierr);
41   } else {
42     ierr = CeedOperatorGetFields(op, &opfields, NULL);
43     CeedChk(ierr);
44     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
45     CeedChk(ierr);
46   }
47   const CeedInt blksize = 8;
48 
49   // Loop over fields
50   for (CeedInt i=0; i<numfields; i++) {
51     CeedEvalMode emode;
52     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
53 
54     if (emode != CEED_EVAL_WEIGHT) {
55       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
56       CeedChk(ierr);
57       CeedElemRestriction_Ref *data;
58       ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr);
59       Ceed ceed;
60       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
61       CeedInt nelem, elemsize, lsize, compstride;
62       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
63       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
64       ierr = CeedElemRestrictionGetLVectorSize(r, &lsize); CeedChk(ierr);
65       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
66       if (data->offsets) {
67         ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr);
68         ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize,
69                                                 blksize, ncomp, compstride,
70                                                 lsize, CEED_MEM_HOST,
71                                                 CEED_COPY_VALUES, data->offsets,
72                                                 &blkrestr[i+starte]);
73         CeedChk(ierr);
74       } else {
75         CeedInt strides[3];
76         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr);
77         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, nelem, elemsize,
78                blksize, ncomp, lsize, strides, &blkrestr[i+starte]);
79         CeedChk(ierr);
80       }
81       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
82                                              &fullevecs[i+starte]);
83       CeedChk(ierr);
84     }
85 
86     switch(emode) {
87     case CEED_EVAL_NONE:
88       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
89       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
90       break;
91     case CEED_EVAL_INTERP:
92       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
93       ierr = CeedElemRestrictionGetElementSize(r, &P);
94       CeedChk(ierr);
95       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
96       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
97       break;
98     case CEED_EVAL_GRAD:
99       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
100       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
101       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
102       ierr = CeedElemRestrictionGetElementSize(r, &P);
103       CeedChk(ierr);
104       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
105       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
106       break;
107     case CEED_EVAL_WEIGHT: // Only on input fields
108       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
109       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
110       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
111                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]);
112       CeedChk(ierr);
113 
114       break;
115     case CEED_EVAL_DIV:
116       break; // Not implemented
117     case CEED_EVAL_CURL:
118       break; // Not implemented
119     }
120   }
121   return 0;
122 }
123 
124 //------------------------------------------------------------------------------
125 // Setup Operator
126 //------------------------------------------------------------------------------
127 static int CeedOperatorSetup_Blocked(CeedOperator op) {
128   int ierr;
129   bool setupdone;
130   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
131   if (setupdone) return 0;
132   Ceed ceed;
133   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
134   CeedOperator_Blocked *impl;
135   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
136   CeedQFunction qf;
137   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
138   CeedInt Q, numinputfields, numoutputfields;
139   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
140   ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr);
141   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
142   CeedChk(ierr);
143   CeedOperatorField *opinputfields, *opoutputfields;
144   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
145   CeedChk(ierr);
146   CeedQFunctionField *qfinputfields, *qfoutputfields;
147   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
148   CeedChk(ierr);
149 
150   // Allocate
151   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
152   CeedChk(ierr);
153   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
154   CeedChk(ierr);
155   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
156   CeedChk(ierr);
157 
158   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
159   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
160   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
161   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
162   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
163 
164   impl->numein = numinputfields; impl->numeout = numoutputfields;
165 
166   // Set up infield and outfield pointer arrays
167   // Infields
168   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr,
169                                          impl->evecs, impl->evecsin,
170                                          impl->qvecsin, 0,
171                                          numinputfields, Q);
172   CeedChk(ierr);
173   // Outfields
174   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr,
175                                          impl->evecs, impl->evecsout,
176                                          impl->qvecsout, numinputfields,
177                                          numoutputfields, Q);
178   CeedChk(ierr);
179 
180   // Identity QFunctions
181   if (impl->identityqf) {
182     CeedEvalMode inmode, outmode;
183     CeedQFunctionField *infields, *outfields;
184     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr);
185 
186     for (CeedInt i=0; i<numinputfields; i++) {
187       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
188       CeedChk(ierr);
189       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
190       CeedChk(ierr);
191 
192       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
193       impl->qvecsout[i] = impl->qvecsin[i];
194       ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr);
195     }
196   }
197 
198   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
199 
200   return 0;
201 }
202 
203 //------------------------------------------------------------------------------
204 // Setup Operator Inputs
205 //------------------------------------------------------------------------------
206 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields,
207     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
208     CeedVector invec, bool skipactive, CeedOperator_Blocked *impl,
209     CeedRequest *request) {
210   CeedInt ierr;
211   CeedEvalMode emode;
212   CeedVector vec;
213   uint64_t state;
214 
215   for (CeedInt i=0; i<numinputfields; i++) {
216     // Get input vector
217     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
218     if (vec == CEED_VECTOR_ACTIVE) {
219       if (skipactive)
220         continue;
221       else
222         vec = invec;
223     }
224 
225     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
226     CeedChk(ierr);
227     if (emode == CEED_EVAL_WEIGHT) { // Skip
228     } else {
229       // Restrict
230       ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
231       if (state != impl->inputstate[i] || vec == invec) {
232         ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
233                                         vec, impl->evecs[i], request);
234         CeedChk(ierr);
235         impl->inputstate[i] = state;
236       }
237       // Get evec
238       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
239                                     (const CeedScalar **) &impl->edata[i]);
240       CeedChk(ierr);
241     }
242   }
243   return 0;
244 }
245 
246 //------------------------------------------------------------------------------
247 // Input Basis Action
248 //------------------------------------------------------------------------------
249 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
250     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
251     CeedInt numinputfields, CeedInt blksize, bool skipactive,
252     CeedOperator_Blocked *impl) {
253   CeedInt ierr;
254   CeedInt dim, elemsize, size;
255   CeedElemRestriction Erestrict;
256   CeedEvalMode emode;
257   CeedBasis basis;
258 
259   for (CeedInt i=0; i<numinputfields; i++) {
260     // Skip active input
261     if (skipactive) {
262       CeedVector vec;
263       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
264       if (vec == CEED_VECTOR_ACTIVE)
265         continue;
266     }
267 
268     // Get elemsize, emode, size
269     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
270     CeedChk(ierr);
271     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
272     CeedChk(ierr);
273     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
274     CeedChk(ierr);
275     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
276     // Basis action
277     switch(emode) {
278     case CEED_EVAL_NONE:
279       ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
280                                 CEED_USE_POINTER,
281                                 &impl->edata[i][e*Q*size]); CeedChk(ierr);
282       break;
283     case CEED_EVAL_INTERP:
284       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
285       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
286                                 CEED_USE_POINTER,
287                                 &impl->edata[i][e*elemsize*size]);
288       CeedChk(ierr);
289       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
290                             CEED_EVAL_INTERP, impl->evecsin[i],
291                             impl->qvecsin[i]); CeedChk(ierr);
292       break;
293     case CEED_EVAL_GRAD:
294       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
295       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
296       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
297                                 CEED_USE_POINTER,
298                                 &impl->edata[i][e*elemsize*size/dim]);
299       CeedChk(ierr);
300       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
301                             CEED_EVAL_GRAD, impl->evecsin[i],
302                             impl->qvecsin[i]); CeedChk(ierr);
303       break;
304     case CEED_EVAL_WEIGHT:
305       break;  // No action
306     // LCOV_EXCL_START
307     case CEED_EVAL_DIV:
308     case CEED_EVAL_CURL: {
309       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
310       CeedChk(ierr);
311       Ceed ceed;
312       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
313       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
314       // LCOV_EXCL_STOP
315     }
316     }
317   }
318   return 0;
319 }
320 
321 //------------------------------------------------------------------------------
322 // Output Basis Action
323 //------------------------------------------------------------------------------
324 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
325     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
326     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
327     CeedOperator op, CeedOperator_Blocked *impl) {
328   CeedInt ierr;
329   CeedInt dim, elemsize, size;
330   CeedElemRestriction Erestrict;
331   CeedEvalMode emode;
332   CeedBasis basis;
333 
334   for (CeedInt i=0; i<numoutputfields; i++) {
335     // Get elemsize, emode, size
336     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
337     CeedChk(ierr);
338     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
339     CeedChk(ierr);
340     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
341     CeedChk(ierr);
342     ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
343     // Basis action
344     switch(emode) {
345     case CEED_EVAL_NONE:
346       break; // No action
347     case CEED_EVAL_INTERP:
348       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
349       CeedChk(ierr);
350       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
351                                 CEED_USE_POINTER,
352                                 &impl->edata[i + numinputfields][e*elemsize*size]);
353       CeedChk(ierr);
354       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
355                             CEED_EVAL_INTERP, impl->qvecsout[i],
356                             impl->evecsout[i]); CeedChk(ierr);
357       break;
358     case CEED_EVAL_GRAD:
359       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
360       CeedChk(ierr);
361       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
362       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
363                                 CEED_USE_POINTER,
364                                 &impl->edata[i + numinputfields][e*elemsize*size/dim]);
365       CeedChk(ierr);
366       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
367                             CEED_EVAL_GRAD, impl->qvecsout[i],
368                             impl->evecsout[i]); CeedChk(ierr);
369       break;
370     // LCOV_EXCL_START
371     case CEED_EVAL_WEIGHT: {
372       Ceed ceed;
373       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
374       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
375                        "evaluation mode");
376     }
377     case CEED_EVAL_DIV:
378     case CEED_EVAL_CURL: {
379       Ceed ceed;
380       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
381       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
382       // LCOV_EXCL_STOP
383     }
384     }
385   }
386   return 0;
387 }
388 
389 //------------------------------------------------------------------------------
390 // Restore Input Vectors
391 //------------------------------------------------------------------------------
392 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields,
393     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
394     bool skipactive, CeedOperator_Blocked *impl) {
395   CeedInt ierr;
396   CeedEvalMode emode;
397 
398   for (CeedInt i=0; i<numinputfields; i++) {
399     // Skip active inputs
400     if (skipactive) {
401       CeedVector vec;
402       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
403       if (vec == CEED_VECTOR_ACTIVE)
404         continue;
405     }
406     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
407     CeedChk(ierr);
408     if (emode == CEED_EVAL_WEIGHT) { // Skip
409     } else {
410       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
411                                         (const CeedScalar **) &impl->edata[i]);
412       CeedChk(ierr);
413     }
414   }
415   return 0;
416 }
417 
418 //------------------------------------------------------------------------------
419 // Operator Apply
420 //------------------------------------------------------------------------------
421 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec,
422                                      CeedVector outvec,
423                                      CeedRequest *request) {
424   int ierr;
425   CeedOperator_Blocked *impl;
426   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
427   const CeedInt blksize = 8;
428   CeedInt Q, numinputfields, numoutputfields, numelements, size;
429   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
430   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
431   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
432   CeedQFunction qf;
433   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
434   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
435   CeedChk(ierr);
436   CeedOperatorField *opinputfields, *opoutputfields;
437   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
438   CeedChk(ierr);
439   CeedQFunctionField *qfinputfields, *qfoutputfields;
440   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
441   CeedChk(ierr);
442   CeedEvalMode emode;
443   CeedVector vec;
444 
445   // Setup
446   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
447 
448   // Input Evecs and Restriction
449   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
450                                          opinputfields, invec, false, impl,
451                                          request); CeedChk(ierr);
452 
453   // Output Evecs
454   for (CeedInt i=0; i<numoutputfields; i++) {
455     ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST,
456                               &impl->edata[i + numinputfields]); CeedChk(ierr);
457   }
458 
459   // Loop through elements
460   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
461     // Output pointers
462     for (CeedInt i=0; i<numoutputfields; i++) {
463       ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
464       CeedChk(ierr);
465       if (emode == CEED_EVAL_NONE) {
466         ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size);
467         CeedChk(ierr);
468         ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
469                                   CEED_USE_POINTER,
470                                   &impl->edata[i + numinputfields][e*Q*size]);
471         CeedChk(ierr);
472       }
473     }
474 
475     // Input basis apply
476     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
477                                           numinputfields, blksize, false, impl);
478     CeedChk(ierr);
479 
480     // Q function
481     if (!impl->identityqf) {
482       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
483       CeedChk(ierr);
484     }
485 
486     // Output basis apply
487     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields,
488                                            blksize, numinputfields,
489                                            numoutputfields, op, impl);
490     CeedChk(ierr);
491   }
492 
493   // Output restriction
494   for (CeedInt i=0; i<numoutputfields; i++) {
495     // Restore evec
496     ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein],
497                                   &impl->edata[i + numinputfields]); CeedChk(ierr);
498     // Get output vector
499     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
500     // Active
501     if (vec == CEED_VECTOR_ACTIVE)
502       vec = outvec;
503     // Restrict
504     ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein],
505                                     CEED_TRANSPOSE, impl->evecs[i+impl->numein],
506                                     vec, request); CeedChk(ierr);
507 
508   }
509 
510   // Restore input arrays
511   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
512          opinputfields, false, impl); CeedChk(ierr);
513 
514   return 0;
515 }
516 
517 //------------------------------------------------------------------------------
518 // Assemble Linear QFunction
519 //------------------------------------------------------------------------------
520 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op,
521     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
522   int ierr;
523   CeedOperator_Blocked *impl;
524   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
525   const CeedInt blksize = 8;
526   CeedInt Q, numinputfields, numoutputfields, numelements, size;
527   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
528   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
529   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
530   CeedQFunction qf;
531   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
532   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
533   CeedChk(ierr);
534   CeedOperatorField *opinputfields, *opoutputfields;
535   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
536   CeedChk(ierr);
537   CeedQFunctionField *qfinputfields, *qfoutputfields;
538   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
539   CeedChk(ierr);
540   CeedVector vec, lvec;
541   CeedInt numactivein = 0, numactiveout = 0;
542   CeedVector *activein = NULL;
543   CeedScalar *a, *tmp;
544   Ceed ceed;
545   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
546 
547   // Setup
548   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
549 
550   // Check for identity
551   if (impl->identityqf)
552     // LCOV_EXCL_START
553     return CeedError(ceed, 1, "Assembling identity qfunctions not supported");
554   // LCOV_EXCL_STOP
555 
556   // Input Evecs and Restriction
557   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
558                                          opinputfields, NULL, true, impl,
559                                          request); CeedChk(ierr);
560 
561   // Count number of active input fields
562   for (CeedInt i=0; i<numinputfields; i++) {
563     // Get input vector
564     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
565     // Check if active input
566     if (vec == CEED_VECTOR_ACTIVE) {
567       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
568       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
569       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
570       CeedChk(ierr);
571       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
572       for (CeedInt field=0; field<size; field++) {
573         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
574         CeedChk(ierr);
575         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
576                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
577         CeedChk(ierr);
578       }
579       numactivein += size;
580       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
581     }
582   }
583 
584   // Count number of active output fields
585   for (CeedInt i=0; i<numoutputfields; i++) {
586     // Get output vector
587     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
588     // Check if active output
589     if (vec == CEED_VECTOR_ACTIVE) {
590       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
591       numactiveout += size;
592     }
593   }
594 
595   // Check sizes
596   if (!numactivein || !numactiveout)
597     // LCOV_EXCL_START
598     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
599                      "and outputs");
600   // LCOV_EXCL_STOP
601 
602   // Setup lvec
603   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
604                           &lvec); CeedChk(ierr);
605   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
606 
607   // Create output restriction
608   CeedInt strides[3] = {1, Q, numactivein *numactiveout*Q};
609   ierr = CeedElemRestrictionCreateStrided(ceed, numelements, Q,
610                                           numactivein*numactiveout,
611                                           numactivein*numactiveout*numelements*Q,
612                                           strides, rstr); CeedChk(ierr);
613   // Create assembled vector
614   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
615                           assembled); CeedChk(ierr);
616 
617   // Loop through elements
618   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
619     // Input basis apply
620     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
621                                           numinputfields, blksize, true, impl);
622     CeedChk(ierr);
623 
624     // Assemble QFunction
625     for (CeedInt in=0; in<numactivein; in++) {
626       // Set Inputs
627       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
628       if (numactivein > 1) {
629         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
630                                   0.0); CeedChk(ierr);
631       }
632       // Set Outputs
633       for (CeedInt out=0; out<numoutputfields; out++) {
634         // Get output vector
635         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
636         CeedChk(ierr);
637         // Check if active output
638         if (vec == CEED_VECTOR_ACTIVE) {
639           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
640                              CEED_USE_POINTER, a); CeedChk(ierr);
641           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
642           CeedChk(ierr);
643           a += size*Q*blksize; // Advance the pointer by the size of the output
644         }
645       }
646       // Apply QFunction
647       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
648       CeedChk(ierr);
649     }
650   }
651 
652   // Un-set output Qvecs to prevent accidental overwrite of Assembled
653   for (CeedInt out=0; out<numoutputfields; out++) {
654     // Get output vector
655     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
656     CeedChk(ierr);
657     // Check if active output
658     if (vec == CEED_VECTOR_ACTIVE) {
659       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
660                          NULL); CeedChk(ierr);
661     }
662   }
663 
664   // Restore input arrays
665   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
666          opinputfields, true, impl); CeedChk(ierr);
667 
668   // Output blocked restriction
669   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
670   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
671   CeedElemRestriction blkrstr;
672   ierr = CeedElemRestrictionCreateBlockedStrided(ceed, numelements, Q, blksize,
673          numactivein*numactiveout, numactivein*numactiveout*numelements*Q,
674          strides, &blkrstr); CeedChk(ierr);
675   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled,
676                                   request); CeedChk(ierr);
677 
678   // Cleanup
679   for (CeedInt i=0; i<numactivein; i++) {
680     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
681   }
682   ierr = CeedFree(&activein); CeedChk(ierr);
683   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
684   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
685 
686   return 0;
687 }
688 
689 //------------------------------------------------------------------------------
690 // Operator Destroy
691 //------------------------------------------------------------------------------
692 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
693   int ierr;
694   CeedOperator_Blocked *impl;
695   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
696 
697   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
698     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
699     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
700   }
701   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
702   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
703   ierr = CeedFree(&impl->edata); CeedChk(ierr);
704   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
705 
706   for (CeedInt i=0; i<impl->numein; i++) {
707     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
708     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
709   }
710   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
711   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
712 
713   for (CeedInt i=0; i<impl->numeout; i++) {
714     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
715     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
716   }
717   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
718   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
719 
720   ierr = CeedFree(&impl); CeedChk(ierr);
721   return 0;
722 }
723 
724 //------------------------------------------------------------------------------
725 // Operator Create
726 //------------------------------------------------------------------------------
727 int CeedOperatorCreate_Blocked(CeedOperator op) {
728   int ierr;
729   Ceed ceed;
730   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
731   CeedOperator_Blocked *impl;
732 
733   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
734   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
735 
736   ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction",
737                                 CeedOperatorAssembleLinearQFunction_Blocked);
738   CeedChk(ierr);
739   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
740                                 CeedOperatorApply_Blocked); CeedChk(ierr);
741   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
742                                 CeedOperatorDestroy_Blocked); CeedChk(ierr);
743   return 0;
744 }
745 //------------------------------------------------------------------------------
746