xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision bd4df46207a63b48a485389a63848eb7a2477f6b)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed.h>
18 #include <ceed-backend.h>
19 #include <stdbool.h>
20 #include <stddef.h>
21 #include <stdint.h>
22 #include "ceed-blocked.h"
23 
24 //------------------------------------------------------------------------------
25 // Setup Input/Output Fields
26 //------------------------------------------------------------------------------
27 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
28     CeedOperator op, bool inOrOut,
29     CeedElemRestriction *blkrestr,
30     CeedVector *fullevecs, CeedVector *evecs,
31     CeedVector *qvecs, CeedInt starte,
32     CeedInt numfields, CeedInt Q) {
33   CeedInt dim, ierr, ncomp, size, P;
34   Ceed ceed;
35   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
36   CeedBasis basis;
37   CeedElemRestriction r;
38   CeedOperatorField *opfields;
39   CeedQFunctionField *qffields;
40   if (inOrOut) {
41     ierr = CeedOperatorGetFields(op, NULL, &opfields);
42     CeedChk(ierr);
43     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
44     CeedChk(ierr);
45   } else {
46     ierr = CeedOperatorGetFields(op, &opfields, NULL);
47     CeedChk(ierr);
48     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
49     CeedChk(ierr);
50   }
51   const CeedInt blksize = 8;
52 
53   // Loop over fields
54   for (CeedInt i=0; i<numfields; i++) {
55     CeedEvalMode emode;
56     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
57 
58     if (emode != CEED_EVAL_WEIGHT) {
59       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
60       CeedChk(ierr);
61       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
62       CeedInt nelem, elemsize, lsize, compstride;
63       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
64       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
65       ierr = CeedElemRestrictionGetLVectorSize(r, &lsize); CeedChk(ierr);
66       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
67 
68       bool strided;
69       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChk(ierr);
70       if (strided) {
71         CeedInt strides[3];
72         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr);
73         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, nelem, elemsize,
74                blksize, ncomp, lsize, strides, &blkrestr[i+starte]);
75         CeedChk(ierr);
76       } else {
77         const CeedInt *offsets = NULL;
78         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
79         CeedChk(ierr);
80         ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr);
81         ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize,
82                                                 blksize, ncomp, compstride,
83                                                 lsize, CEED_MEM_HOST,
84                                                 CEED_COPY_VALUES, offsets,
85                                                 &blkrestr[i+starte]);
86         CeedChk(ierr);
87         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChk(ierr);
88       }
89       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
90                                              &fullevecs[i+starte]);
91       CeedChk(ierr);
92     }
93 
94     switch(emode) {
95     case CEED_EVAL_NONE:
96       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
97       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
98       break;
99     case CEED_EVAL_INTERP:
100       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
101       ierr = CeedElemRestrictionGetElementSize(r, &P);
102       CeedChk(ierr);
103       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
104       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
105       break;
106     case CEED_EVAL_GRAD:
107       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
108       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
109       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
110       ierr = CeedElemRestrictionGetElementSize(r, &P);
111       CeedChk(ierr);
112       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
113       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
114       break;
115     case CEED_EVAL_WEIGHT: // Only on input fields
116       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
117       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
118       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
119                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]);
120       CeedChk(ierr);
121 
122       break;
123     case CEED_EVAL_DIV:
124       break; // Not implemented
125     case CEED_EVAL_CURL:
126       break; // Not implemented
127     }
128   }
129   return 0;
130 }
131 
132 //------------------------------------------------------------------------------
133 // Setup Operator
134 //------------------------------------------------------------------------------
135 static int CeedOperatorSetup_Blocked(CeedOperator op) {
136   int ierr;
137   bool setupdone;
138   ierr = CeedOperatorIsSetupDone(op, &setupdone); CeedChk(ierr);
139   if (setupdone) return 0;
140   Ceed ceed;
141   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
142   CeedOperator_Blocked *impl;
143   ierr = CeedOperatorGetData(op, &impl); CeedChk(ierr);
144   CeedQFunction qf;
145   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
146   CeedInt Q, numinputfields, numoutputfields;
147   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
148   ierr = CeedQFunctionIsIdentity(qf, &impl->identityqf); CeedChk(ierr);
149   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
150   CeedChk(ierr);
151   CeedOperatorField *opinputfields, *opoutputfields;
152   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
153   CeedChk(ierr);
154   CeedQFunctionField *qfinputfields, *qfoutputfields;
155   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
156   CeedChk(ierr);
157 
158   // Allocate
159   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
160   CeedChk(ierr);
161   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
162   CeedChk(ierr);
163   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
164   CeedChk(ierr);
165 
166   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
167   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
168   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
169   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
170   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
171 
172   impl->numein = numinputfields; impl->numeout = numoutputfields;
173 
174   // Set up infield and outfield pointer arrays
175   // Infields
176   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr,
177                                          impl->evecs, impl->evecsin,
178                                          impl->qvecsin, 0,
179                                          numinputfields, Q);
180   CeedChk(ierr);
181   // Outfields
182   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr,
183                                          impl->evecs, impl->evecsout,
184                                          impl->qvecsout, numinputfields,
185                                          numoutputfields, Q);
186   CeedChk(ierr);
187 
188   // Identity QFunctions
189   if (impl->identityqf) {
190     CeedEvalMode inmode, outmode;
191     CeedQFunctionField *infields, *outfields;
192     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr);
193 
194     for (CeedInt i=0; i<numinputfields; i++) {
195       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
196       CeedChk(ierr);
197       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
198       CeedChk(ierr);
199 
200       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
201       impl->qvecsout[i] = impl->qvecsin[i];
202       ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr);
203     }
204   }
205 
206   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
207 
208   return 0;
209 }
210 
211 //------------------------------------------------------------------------------
212 // Setup Operator Inputs
213 //------------------------------------------------------------------------------
214 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields,
215     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
216     CeedVector invec, bool skipactive, CeedOperator_Blocked *impl,
217     CeedRequest *request) {
218   CeedInt ierr;
219   CeedEvalMode emode;
220   CeedVector vec;
221   uint64_t state;
222 
223   for (CeedInt i=0; i<numinputfields; i++) {
224     // Get input vector
225     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
226     if (vec == CEED_VECTOR_ACTIVE) {
227       if (skipactive)
228         continue;
229       else
230         vec = invec;
231     }
232 
233     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
234     CeedChk(ierr);
235     if (emode == CEED_EVAL_WEIGHT) { // Skip
236     } else {
237       // Restrict
238       ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
239       if (state != impl->inputstate[i] || vec == invec) {
240         ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
241                                         vec, impl->evecs[i], request);
242         CeedChk(ierr);
243         impl->inputstate[i] = state;
244       }
245       // Get evec
246       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
247                                     (const CeedScalar **) &impl->edata[i]);
248       CeedChk(ierr);
249     }
250   }
251   return 0;
252 }
253 
254 //------------------------------------------------------------------------------
255 // Input Basis Action
256 //------------------------------------------------------------------------------
257 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
258     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
259     CeedInt numinputfields, CeedInt blksize, bool skipactive,
260     CeedOperator_Blocked *impl) {
261   CeedInt ierr;
262   CeedInt dim, elemsize, size;
263   CeedElemRestriction Erestrict;
264   CeedEvalMode emode;
265   CeedBasis basis;
266 
267   for (CeedInt i=0; i<numinputfields; i++) {
268     // Skip active input
269     if (skipactive) {
270       CeedVector vec;
271       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
272       if (vec == CEED_VECTOR_ACTIVE)
273         continue;
274     }
275 
276     // Get elemsize, emode, size
277     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
278     CeedChk(ierr);
279     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
280     CeedChk(ierr);
281     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
282     CeedChk(ierr);
283     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
284     // Basis action
285     switch(emode) {
286     case CEED_EVAL_NONE:
287       ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
288                                 CEED_USE_POINTER,
289                                 &impl->edata[i][e*Q*size]); CeedChk(ierr);
290       break;
291     case CEED_EVAL_INTERP:
292       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
293       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
294                                 CEED_USE_POINTER,
295                                 &impl->edata[i][e*elemsize*size]);
296       CeedChk(ierr);
297       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
298                             CEED_EVAL_INTERP, impl->evecsin[i],
299                             impl->qvecsin[i]); CeedChk(ierr);
300       break;
301     case CEED_EVAL_GRAD:
302       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
303       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
304       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
305                                 CEED_USE_POINTER,
306                                 &impl->edata[i][e*elemsize*size/dim]);
307       CeedChk(ierr);
308       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
309                             CEED_EVAL_GRAD, impl->evecsin[i],
310                             impl->qvecsin[i]); CeedChk(ierr);
311       break;
312     case CEED_EVAL_WEIGHT:
313       break;  // No action
314     // LCOV_EXCL_START
315     case CEED_EVAL_DIV:
316     case CEED_EVAL_CURL: {
317       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
318       CeedChk(ierr);
319       Ceed ceed;
320       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
321       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
322       // LCOV_EXCL_STOP
323     }
324     }
325   }
326   return 0;
327 }
328 
329 //------------------------------------------------------------------------------
330 // Output Basis Action
331 //------------------------------------------------------------------------------
332 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
333     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
334     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
335     CeedOperator op, CeedOperator_Blocked *impl) {
336   CeedInt ierr;
337   CeedInt dim, elemsize, size;
338   CeedElemRestriction Erestrict;
339   CeedEvalMode emode;
340   CeedBasis basis;
341 
342   for (CeedInt i=0; i<numoutputfields; i++) {
343     // Get elemsize, emode, size
344     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
345     CeedChk(ierr);
346     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
347     CeedChk(ierr);
348     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
349     CeedChk(ierr);
350     ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
351     // Basis action
352     switch(emode) {
353     case CEED_EVAL_NONE:
354       break; // No action
355     case CEED_EVAL_INTERP:
356       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
357       CeedChk(ierr);
358       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
359                                 CEED_USE_POINTER,
360                                 &impl->edata[i + numinputfields][e*elemsize*size]);
361       CeedChk(ierr);
362       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
363                             CEED_EVAL_INTERP, impl->qvecsout[i],
364                             impl->evecsout[i]); CeedChk(ierr);
365       break;
366     case CEED_EVAL_GRAD:
367       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
368       CeedChk(ierr);
369       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
370       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
371                                 CEED_USE_POINTER,
372                                 &impl->edata[i + numinputfields][e*elemsize*size/dim]);
373       CeedChk(ierr);
374       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
375                             CEED_EVAL_GRAD, impl->qvecsout[i],
376                             impl->evecsout[i]); CeedChk(ierr);
377       break;
378     // LCOV_EXCL_START
379     case CEED_EVAL_WEIGHT: {
380       Ceed ceed;
381       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
382       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
383                        "evaluation mode");
384     }
385     case CEED_EVAL_DIV:
386     case CEED_EVAL_CURL: {
387       Ceed ceed;
388       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
389       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
390       // LCOV_EXCL_STOP
391     }
392     }
393   }
394   return 0;
395 }
396 
397 //------------------------------------------------------------------------------
398 // Restore Input Vectors
399 //------------------------------------------------------------------------------
400 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields,
401     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
402     bool skipactive, CeedOperator_Blocked *impl) {
403   CeedInt ierr;
404   CeedEvalMode emode;
405 
406   for (CeedInt i=0; i<numinputfields; i++) {
407     // Skip active inputs
408     if (skipactive) {
409       CeedVector vec;
410       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
411       if (vec == CEED_VECTOR_ACTIVE)
412         continue;
413     }
414     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
415     CeedChk(ierr);
416     if (emode == CEED_EVAL_WEIGHT) { // Skip
417     } else {
418       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
419                                         (const CeedScalar **) &impl->edata[i]);
420       CeedChk(ierr);
421     }
422   }
423   return 0;
424 }
425 
426 //------------------------------------------------------------------------------
427 // Operator Apply
428 //------------------------------------------------------------------------------
429 static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector invec,
430                                         CeedVector outvec,
431                                         CeedRequest *request) {
432   int ierr;
433   CeedOperator_Blocked *impl;
434   ierr = CeedOperatorGetData(op, &impl); CeedChk(ierr);
435   const CeedInt blksize = 8;
436   CeedInt Q, numinputfields, numoutputfields, numelements, size;
437   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
438   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
439   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
440   CeedQFunction qf;
441   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
442   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
443   CeedChk(ierr);
444   CeedOperatorField *opinputfields, *opoutputfields;
445   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
446   CeedChk(ierr);
447   CeedQFunctionField *qfinputfields, *qfoutputfields;
448   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
449   CeedChk(ierr);
450   CeedEvalMode emode;
451   CeedVector vec;
452 
453   // Setup
454   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
455 
456   // Input Evecs and Restriction
457   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
458                                          opinputfields, invec, false, impl,
459                                          request); CeedChk(ierr);
460 
461   // Output Evecs
462   for (CeedInt i=0; i<numoutputfields; i++) {
463     ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST,
464                               &impl->edata[i + numinputfields]); CeedChk(ierr);
465   }
466 
467   // Loop through elements
468   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
469     // Output pointers
470     for (CeedInt i=0; i<numoutputfields; i++) {
471       ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
472       CeedChk(ierr);
473       if (emode == CEED_EVAL_NONE) {
474         ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size);
475         CeedChk(ierr);
476         ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
477                                   CEED_USE_POINTER,
478                                   &impl->edata[i + numinputfields][e*Q*size]);
479         CeedChk(ierr);
480       }
481     }
482 
483     // Input basis apply
484     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
485                                           numinputfields, blksize, false, impl);
486     CeedChk(ierr);
487 
488     // Q function
489     if (!impl->identityqf) {
490       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
491       CeedChk(ierr);
492     }
493 
494     // Output basis apply
495     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields,
496                                            blksize, numinputfields,
497                                            numoutputfields, op, impl);
498     CeedChk(ierr);
499   }
500 
501   // Output restriction
502   for (CeedInt i=0; i<numoutputfields; i++) {
503     // Restore evec
504     ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein],
505                                   &impl->edata[i + numinputfields]); CeedChk(ierr);
506     // Get output vector
507     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
508     // Active
509     if (vec == CEED_VECTOR_ACTIVE)
510       vec = outvec;
511     // Restrict
512     ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein],
513                                     CEED_TRANSPOSE, impl->evecs[i+impl->numein],
514                                     vec, request); CeedChk(ierr);
515 
516   }
517 
518   // Restore input arrays
519   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
520          opinputfields, false, impl); CeedChk(ierr);
521 
522   return 0;
523 }
524 
525 //------------------------------------------------------------------------------
526 // Assemble Linear QFunction
527 //------------------------------------------------------------------------------
528 static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op,
529     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
530   int ierr;
531   CeedOperator_Blocked *impl;
532   ierr = CeedOperatorGetData(op, &impl); CeedChk(ierr);
533   const CeedInt blksize = 8;
534   CeedInt Q, numinputfields, numoutputfields, numelements, size;
535   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
536   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
537   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
538   CeedQFunction qf;
539   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
540   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
541   CeedChk(ierr);
542   CeedOperatorField *opinputfields, *opoutputfields;
543   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
544   CeedChk(ierr);
545   CeedQFunctionField *qfinputfields, *qfoutputfields;
546   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
547   CeedChk(ierr);
548   CeedVector vec, lvec;
549   CeedInt numactivein = 0, numactiveout = 0;
550   CeedVector *activein = NULL;
551   CeedScalar *a, *tmp;
552   Ceed ceed;
553   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
554 
555   // Setup
556   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
557 
558   // Check for identity
559   if (impl->identityqf)
560     // LCOV_EXCL_START
561     return CeedError(ceed, 1, "Assembling identity qfunctions not supported");
562   // LCOV_EXCL_STOP
563 
564   // Input Evecs and Restriction
565   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
566                                          opinputfields, NULL, true, impl,
567                                          request); CeedChk(ierr);
568 
569   // Count number of active input fields
570   for (CeedInt i=0; i<numinputfields; i++) {
571     // Get input vector
572     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
573     // Check if active input
574     if (vec == CEED_VECTOR_ACTIVE) {
575       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
576       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
577       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
578       CeedChk(ierr);
579       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
580       for (CeedInt field=0; field<size; field++) {
581         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
582         CeedChk(ierr);
583         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
584                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
585         CeedChk(ierr);
586       }
587       numactivein += size;
588       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
589     }
590   }
591 
592   // Count number of active output fields
593   for (CeedInt i=0; i<numoutputfields; i++) {
594     // Get output vector
595     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
596     // Check if active output
597     if (vec == CEED_VECTOR_ACTIVE) {
598       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
599       numactiveout += size;
600     }
601   }
602 
603   // Check sizes
604   if (!numactivein || !numactiveout)
605     // LCOV_EXCL_START
606     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
607                      "and outputs");
608   // LCOV_EXCL_STOP
609 
610   // Setup lvec
611   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
612                           &lvec); CeedChk(ierr);
613   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
614 
615   // Create output restriction
616   CeedInt strides[3] = {1, Q, numactivein *numactiveout*Q};
617   ierr = CeedElemRestrictionCreateStrided(ceed, numelements, Q,
618                                           numactivein*numactiveout,
619                                           numactivein*numactiveout*numelements*Q,
620                                           strides, rstr); CeedChk(ierr);
621   // Create assembled vector
622   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
623                           assembled); CeedChk(ierr);
624 
625   // Loop through elements
626   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
627     // Input basis apply
628     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
629                                           numinputfields, blksize, true, impl);
630     CeedChk(ierr);
631 
632     // Assemble QFunction
633     for (CeedInt in=0; in<numactivein; in++) {
634       // Set Inputs
635       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
636       if (numactivein > 1) {
637         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
638                                   0.0); CeedChk(ierr);
639       }
640       // Set Outputs
641       for (CeedInt out=0; out<numoutputfields; out++) {
642         // Get output vector
643         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
644         CeedChk(ierr);
645         // Check if active output
646         if (vec == CEED_VECTOR_ACTIVE) {
647           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
648                              CEED_USE_POINTER, a); CeedChk(ierr);
649           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
650           CeedChk(ierr);
651           a += size*Q*blksize; // Advance the pointer by the size of the output
652         }
653       }
654       // Apply QFunction
655       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
656       CeedChk(ierr);
657     }
658   }
659 
660   // Un-set output Qvecs to prevent accidental overwrite of Assembled
661   for (CeedInt out=0; out<numoutputfields; out++) {
662     // Get output vector
663     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
664     CeedChk(ierr);
665     // Check if active output
666     if (vec == CEED_VECTOR_ACTIVE) {
667       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
668                          NULL); CeedChk(ierr);
669     }
670   }
671 
672   // Restore input arrays
673   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
674          opinputfields, true, impl); CeedChk(ierr);
675 
676   // Output blocked restriction
677   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
678   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
679   CeedElemRestriction blkrstr;
680   ierr = CeedElemRestrictionCreateBlockedStrided(ceed, numelements, Q, blksize,
681          numactivein*numactiveout, numactivein*numactiveout*numelements*Q,
682          strides, &blkrstr); CeedChk(ierr);
683   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled,
684                                   request); CeedChk(ierr);
685 
686   // Cleanup
687   for (CeedInt i=0; i<numactivein; i++) {
688     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
689   }
690   ierr = CeedFree(&activein); CeedChk(ierr);
691   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
692   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
693 
694   return 0;
695 }
696 
697 //------------------------------------------------------------------------------
698 // Operator Destroy
699 //------------------------------------------------------------------------------
700 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
701   int ierr;
702   CeedOperator_Blocked *impl;
703   ierr = CeedOperatorGetData(op, &impl); CeedChk(ierr);
704 
705   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
706     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
707     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
708   }
709   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
710   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
711   ierr = CeedFree(&impl->edata); CeedChk(ierr);
712   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
713 
714   for (CeedInt i=0; i<impl->numein; i++) {
715     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
716     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
717   }
718   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
719   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
720 
721   for (CeedInt i=0; i<impl->numeout; i++) {
722     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
723     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
724   }
725   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
726   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
727 
728   ierr = CeedFree(&impl); CeedChk(ierr);
729   return 0;
730 }
731 
732 //------------------------------------------------------------------------------
733 // Operator Create
734 //------------------------------------------------------------------------------
735 int CeedOperatorCreate_Blocked(CeedOperator op) {
736   int ierr;
737   Ceed ceed;
738   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
739   CeedOperator_Blocked *impl;
740 
741   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
742   ierr = CeedOperatorSetData(op, impl); CeedChk(ierr);
743 
744   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
745                                 CeedOperatorLinearAssembleQFunction_Blocked);
746   CeedChk(ierr);
747   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
748                                 CeedOperatorApplyAdd_Blocked); CeedChk(ierr);
749   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
750                                 CeedOperatorDestroy_Blocked); CeedChk(ierr);
751   return 0;
752 }
753 //------------------------------------------------------------------------------
754