xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision 4ec08b3aede85e944bebadf11512c57780813ef2)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include "ceed-blocked.h"
18 #include "../ref/ceed-ref.h"
19 
20 //------------------------------------------------------------------------------
21 // Setup Input/Output Fields
22 //------------------------------------------------------------------------------
23 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
24     CeedOperator op, bool inOrOut,
25     CeedElemRestriction *blkrestr,
26     CeedVector *fullevecs, CeedVector *evecs,
27     CeedVector *qvecs, CeedInt starte,
28     CeedInt numfields, CeedInt Q) {
29   CeedInt dim, ierr, ncomp, size, P;
30   Ceed ceed;
31   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
32   CeedBasis basis;
33   CeedElemRestriction r;
34   CeedOperatorField *opfields;
35   CeedQFunctionField *qffields;
36   if (inOrOut) {
37     ierr = CeedOperatorGetFields(op, NULL, &opfields);
38     CeedChk(ierr);
39     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
40     CeedChk(ierr);
41   } else {
42     ierr = CeedOperatorGetFields(op, &opfields, NULL);
43     CeedChk(ierr);
44     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
45     CeedChk(ierr);
46   }
47   const CeedInt blksize = 8;
48 
49   // Loop over fields
50   for (CeedInt i=0; i<numfields; i++) {
51     CeedEvalMode emode;
52     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
53 
54     if (emode != CEED_EVAL_WEIGHT) {
55       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
56       CeedChk(ierr);
57       CeedElemRestriction_Ref *data;
58       ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr);
59       Ceed ceed;
60       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
61       CeedInt nelem, elemsize, nnodes;
62       CeedInterlaceMode imode;
63       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
64       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
65       ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
66       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
67       if (data->indices) {
68         ierr = CeedElemRestrictionGetIMode(r, &imode); CeedChk(ierr);
69         ierr = CeedElemRestrictionCreateBlocked(ceed, imode, nelem, elemsize,
70                                                 blksize, nnodes, ncomp,
71                                                 CEED_MEM_HOST, CEED_COPY_VALUES,
72                                                 data->indices,
73                                                 &blkrestr[i+starte]);
74         CeedChk(ierr);
75       } else {
76         CeedInt strides[3];
77         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr);
78         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, nelem, elemsize,
79                blksize, nnodes, ncomp, strides, &blkrestr[i+starte]);
80         CeedChk(ierr);
81       }
82       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
83                                              &fullevecs[i+starte]);
84       CeedChk(ierr);
85     }
86 
87     switch(emode) {
88     case CEED_EVAL_NONE:
89       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
90       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
91       break;
92     case CEED_EVAL_INTERP:
93       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
94       ierr = CeedElemRestrictionGetElementSize(r, &P);
95       CeedChk(ierr);
96       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
97       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
98       break;
99     case CEED_EVAL_GRAD:
100       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
101       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
102       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
103       ierr = CeedElemRestrictionGetElementSize(r, &P);
104       CeedChk(ierr);
105       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
106       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
107       break;
108     case CEED_EVAL_WEIGHT: // Only on input fields
109       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
110       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
111       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
112                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]);
113       CeedChk(ierr);
114 
115       break;
116     case CEED_EVAL_DIV:
117       break; // Not implemented
118     case CEED_EVAL_CURL:
119       break; // Not implemented
120     }
121   }
122   return 0;
123 }
124 
125 //------------------------------------------------------------------------------
126 // Setup Operator
127 //------------------------------------------------------------------------------
128 static int CeedOperatorSetup_Blocked(CeedOperator op) {
129   int ierr;
130   bool setupdone;
131   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
132   if (setupdone) return 0;
133   Ceed ceed;
134   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
135   CeedOperator_Blocked *impl;
136   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
137   CeedQFunction qf;
138   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
139   CeedInt Q, numinputfields, numoutputfields;
140   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
141   ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr);
142   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
143   CeedChk(ierr);
144   CeedOperatorField *opinputfields, *opoutputfields;
145   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
146   CeedChk(ierr);
147   CeedQFunctionField *qfinputfields, *qfoutputfields;
148   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
149   CeedChk(ierr);
150 
151   // Allocate
152   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
153   CeedChk(ierr);
154   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
155   CeedChk(ierr);
156   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
157   CeedChk(ierr);
158 
159   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
160   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
161   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
162   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
163   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
164 
165   impl->numein = numinputfields; impl->numeout = numoutputfields;
166 
167   // Set up infield and outfield pointer arrays
168   // Infields
169   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr,
170                                          impl->evecs, impl->evecsin,
171                                          impl->qvecsin, 0,
172                                          numinputfields, Q);
173   CeedChk(ierr);
174   // Outfields
175   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr,
176                                          impl->evecs, impl->evecsout,
177                                          impl->qvecsout, numinputfields,
178                                          numoutputfields, Q);
179   CeedChk(ierr);
180 
181   // Identity QFunctions
182   if (impl->identityqf) {
183     CeedEvalMode inmode, outmode;
184     CeedQFunctionField *infields, *outfields;
185     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr);
186 
187     for (CeedInt i=0; i<numinputfields; i++) {
188       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
189       CeedChk(ierr);
190       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
191       CeedChk(ierr);
192 
193       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
194       impl->qvecsout[i] = impl->qvecsin[i];
195       ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr);
196     }
197   }
198 
199   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
200 
201   return 0;
202 }
203 
204 //------------------------------------------------------------------------------
205 // Setup Operator Inputs
206 //------------------------------------------------------------------------------
207 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields,
208     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
209     CeedVector invec, bool skipactive, CeedOperator_Blocked *impl,
210     CeedRequest *request) {
211   CeedInt ierr;
212   CeedEvalMode emode;
213   CeedVector vec;
214   uint64_t state;
215 
216   for (CeedInt i=0; i<numinputfields; i++) {
217     // Get input vector
218     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
219     if (vec == CEED_VECTOR_ACTIVE) {
220       if (skipactive)
221         continue;
222       else
223         vec = invec;
224     }
225 
226     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
227     CeedChk(ierr);
228     if (emode == CEED_EVAL_WEIGHT) { // Skip
229     } else {
230       // Restrict
231       ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
232       if (state != impl->inputstate[i] || vec == invec) {
233         ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
234                                         vec, impl->evecs[i], request);
235         CeedChk(ierr);
236         impl->inputstate[i] = state;
237       }
238       // Get evec
239       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
240                                     (const CeedScalar **) &impl->edata[i]);
241       CeedChk(ierr);
242     }
243   }
244   return 0;
245 }
246 
247 //------------------------------------------------------------------------------
248 // Input Basis Action
249 //------------------------------------------------------------------------------
250 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
251     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
252     CeedInt numinputfields, CeedInt blksize, bool skipactive,
253     CeedOperator_Blocked *impl) {
254   CeedInt ierr;
255   CeedInt dim, elemsize, size;
256   CeedElemRestriction Erestrict;
257   CeedEvalMode emode;
258   CeedBasis basis;
259 
260   for (CeedInt i=0; i<numinputfields; i++) {
261     // Skip active input
262     if (skipactive) {
263       CeedVector vec;
264       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
265       if (vec == CEED_VECTOR_ACTIVE)
266         continue;
267     }
268 
269     // Get elemsize, emode, size
270     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
271     CeedChk(ierr);
272     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
273     CeedChk(ierr);
274     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
275     CeedChk(ierr);
276     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
277     // Basis action
278     switch(emode) {
279     case CEED_EVAL_NONE:
280       ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
281                                 CEED_USE_POINTER,
282                                 &impl->edata[i][e*Q*size]); CeedChk(ierr);
283       break;
284     case CEED_EVAL_INTERP:
285       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
286       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
287                                 CEED_USE_POINTER,
288                                 &impl->edata[i][e*elemsize*size]);
289       CeedChk(ierr);
290       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
291                             CEED_EVAL_INTERP, impl->evecsin[i],
292                             impl->qvecsin[i]); CeedChk(ierr);
293       break;
294     case CEED_EVAL_GRAD:
295       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
296       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
297       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
298                                 CEED_USE_POINTER,
299                                 &impl->edata[i][e*elemsize*size/dim]);
300       CeedChk(ierr);
301       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
302                             CEED_EVAL_GRAD, impl->evecsin[i],
303                             impl->qvecsin[i]); CeedChk(ierr);
304       break;
305     case CEED_EVAL_WEIGHT:
306       break;  // No action
307     // LCOV_EXCL_START
308     case CEED_EVAL_DIV:
309     case CEED_EVAL_CURL: {
310       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
311       CeedChk(ierr);
312       Ceed ceed;
313       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
314       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
315       // LCOV_EXCL_STOP
316     }
317     }
318   }
319   return 0;
320 }
321 
322 //------------------------------------------------------------------------------
323 // Output Basis Action
324 //------------------------------------------------------------------------------
325 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
326     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
327     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
328     CeedOperator op, CeedOperator_Blocked *impl) {
329   CeedInt ierr;
330   CeedInt dim, elemsize, size;
331   CeedElemRestriction Erestrict;
332   CeedEvalMode emode;
333   CeedBasis basis;
334 
335   for (CeedInt i=0; i<numoutputfields; i++) {
336     // Get elemsize, emode, size
337     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
338     CeedChk(ierr);
339     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
340     CeedChk(ierr);
341     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
342     CeedChk(ierr);
343     ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
344     // Basis action
345     switch(emode) {
346     case CEED_EVAL_NONE:
347       break; // No action
348     case CEED_EVAL_INTERP:
349       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
350       CeedChk(ierr);
351       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
352                                 CEED_USE_POINTER,
353                                 &impl->edata[i + numinputfields][e*elemsize*size]);
354       CeedChk(ierr);
355       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
356                             CEED_EVAL_INTERP, impl->qvecsout[i],
357                             impl->evecsout[i]); CeedChk(ierr);
358       break;
359     case CEED_EVAL_GRAD:
360       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
361       CeedChk(ierr);
362       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
363       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
364                                 CEED_USE_POINTER,
365                                 &impl->edata[i + numinputfields][e*elemsize*size/dim]);
366       CeedChk(ierr);
367       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
368                             CEED_EVAL_GRAD, impl->qvecsout[i],
369                             impl->evecsout[i]); CeedChk(ierr);
370       break;
371     // LCOV_EXCL_START
372     case CEED_EVAL_WEIGHT: {
373       Ceed ceed;
374       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
375       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
376                        "evaluation mode");
377     }
378     case CEED_EVAL_DIV:
379     case CEED_EVAL_CURL: {
380       Ceed ceed;
381       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
382       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
383       // LCOV_EXCL_STOP
384     }
385     }
386   }
387   return 0;
388 }
389 
390 //------------------------------------------------------------------------------
391 // Restore Input Vectors
392 //------------------------------------------------------------------------------
393 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields,
394     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
395     bool skipactive, CeedOperator_Blocked *impl) {
396   CeedInt ierr;
397   CeedEvalMode emode;
398 
399   for (CeedInt i=0; i<numinputfields; i++) {
400     // Skip active inputs
401     if (skipactive) {
402       CeedVector vec;
403       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
404       if (vec == CEED_VECTOR_ACTIVE)
405         continue;
406     }
407     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
408     CeedChk(ierr);
409     if (emode == CEED_EVAL_WEIGHT) { // Skip
410     } else {
411       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
412                                         (const CeedScalar **) &impl->edata[i]);
413       CeedChk(ierr);
414     }
415   }
416   return 0;
417 }
418 
419 //------------------------------------------------------------------------------
420 // Operator Apply
421 //------------------------------------------------------------------------------
422 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec,
423                                      CeedVector outvec,
424                                      CeedRequest *request) {
425   int ierr;
426   CeedOperator_Blocked *impl;
427   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
428   const CeedInt blksize = 8;
429   CeedInt Q, numinputfields, numoutputfields, numelements, size;
430   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
431   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
432   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
433   CeedQFunction qf;
434   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
435   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
436   CeedChk(ierr);
437   CeedOperatorField *opinputfields, *opoutputfields;
438   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
439   CeedChk(ierr);
440   CeedQFunctionField *qfinputfields, *qfoutputfields;
441   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
442   CeedChk(ierr);
443   CeedEvalMode emode;
444   CeedVector vec;
445 
446   // Setup
447   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
448 
449   // Input Evecs and Restriction
450   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
451                                          opinputfields, invec, false, impl,
452                                          request); CeedChk(ierr);
453 
454   // Output Evecs
455   for (CeedInt i=0; i<numoutputfields; i++) {
456     ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST,
457                               &impl->edata[i + numinputfields]); CeedChk(ierr);
458   }
459 
460   // Loop through elements
461   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
462     // Output pointers
463     for (CeedInt i=0; i<numoutputfields; i++) {
464       ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
465       CeedChk(ierr);
466       if (emode == CEED_EVAL_NONE) {
467         ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size);
468         CeedChk(ierr);
469         ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
470                                   CEED_USE_POINTER,
471                                   &impl->edata[i + numinputfields][e*Q*size]);
472         CeedChk(ierr);
473       }
474     }
475 
476     // Input basis apply
477     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
478                                           numinputfields, blksize, false, impl);
479     CeedChk(ierr);
480 
481     // Q function
482     if (!impl->identityqf) {
483       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
484       CeedChk(ierr);
485     }
486 
487     // Output basis apply
488     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields,
489                                            blksize, numinputfields,
490                                            numoutputfields, op, impl);
491     CeedChk(ierr);
492   }
493 
494   // Output restriction
495   for (CeedInt i=0; i<numoutputfields; i++) {
496     // Restore evec
497     ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein],
498                                   &impl->edata[i + numinputfields]); CeedChk(ierr);
499     // Get output vector
500     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
501     // Active
502     if (vec == CEED_VECTOR_ACTIVE)
503       vec = outvec;
504     // Restrict
505     ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein],
506                                     CEED_TRANSPOSE, impl->evecs[i+impl->numein],
507                                     vec, request); CeedChk(ierr);
508 
509   }
510 
511   // Restore input arrays
512   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
513          opinputfields, false, impl); CeedChk(ierr);
514 
515   return 0;
516 }
517 
518 //------------------------------------------------------------------------------
519 // Assemble Linear QFunction
520 //------------------------------------------------------------------------------
521 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op,
522     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
523   int ierr;
524   CeedOperator_Blocked *impl;
525   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
526   const CeedInt blksize = 8;
527   CeedInt Q, numinputfields, numoutputfields, numelements, size;
528   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
529   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
530   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
531   CeedQFunction qf;
532   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
533   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
534   CeedChk(ierr);
535   CeedOperatorField *opinputfields, *opoutputfields;
536   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
537   CeedChk(ierr);
538   CeedQFunctionField *qfinputfields, *qfoutputfields;
539   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
540   CeedChk(ierr);
541   CeedVector vec, lvec;
542   CeedInt numactivein = 0, numactiveout = 0;
543   CeedVector *activein = NULL;
544   CeedScalar *a, *tmp;
545   Ceed ceed;
546   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
547 
548   // Setup
549   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
550 
551   // Check for identity
552   if (impl->identityqf)
553     // LCOV_EXCL_START
554     return CeedError(ceed, 1, "Assembling identity qfunctions not supported");
555   // LCOV_EXCL_STOP
556 
557   // Input Evecs and Restriction
558   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
559                                          opinputfields, NULL, true, impl,
560                                          request); CeedChk(ierr);
561 
562   // Count number of active input fields
563   for (CeedInt i=0; i<numinputfields; i++) {
564     // Get input vector
565     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
566     // Check if active input
567     if (vec == CEED_VECTOR_ACTIVE) {
568       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
569       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
570       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
571       CeedChk(ierr);
572       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
573       for (CeedInt field=0; field<size; field++) {
574         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
575         CeedChk(ierr);
576         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
577                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
578         CeedChk(ierr);
579       }
580       numactivein += size;
581       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
582     }
583   }
584 
585   // Count number of active output fields
586   for (CeedInt i=0; i<numoutputfields; i++) {
587     // Get output vector
588     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
589     // Check if active output
590     if (vec == CEED_VECTOR_ACTIVE) {
591       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
592       numactiveout += size;
593     }
594   }
595 
596   // Check sizes
597   if (!numactivein || !numactiveout)
598     // LCOV_EXCL_START
599     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
600                      "and outputs");
601   // LCOV_EXCL_STOP
602 
603   // Setup lvec
604   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
605                           &lvec); CeedChk(ierr);
606   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
607 
608   // Create output restriction
609   CeedInt strides[3] = {1, Q, numactivein *numactiveout*Q};
610   ierr = CeedElemRestrictionCreateStrided(ceed, numelements, Q, numelements*Q,
611                                           numactivein*numactiveout, strides, rstr); CeedChk(ierr);
612   // Create assembled vector
613   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
614                           assembled); CeedChk(ierr);
615 
616   // Loop through elements
617   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
618     // Input basis apply
619     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
620                                           numinputfields, blksize, true, impl);
621     CeedChk(ierr);
622 
623     // Assemble QFunction
624     for (CeedInt in=0; in<numactivein; in++) {
625       // Set Inputs
626       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
627       if (numactivein > 1) {
628         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
629                                   0.0); CeedChk(ierr);
630       }
631       // Set Outputs
632       for (CeedInt out=0; out<numoutputfields; out++) {
633         // Get output vector
634         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
635         CeedChk(ierr);
636         // Check if active output
637         if (vec == CEED_VECTOR_ACTIVE) {
638           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
639                              CEED_USE_POINTER, a); CeedChk(ierr);
640           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
641           CeedChk(ierr);
642           a += size*Q*blksize; // Advance the pointer by the size of the output
643         }
644       }
645       // Apply QFunction
646       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
647       CeedChk(ierr);
648     }
649   }
650 
651   // Un-set output Qvecs to prevent accidental overwrite of Assembled
652   for (CeedInt out=0; out<numoutputfields; out++) {
653     // Get output vector
654     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
655     CeedChk(ierr);
656     // Check if active output
657     if (vec == CEED_VECTOR_ACTIVE) {
658       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
659                          NULL); CeedChk(ierr);
660     }
661   }
662 
663   // Restore input arrays
664   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
665          opinputfields, true, impl); CeedChk(ierr);
666 
667   // Output blocked restriction
668   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
669   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
670   CeedElemRestriction blkrstr;
671   ierr = CeedElemRestrictionCreateBlockedStrided(ceed, numelements, Q, blksize,
672          numelements*Q,
673          numactivein*numactiveout,
674          strides, &blkrstr);
675   CeedChk(ierr);
676   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled,
677                                   request); CeedChk(ierr);
678 
679   // Cleanup
680   for (CeedInt i=0; i<numactivein; i++) {
681     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
682   }
683   ierr = CeedFree(&activein); CeedChk(ierr);
684   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
685   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
686 
687   return 0;
688 }
689 
690 //------------------------------------------------------------------------------
691 // Operator Destroy
692 //------------------------------------------------------------------------------
693 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
694   int ierr;
695   CeedOperator_Blocked *impl;
696   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
697 
698   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
699     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
700     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
701   }
702   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
703   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
704   ierr = CeedFree(&impl->edata); CeedChk(ierr);
705   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
706 
707   for (CeedInt i=0; i<impl->numein; i++) {
708     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
709     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
710   }
711   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
712   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
713 
714   for (CeedInt i=0; i<impl->numeout; i++) {
715     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
716     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
717   }
718   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
719   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
720 
721   ierr = CeedFree(&impl); CeedChk(ierr);
722   return 0;
723 }
724 
725 //------------------------------------------------------------------------------
726 // Operator Create
727 //------------------------------------------------------------------------------
728 int CeedOperatorCreate_Blocked(CeedOperator op) {
729   int ierr;
730   Ceed ceed;
731   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
732   CeedOperator_Blocked *impl;
733 
734   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
735   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
736 
737   ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction",
738                                 CeedOperatorAssembleLinearQFunction_Blocked);
739   CeedChk(ierr);
740   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
741                                 CeedOperatorApply_Blocked); CeedChk(ierr);
742   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
743                                 CeedOperatorDestroy_Blocked); CeedChk(ierr);
744   return 0;
745 }
746 //------------------------------------------------------------------------------
747