xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision d99fa3c5cd91a1690aedf0679cbf290d44fec74c)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include "ceed-blocked.h"
18 
19 //------------------------------------------------------------------------------
20 // Setup Input/Output Fields
21 //------------------------------------------------------------------------------
22 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
23     CeedOperator op, bool inOrOut,
24     CeedElemRestriction *blkrestr,
25     CeedVector *fullevecs, CeedVector *evecs,
26     CeedVector *qvecs, CeedInt starte,
27     CeedInt numfields, CeedInt Q) {
28   CeedInt dim, ierr, ncomp, size, P;
29   Ceed ceed;
30   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
31   CeedBasis basis;
32   CeedElemRestriction r;
33   CeedOperatorField *opfields;
34   CeedQFunctionField *qffields;
35   if (inOrOut) {
36     ierr = CeedOperatorGetFields(op, NULL, &opfields);
37     CeedChk(ierr);
38     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
39     CeedChk(ierr);
40   } else {
41     ierr = CeedOperatorGetFields(op, &opfields, NULL);
42     CeedChk(ierr);
43     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
44     CeedChk(ierr);
45   }
46   const CeedInt blksize = 8;
47 
48   // Loop over fields
49   for (CeedInt i=0; i<numfields; i++) {
50     CeedEvalMode emode;
51     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
52 
53     if (emode != CEED_EVAL_WEIGHT) {
54       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
55       CeedChk(ierr);
56       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
57       CeedInt nelem, elemsize, lsize, compstride;
58       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
59       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
60       ierr = CeedElemRestrictionGetLVectorSize(r, &lsize); CeedChk(ierr);
61       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
62 
63       bool strided;
64       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChk(ierr);
65       if (strided) {
66         CeedInt strides[3];
67         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr);
68         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, nelem, elemsize,
69                blksize, ncomp, lsize, strides, &blkrestr[i+starte]);
70         CeedChk(ierr);
71       } else {
72         const CeedInt *offsets = NULL;
73         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
74         CeedChk(ierr);
75         ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr);
76         ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize,
77                                                 blksize, ncomp, compstride,
78                                                 lsize, CEED_MEM_HOST,
79                                                 CEED_COPY_VALUES, offsets,
80                                                 &blkrestr[i+starte]);
81         CeedChk(ierr);
82         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChk(ierr);
83       }
84       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
85                                              &fullevecs[i+starte]);
86       CeedChk(ierr);
87     }
88 
89     switch(emode) {
90     case CEED_EVAL_NONE:
91       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
92       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
93       break;
94     case CEED_EVAL_INTERP:
95       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
96       ierr = CeedElemRestrictionGetElementSize(r, &P);
97       CeedChk(ierr);
98       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
99       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
100       break;
101     case CEED_EVAL_GRAD:
102       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
103       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
104       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
105       ierr = CeedElemRestrictionGetElementSize(r, &P);
106       CeedChk(ierr);
107       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
108       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
109       break;
110     case CEED_EVAL_WEIGHT: // Only on input fields
111       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
112       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
113       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
114                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]);
115       CeedChk(ierr);
116 
117       break;
118     case CEED_EVAL_DIV:
119       break; // Not implemented
120     case CEED_EVAL_CURL:
121       break; // Not implemented
122     }
123   }
124   return 0;
125 }
126 
127 //------------------------------------------------------------------------------
128 // Setup Operator
129 //------------------------------------------------------------------------------
130 static int CeedOperatorSetup_Blocked(CeedOperator op) {
131   int ierr;
132   bool setupdone;
133   ierr = CeedOperatorIsSetupDone(op, &setupdone); CeedChk(ierr);
134   if (setupdone) return 0;
135   Ceed ceed;
136   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
137   CeedOperator_Blocked *impl;
138   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
139   CeedQFunction qf;
140   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
141   CeedInt Q, numinputfields, numoutputfields;
142   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
143   ierr = CeedQFunctionIsIdentity(qf, &impl->identityqf); CeedChk(ierr);
144   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
145   CeedChk(ierr);
146   CeedOperatorField *opinputfields, *opoutputfields;
147   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
148   CeedChk(ierr);
149   CeedQFunctionField *qfinputfields, *qfoutputfields;
150   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
151   CeedChk(ierr);
152 
153   // Allocate
154   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
155   CeedChk(ierr);
156   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
157   CeedChk(ierr);
158   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
159   CeedChk(ierr);
160 
161   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
162   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
163   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
164   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
165   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
166 
167   impl->numein = numinputfields; impl->numeout = numoutputfields;
168 
169   // Set up infield and outfield pointer arrays
170   // Infields
171   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr,
172                                          impl->evecs, impl->evecsin,
173                                          impl->qvecsin, 0,
174                                          numinputfields, Q);
175   CeedChk(ierr);
176   // Outfields
177   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr,
178                                          impl->evecs, impl->evecsout,
179                                          impl->qvecsout, numinputfields,
180                                          numoutputfields, Q);
181   CeedChk(ierr);
182 
183   // Identity QFunctions
184   if (impl->identityqf) {
185     CeedEvalMode inmode, outmode;
186     CeedQFunctionField *infields, *outfields;
187     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr);
188 
189     for (CeedInt i=0; i<numinputfields; i++) {
190       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
191       CeedChk(ierr);
192       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
193       CeedChk(ierr);
194 
195       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
196       impl->qvecsout[i] = impl->qvecsin[i];
197       ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr);
198     }
199   }
200 
201   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
202 
203   return 0;
204 }
205 
206 //------------------------------------------------------------------------------
207 // Setup Operator Inputs
208 //------------------------------------------------------------------------------
209 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields,
210     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
211     CeedVector invec, bool skipactive, CeedOperator_Blocked *impl,
212     CeedRequest *request) {
213   CeedInt ierr;
214   CeedEvalMode emode;
215   CeedVector vec;
216   uint64_t state;
217 
218   for (CeedInt i=0; i<numinputfields; i++) {
219     // Get input vector
220     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
221     if (vec == CEED_VECTOR_ACTIVE) {
222       if (skipactive)
223         continue;
224       else
225         vec = invec;
226     }
227 
228     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
229     CeedChk(ierr);
230     if (emode == CEED_EVAL_WEIGHT) { // Skip
231     } else {
232       // Restrict
233       ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
234       if (state != impl->inputstate[i] || vec == invec) {
235         ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
236                                         vec, impl->evecs[i], request);
237         CeedChk(ierr);
238         impl->inputstate[i] = state;
239       }
240       // Get evec
241       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
242                                     (const CeedScalar **) &impl->edata[i]);
243       CeedChk(ierr);
244     }
245   }
246   return 0;
247 }
248 
249 //------------------------------------------------------------------------------
250 // Input Basis Action
251 //------------------------------------------------------------------------------
252 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
253     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
254     CeedInt numinputfields, CeedInt blksize, bool skipactive,
255     CeedOperator_Blocked *impl) {
256   CeedInt ierr;
257   CeedInt dim, elemsize, size;
258   CeedElemRestriction Erestrict;
259   CeedEvalMode emode;
260   CeedBasis basis;
261 
262   for (CeedInt i=0; i<numinputfields; i++) {
263     // Skip active input
264     if (skipactive) {
265       CeedVector vec;
266       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
267       if (vec == CEED_VECTOR_ACTIVE)
268         continue;
269     }
270 
271     // Get elemsize, emode, size
272     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
273     CeedChk(ierr);
274     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
275     CeedChk(ierr);
276     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
277     CeedChk(ierr);
278     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
279     // Basis action
280     switch(emode) {
281     case CEED_EVAL_NONE:
282       ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
283                                 CEED_USE_POINTER,
284                                 &impl->edata[i][e*Q*size]); CeedChk(ierr);
285       break;
286     case CEED_EVAL_INTERP:
287       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
288       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
289                                 CEED_USE_POINTER,
290                                 &impl->edata[i][e*elemsize*size]);
291       CeedChk(ierr);
292       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
293                             CEED_EVAL_INTERP, impl->evecsin[i],
294                             impl->qvecsin[i]); CeedChk(ierr);
295       break;
296     case CEED_EVAL_GRAD:
297       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
298       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
299       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
300                                 CEED_USE_POINTER,
301                                 &impl->edata[i][e*elemsize*size/dim]);
302       CeedChk(ierr);
303       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
304                             CEED_EVAL_GRAD, impl->evecsin[i],
305                             impl->qvecsin[i]); CeedChk(ierr);
306       break;
307     case CEED_EVAL_WEIGHT:
308       break;  // No action
309     // LCOV_EXCL_START
310     case CEED_EVAL_DIV:
311     case CEED_EVAL_CURL: {
312       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
313       CeedChk(ierr);
314       Ceed ceed;
315       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
316       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
317       // LCOV_EXCL_STOP
318     }
319     }
320   }
321   return 0;
322 }
323 
324 //------------------------------------------------------------------------------
325 // Output Basis Action
326 //------------------------------------------------------------------------------
327 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
328     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
329     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
330     CeedOperator op, CeedOperator_Blocked *impl) {
331   CeedInt ierr;
332   CeedInt dim, elemsize, size;
333   CeedElemRestriction Erestrict;
334   CeedEvalMode emode;
335   CeedBasis basis;
336 
337   for (CeedInt i=0; i<numoutputfields; i++) {
338     // Get elemsize, emode, size
339     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
340     CeedChk(ierr);
341     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
342     CeedChk(ierr);
343     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
344     CeedChk(ierr);
345     ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
346     // Basis action
347     switch(emode) {
348     case CEED_EVAL_NONE:
349       break; // No action
350     case CEED_EVAL_INTERP:
351       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
352       CeedChk(ierr);
353       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
354                                 CEED_USE_POINTER,
355                                 &impl->edata[i + numinputfields][e*elemsize*size]);
356       CeedChk(ierr);
357       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
358                             CEED_EVAL_INTERP, impl->qvecsout[i],
359                             impl->evecsout[i]); CeedChk(ierr);
360       break;
361     case CEED_EVAL_GRAD:
362       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
363       CeedChk(ierr);
364       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
365       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
366                                 CEED_USE_POINTER,
367                                 &impl->edata[i + numinputfields][e*elemsize*size/dim]);
368       CeedChk(ierr);
369       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
370                             CEED_EVAL_GRAD, impl->qvecsout[i],
371                             impl->evecsout[i]); CeedChk(ierr);
372       break;
373     // LCOV_EXCL_START
374     case CEED_EVAL_WEIGHT: {
375       Ceed ceed;
376       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
377       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
378                        "evaluation mode");
379     }
380     case CEED_EVAL_DIV:
381     case CEED_EVAL_CURL: {
382       Ceed ceed;
383       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
384       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
385       // LCOV_EXCL_STOP
386     }
387     }
388   }
389   return 0;
390 }
391 
392 //------------------------------------------------------------------------------
393 // Restore Input Vectors
394 //------------------------------------------------------------------------------
395 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields,
396     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
397     bool skipactive, CeedOperator_Blocked *impl) {
398   CeedInt ierr;
399   CeedEvalMode emode;
400 
401   for (CeedInt i=0; i<numinputfields; i++) {
402     // Skip active inputs
403     if (skipactive) {
404       CeedVector vec;
405       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
406       if (vec == CEED_VECTOR_ACTIVE)
407         continue;
408     }
409     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
410     CeedChk(ierr);
411     if (emode == CEED_EVAL_WEIGHT) { // Skip
412     } else {
413       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
414                                         (const CeedScalar **) &impl->edata[i]);
415       CeedChk(ierr);
416     }
417   }
418   return 0;
419 }
420 
421 //------------------------------------------------------------------------------
422 // Operator Apply
423 //------------------------------------------------------------------------------
424 static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector invec,
425                                         CeedVector outvec,
426                                         CeedRequest *request) {
427   int ierr;
428   CeedOperator_Blocked *impl;
429   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
430   const CeedInt blksize = 8;
431   CeedInt Q, numinputfields, numoutputfields, numelements, size;
432   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
433   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
434   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
435   CeedQFunction qf;
436   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
437   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
438   CeedChk(ierr);
439   CeedOperatorField *opinputfields, *opoutputfields;
440   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
441   CeedChk(ierr);
442   CeedQFunctionField *qfinputfields, *qfoutputfields;
443   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
444   CeedChk(ierr);
445   CeedEvalMode emode;
446   CeedVector vec;
447 
448   // Setup
449   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
450 
451   // Input Evecs and Restriction
452   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
453                                          opinputfields, invec, false, impl,
454                                          request); CeedChk(ierr);
455 
456   // Output Evecs
457   for (CeedInt i=0; i<numoutputfields; i++) {
458     ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST,
459                               &impl->edata[i + numinputfields]); CeedChk(ierr);
460   }
461 
462   // Loop through elements
463   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
464     // Output pointers
465     for (CeedInt i=0; i<numoutputfields; i++) {
466       ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
467       CeedChk(ierr);
468       if (emode == CEED_EVAL_NONE) {
469         ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size);
470         CeedChk(ierr);
471         ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
472                                   CEED_USE_POINTER,
473                                   &impl->edata[i + numinputfields][e*Q*size]);
474         CeedChk(ierr);
475       }
476     }
477 
478     // Input basis apply
479     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
480                                           numinputfields, blksize, false, impl);
481     CeedChk(ierr);
482 
483     // Q function
484     if (!impl->identityqf) {
485       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
486       CeedChk(ierr);
487     }
488 
489     // Output basis apply
490     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields,
491                                            blksize, numinputfields,
492                                            numoutputfields, op, impl);
493     CeedChk(ierr);
494   }
495 
496   // Output restriction
497   for (CeedInt i=0; i<numoutputfields; i++) {
498     // Restore evec
499     ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein],
500                                   &impl->edata[i + numinputfields]); CeedChk(ierr);
501     // Get output vector
502     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
503     // Active
504     if (vec == CEED_VECTOR_ACTIVE)
505       vec = outvec;
506     // Restrict
507     ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein],
508                                     CEED_TRANSPOSE, impl->evecs[i+impl->numein],
509                                     vec, request); CeedChk(ierr);
510 
511   }
512 
513   // Restore input arrays
514   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
515          opinputfields, false, impl); CeedChk(ierr);
516 
517   return 0;
518 }
519 
520 //------------------------------------------------------------------------------
521 // Assemble Linear QFunction
522 //------------------------------------------------------------------------------
523 static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op,
524     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
525   int ierr;
526   CeedOperator_Blocked *impl;
527   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
528   const CeedInt blksize = 8;
529   CeedInt Q, numinputfields, numoutputfields, numelements, size;
530   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
531   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
532   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
533   CeedQFunction qf;
534   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
535   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
536   CeedChk(ierr);
537   CeedOperatorField *opinputfields, *opoutputfields;
538   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
539   CeedChk(ierr);
540   CeedQFunctionField *qfinputfields, *qfoutputfields;
541   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
542   CeedChk(ierr);
543   CeedVector vec, lvec;
544   CeedInt numactivein = 0, numactiveout = 0;
545   CeedVector *activein = NULL;
546   CeedScalar *a, *tmp;
547   Ceed ceed;
548   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
549 
550   // Setup
551   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
552 
553   // Check for identity
554   if (impl->identityqf)
555     // LCOV_EXCL_START
556     return CeedError(ceed, 1, "Assembling identity qfunctions not supported");
557   // LCOV_EXCL_STOP
558 
559   // Input Evecs and Restriction
560   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
561                                          opinputfields, NULL, true, impl,
562                                          request); CeedChk(ierr);
563 
564   // Count number of active input fields
565   for (CeedInt i=0; i<numinputfields; i++) {
566     // Get input vector
567     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
568     // Check if active input
569     if (vec == CEED_VECTOR_ACTIVE) {
570       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
571       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
572       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
573       CeedChk(ierr);
574       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
575       for (CeedInt field=0; field<size; field++) {
576         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
577         CeedChk(ierr);
578         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
579                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
580         CeedChk(ierr);
581       }
582       numactivein += size;
583       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
584     }
585   }
586 
587   // Count number of active output fields
588   for (CeedInt i=0; i<numoutputfields; i++) {
589     // Get output vector
590     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
591     // Check if active output
592     if (vec == CEED_VECTOR_ACTIVE) {
593       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
594       numactiveout += size;
595     }
596   }
597 
598   // Check sizes
599   if (!numactivein || !numactiveout)
600     // LCOV_EXCL_START
601     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
602                      "and outputs");
603   // LCOV_EXCL_STOP
604 
605   // Setup lvec
606   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
607                           &lvec); CeedChk(ierr);
608   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
609 
610   // Create output restriction
611   CeedInt strides[3] = {1, Q, numactivein *numactiveout*Q};
612   ierr = CeedElemRestrictionCreateStrided(ceed, numelements, Q,
613                                           numactivein*numactiveout,
614                                           numactivein*numactiveout*numelements*Q,
615                                           strides, rstr); CeedChk(ierr);
616   // Create assembled vector
617   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
618                           assembled); CeedChk(ierr);
619 
620   // Loop through elements
621   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
622     // Input basis apply
623     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
624                                           numinputfields, blksize, true, impl);
625     CeedChk(ierr);
626 
627     // Assemble QFunction
628     for (CeedInt in=0; in<numactivein; in++) {
629       // Set Inputs
630       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
631       if (numactivein > 1) {
632         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
633                                   0.0); CeedChk(ierr);
634       }
635       // Set Outputs
636       for (CeedInt out=0; out<numoutputfields; out++) {
637         // Get output vector
638         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
639         CeedChk(ierr);
640         // Check if active output
641         if (vec == CEED_VECTOR_ACTIVE) {
642           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
643                              CEED_USE_POINTER, a); CeedChk(ierr);
644           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
645           CeedChk(ierr);
646           a += size*Q*blksize; // Advance the pointer by the size of the output
647         }
648       }
649       // Apply QFunction
650       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
651       CeedChk(ierr);
652     }
653   }
654 
655   // Un-set output Qvecs to prevent accidental overwrite of Assembled
656   for (CeedInt out=0; out<numoutputfields; out++) {
657     // Get output vector
658     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
659     CeedChk(ierr);
660     // Check if active output
661     if (vec == CEED_VECTOR_ACTIVE) {
662       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
663                          NULL); CeedChk(ierr);
664     }
665   }
666 
667   // Restore input arrays
668   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
669          opinputfields, true, impl); CeedChk(ierr);
670 
671   // Output blocked restriction
672   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
673   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
674   CeedElemRestriction blkrstr;
675   ierr = CeedElemRestrictionCreateBlockedStrided(ceed, numelements, Q, blksize,
676          numactivein*numactiveout, numactivein*numactiveout*numelements*Q,
677          strides, &blkrstr); CeedChk(ierr);
678   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled,
679                                   request); CeedChk(ierr);
680 
681   // Cleanup
682   for (CeedInt i=0; i<numactivein; i++) {
683     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
684   }
685   ierr = CeedFree(&activein); CeedChk(ierr);
686   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
687   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
688 
689   return 0;
690 }
691 
692 //------------------------------------------------------------------------------
693 // Operator Destroy
694 //------------------------------------------------------------------------------
695 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
696   int ierr;
697   CeedOperator_Blocked *impl;
698   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
699 
700   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
701     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
702     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
703   }
704   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
705   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
706   ierr = CeedFree(&impl->edata); CeedChk(ierr);
707   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
708 
709   for (CeedInt i=0; i<impl->numein; i++) {
710     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
711     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
712   }
713   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
714   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
715 
716   for (CeedInt i=0; i<impl->numeout; i++) {
717     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
718     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
719   }
720   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
721   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
722 
723   ierr = CeedFree(&impl); CeedChk(ierr);
724   return 0;
725 }
726 
727 //------------------------------------------------------------------------------
728 // Operator Create
729 //------------------------------------------------------------------------------
730 int CeedOperatorCreate_Blocked(CeedOperator op) {
731   int ierr;
732   Ceed ceed;
733   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
734   CeedOperator_Blocked *impl;
735 
736   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
737   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
738 
739   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
740                                 CeedOperatorLinearAssembleQFunction_Blocked);
741   CeedChk(ierr);
742   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
743                                 CeedOperatorApplyAdd_Blocked); CeedChk(ierr);
744   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
745                                 CeedOperatorDestroy_Blocked); CeedChk(ierr);
746   return 0;
747 }
748 //------------------------------------------------------------------------------
749