xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision ccaff0309dc399f656ea11018b919b30feb8b0fa) !
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include "ceed-blocked.h"
18 #include "../ref/ceed-ref.h"
19 
20 //------------------------------------------------------------------------------
21 // Setup Input/Output Fields
22 //------------------------------------------------------------------------------
23 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
24     CeedOperator op, bool inOrOut,
25     CeedElemRestriction *blkrestr,
26     CeedVector *fullevecs, CeedVector *evecs,
27     CeedVector *qvecs, CeedInt starte,
28     CeedInt numfields, CeedInt Q) {
29   CeedInt dim, ierr, ncomp, size, P;
30   Ceed ceed;
31   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
32   CeedBasis basis;
33   CeedElemRestriction r;
34   CeedOperatorField *opfields;
35   CeedQFunctionField *qffields;
36   if (inOrOut) {
37     ierr = CeedOperatorGetFields(op, NULL, &opfields);
38     CeedChk(ierr);
39     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
40     CeedChk(ierr);
41   } else {
42     ierr = CeedOperatorGetFields(op, &opfields, NULL);
43     CeedChk(ierr);
44     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
45     CeedChk(ierr);
46   }
47   const CeedInt blksize = 8;
48 
49   // Loop over fields
50   for (CeedInt i=0; i<numfields; i++) {
51     CeedEvalMode emode;
52     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
53 
54     if (emode != CEED_EVAL_WEIGHT) {
55       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
56       CeedChk(ierr);
57       CeedElemRestriction_Ref *data;
58       ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr);
59       Ceed ceed;
60       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
61       CeedInt nelem, elemsize, nnodes;
62       CeedInterlaceMode imode;
63       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
64       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
65       ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
66       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
67       if (data->indices) {
68         ierr = CeedElemRestrictionGetIMode(r, &imode); CeedChk(ierr);
69         ierr = CeedElemRestrictionCreateBlocked(ceed, imode, nelem, elemsize,
70                                                 blksize, nnodes, ncomp,
71                                                 CEED_MEM_HOST, CEED_COPY_VALUES,
72                                                 data->indices,
73                                                 &blkrestr[i+starte]);
74         CeedChk(ierr);
75       } else {
76         CeedInt strides[3];
77         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr);
78         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, nelem, elemsize,
79                blksize, nnodes, ncomp, strides, &blkrestr[i+starte]);
80         CeedChk(ierr);
81       }
82       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
83                                              &fullevecs[i+starte]);
84       CeedChk(ierr);
85     }
86 
87     switch(emode) {
88     case CEED_EVAL_NONE:
89       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
90       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
91       break;
92     case CEED_EVAL_INTERP:
93       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
94       ierr = CeedElemRestrictionGetElementSize(r, &P);
95       CeedChk(ierr);
96       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
97       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
98       break;
99     case CEED_EVAL_GRAD:
100       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
101       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
102       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
103       ierr = CeedElemRestrictionGetElementSize(r, &P);
104       CeedChk(ierr);
105       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
106       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
107       break;
108     case CEED_EVAL_WEIGHT: // Only on input fields
109       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
110       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
111       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
112                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]);
113       CeedChk(ierr);
114 
115       break;
116     case CEED_EVAL_DIV:
117       break; // Not implemented
118     case CEED_EVAL_CURL:
119       break; // Not implemented
120     }
121   }
122   return 0;
123 }
124 
125 //------------------------------------------------------------------------------
126 // Setup Operator
127 //------------------------------------------------------------------------------
128 static int CeedOperatorSetup_Blocked(CeedOperator op) {
129   int ierr;
130   bool setupdone;
131   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
132   if (setupdone) return 0;
133   Ceed ceed;
134   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
135   CeedOperator_Blocked *impl;
136   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
137   CeedQFunction qf;
138   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
139   CeedInt Q, numinputfields, numoutputfields;
140   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
141   ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr);
142   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
143   CeedChk(ierr);
144   CeedOperatorField *opinputfields, *opoutputfields;
145   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
146   CeedChk(ierr);
147   CeedQFunctionField *qfinputfields, *qfoutputfields;
148   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
149   CeedChk(ierr);
150 
151   // Allocate
152   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
153   CeedChk(ierr);
154   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
155   CeedChk(ierr);
156   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
157   CeedChk(ierr);
158 
159   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
160   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
161   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
162   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
163   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
164 
165   impl->numein = numinputfields; impl->numeout = numoutputfields;
166 
167   // Set up infield and outfield pointer arrays
168   // Infields
169   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr,
170                                          impl->evecs, impl->evecsin,
171                                          impl->qvecsin, 0,
172                                          numinputfields, Q);
173   CeedChk(ierr);
174   // Outfields
175   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr,
176                                          impl->evecs, impl->evecsout,
177                                          impl->qvecsout, numinputfields,
178                                          numoutputfields, Q);
179   CeedChk(ierr);
180 
181   // Identity QFunctions
182   if (impl->identityqf) {
183     CeedEvalMode inmode, outmode;
184     CeedQFunctionField *infields, *outfields;
185     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr);
186 
187     for (CeedInt i=0; i<numinputfields; i++) {
188       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
189       CeedChk(ierr);
190       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
191       CeedChk(ierr);
192 
193       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
194       impl->qvecsout[i] = impl->qvecsin[i];
195       ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr);
196     }
197   }
198 
199   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
200 
201   return 0;
202 }
203 
204 //------------------------------------------------------------------------------
205 // Setup Operator Inputs
206 //------------------------------------------------------------------------------
207 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields,
208     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
209     CeedVector invec, bool skipactive, CeedOperator_Blocked *impl,
210     CeedRequest *request) {
211   CeedInt ierr;
212   CeedEvalMode emode;
213   CeedVector vec;
214   uint64_t state;
215 
216   for (CeedInt i=0; i<numinputfields; i++) {
217     // Get input vector
218     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
219     if (vec == CEED_VECTOR_ACTIVE) {
220       if (skipactive)
221         continue;
222       else
223         vec = invec;
224     }
225 
226     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
227     CeedChk(ierr);
228     if (emode == CEED_EVAL_WEIGHT) { // Skip
229     } else {
230       // Restrict
231       ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
232       if (state != impl->inputstate[i] || vec == invec) {
233         ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
234                                         vec, impl->evecs[i], request);
235         CeedChk(ierr);
236         impl->inputstate[i] = state;
237       }
238       // Get evec
239       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
240                                     (const CeedScalar **) &impl->edata[i]);
241       CeedChk(ierr);
242     }
243   }
244   return 0;
245 }
246 
247 //------------------------------------------------------------------------------
248 // Input Basis Action
249 //------------------------------------------------------------------------------
250 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
251     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
252     CeedInt numinputfields, CeedInt blksize, bool skipactive,
253     CeedOperator_Blocked *impl) {
254   CeedInt ierr;
255   CeedInt dim, elemsize, size;
256   CeedElemRestriction Erestrict;
257   CeedEvalMode emode;
258   CeedBasis basis;
259 
260   for (CeedInt i=0; i<numinputfields; i++) {
261     // Skip active input
262     if (skipactive) {
263       CeedVector vec;
264       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
265       if (vec == CEED_VECTOR_ACTIVE)
266         continue;
267     }
268 
269     // Get elemsize, emode, size
270     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
271     CeedChk(ierr);
272     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
273     CeedChk(ierr);
274     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
275     CeedChk(ierr);
276     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
277     // Basis action
278     switch(emode) {
279     case CEED_EVAL_NONE:
280       ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
281                                 CEED_USE_POINTER,
282                                 &impl->edata[i][e*Q*size]); CeedChk(ierr);
283       break;
284     case CEED_EVAL_INTERP:
285       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
286       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
287                                 CEED_USE_POINTER,
288                                 &impl->edata[i][e*elemsize*size]);
289       CeedChk(ierr);
290       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
291                             CEED_EVAL_INTERP, impl->evecsin[i],
292                             impl->qvecsin[i]); CeedChk(ierr);
293       break;
294     case CEED_EVAL_GRAD:
295       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
296       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
297       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
298                                 CEED_USE_POINTER,
299                                 &impl->edata[i][e*elemsize*size/dim]);
300       CeedChk(ierr);
301       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
302                             CEED_EVAL_GRAD, impl->evecsin[i],
303                             impl->qvecsin[i]); CeedChk(ierr);
304       break;
305     case CEED_EVAL_WEIGHT:
306       break;  // No action
307     // LCOV_EXCL_START
308     case CEED_EVAL_DIV:
309     case CEED_EVAL_CURL: {
310       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
311       CeedChk(ierr);
312       Ceed ceed;
313       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
314       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
315       break; // Not implemented
316       // LCOV_EXCL_STOP
317     }
318     }
319   }
320   return 0;
321 }
322 
323 //------------------------------------------------------------------------------
324 // Output Basis Action
325 //------------------------------------------------------------------------------
326 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
327     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
328     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
329     CeedOperator op, CeedOperator_Blocked *impl) {
330   CeedInt ierr;
331   CeedInt dim, elemsize, size;
332   CeedElemRestriction Erestrict;
333   CeedEvalMode emode;
334   CeedBasis basis;
335 
336   for (CeedInt i=0; i<numoutputfields; i++) {
337     // Get elemsize, emode, size
338     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
339     CeedChk(ierr);
340     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
341     CeedChk(ierr);
342     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
343     CeedChk(ierr);
344     ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
345     // Basis action
346     switch(emode) {
347     case CEED_EVAL_NONE:
348       break; // No action
349     case CEED_EVAL_INTERP:
350       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
351       CeedChk(ierr);
352       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
353                                 CEED_USE_POINTER,
354                                 &impl->edata[i + numinputfields][e*elemsize*size]);
355       CeedChk(ierr);
356       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
357                             CEED_EVAL_INTERP, impl->qvecsout[i],
358                             impl->evecsout[i]); CeedChk(ierr);
359       break;
360     case CEED_EVAL_GRAD:
361       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
362       CeedChk(ierr);
363       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
364       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
365                                 CEED_USE_POINTER,
366                                 &impl->edata[i + numinputfields][e*elemsize*size/dim]);
367       CeedChk(ierr);
368       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
369                             CEED_EVAL_GRAD, impl->qvecsout[i],
370                             impl->evecsout[i]); CeedChk(ierr);
371       break;
372     // LCOV_EXCL_START
373     case CEED_EVAL_WEIGHT: {
374       Ceed ceed;
375       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
376       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
377                        "evaluation mode");
378       break; // Should not occur
379     }
380     case CEED_EVAL_DIV:
381     case CEED_EVAL_CURL: {
382       Ceed ceed;
383       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
384       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
385       break; // Not implemented
386       // LCOV_EXCL_STOP
387     }
388     }
389   }
390   return 0;
391 }
392 
393 //------------------------------------------------------------------------------
394 // Restore Input Vectors
395 //------------------------------------------------------------------------------
396 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields,
397     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
398     bool skipactive, CeedOperator_Blocked *impl) {
399   CeedInt ierr;
400   CeedEvalMode emode;
401 
402   for (CeedInt i=0; i<numinputfields; i++) {
403     // Skip active inputs
404     if (skipactive) {
405       CeedVector vec;
406       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
407       if (vec == CEED_VECTOR_ACTIVE)
408         continue;
409     }
410     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
411     CeedChk(ierr);
412     if (emode == CEED_EVAL_WEIGHT) { // Skip
413     } else {
414       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
415                                         (const CeedScalar **) &impl->edata[i]);
416       CeedChk(ierr);
417     }
418   }
419   return 0;
420 }
421 
422 //------------------------------------------------------------------------------
423 // Operator Apply
424 //------------------------------------------------------------------------------
425 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec,
426                                      CeedVector outvec,
427                                      CeedRequest *request) {
428   int ierr;
429   CeedOperator_Blocked *impl;
430   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
431   const CeedInt blksize = 8;
432   CeedInt Q, numinputfields, numoutputfields, numelements, size;
433   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
434   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
435   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
436   CeedQFunction qf;
437   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
438   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
439   CeedChk(ierr);
440   CeedOperatorField *opinputfields, *opoutputfields;
441   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
442   CeedChk(ierr);
443   CeedQFunctionField *qfinputfields, *qfoutputfields;
444   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
445   CeedChk(ierr);
446   CeedEvalMode emode;
447   CeedVector vec;
448 
449   // Setup
450   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
451 
452   // Input Evecs and Restriction
453   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
454                                          opinputfields, invec, false, impl,
455                                          request); CeedChk(ierr);
456 
457   // Output Evecs
458   for (CeedInt i=0; i<numoutputfields; i++) {
459     ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST,
460                               &impl->edata[i + numinputfields]); CeedChk(ierr);
461   }
462 
463   // Loop through elements
464   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
465     // Output pointers
466     for (CeedInt i=0; i<numoutputfields; i++) {
467       ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
468       CeedChk(ierr);
469       if (emode == CEED_EVAL_NONE) {
470         ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size);
471         CeedChk(ierr);
472         ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
473                                   CEED_USE_POINTER,
474                                   &impl->edata[i + numinputfields][e*Q*size]);
475         CeedChk(ierr);
476       }
477     }
478 
479     // Input basis apply
480     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
481                                           numinputfields, blksize, false, impl);
482     CeedChk(ierr);
483 
484     // Q function
485     if (!impl->identityqf) {
486       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
487       CeedChk(ierr);
488     }
489 
490     // Output basis apply
491     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields,
492                                            blksize, numinputfields,
493                                            numoutputfields, op, impl);
494     CeedChk(ierr);
495   }
496 
497   // Output restriction
498   for (CeedInt i=0; i<numoutputfields; i++) {
499     // Restore evec
500     ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein],
501                                   &impl->edata[i + numinputfields]); CeedChk(ierr);
502     // Get output vector
503     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
504     // Active
505     if (vec == CEED_VECTOR_ACTIVE)
506       vec = outvec;
507     // Restrict
508     ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein],
509                                     CEED_TRANSPOSE, impl->evecs[i+impl->numein],
510                                     vec, request); CeedChk(ierr);
511 
512   }
513 
514   // Restore input arrays
515   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
516          opinputfields, false, impl); CeedChk(ierr);
517 
518   return 0;
519 }
520 
521 //------------------------------------------------------------------------------
522 // Assemble Linear QFunction
523 //------------------------------------------------------------------------------
524 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op,
525     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
526   int ierr;
527   CeedOperator_Blocked *impl;
528   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
529   const CeedInt blksize = 8;
530   CeedInt Q, numinputfields, numoutputfields, numelements, size;
531   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
532   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
533   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
534   CeedQFunction qf;
535   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
536   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
537   CeedChk(ierr);
538   CeedOperatorField *opinputfields, *opoutputfields;
539   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
540   CeedChk(ierr);
541   CeedQFunctionField *qfinputfields, *qfoutputfields;
542   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
543   CeedChk(ierr);
544   CeedVector vec, lvec;
545   CeedInt numactivein = 0, numactiveout = 0;
546   CeedVector *activein = NULL;
547   CeedScalar *a, *tmp;
548   Ceed ceed;
549   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
550 
551   // Setup
552   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
553 
554   // Check for identity
555   if (impl->identityqf)
556     // LCOV_EXCL_START
557     return CeedError(ceed, 1, "Assembling identity qfunctions not supported");
558   // LCOV_EXCL_STOP
559 
560   // Input Evecs and Restriction
561   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
562                                          opinputfields, NULL, true, impl,
563                                          request); CeedChk(ierr);
564 
565   // Count number of active input fields
566   for (CeedInt i=0; i<numinputfields; i++) {
567     // Get input vector
568     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
569     // Check if active input
570     if (vec == CEED_VECTOR_ACTIVE) {
571       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
572       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
573       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
574       CeedChk(ierr);
575       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
576       for (CeedInt field=0; field<size; field++) {
577         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
578         CeedChk(ierr);
579         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
580                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
581         CeedChk(ierr);
582       }
583       numactivein += size;
584       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
585     }
586   }
587 
588   // Count number of active output fields
589   for (CeedInt i=0; i<numoutputfields; i++) {
590     // Get output vector
591     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
592     // Check if active output
593     if (vec == CEED_VECTOR_ACTIVE) {
594       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
595       numactiveout += size;
596     }
597   }
598 
599   // Check sizes
600   if (!numactivein || !numactiveout)
601     // LCOV_EXCL_START
602     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
603                      "and outputs");
604   // LCOV_EXCL_STOP
605 
606   // Setup lvec
607   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
608                           &lvec); CeedChk(ierr);
609   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
610 
611   // Create output restriction
612   CeedInt strides[3] = {1, Q, numactivein *numactiveout*Q};
613   ierr = CeedElemRestrictionCreateStrided(ceed, numelements, Q, numelements*Q,
614                                           numactivein*numactiveout, strides, rstr); CeedChk(ierr);
615   // Create assembled vector
616   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
617                           assembled); CeedChk(ierr);
618 
619   // Loop through elements
620   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
621     // Input basis apply
622     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
623                                           numinputfields, blksize, true, impl);
624     CeedChk(ierr);
625 
626     // Assemble QFunction
627     for (CeedInt in=0; in<numactivein; in++) {
628       // Set Inputs
629       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
630       if (numactivein > 1) {
631         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
632                                   0.0); CeedChk(ierr);
633       }
634       // Set Outputs
635       for (CeedInt out=0; out<numoutputfields; out++) {
636         // Get output vector
637         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
638         CeedChk(ierr);
639         // Check if active output
640         if (vec == CEED_VECTOR_ACTIVE) {
641           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
642                              CEED_USE_POINTER, a); CeedChk(ierr);
643           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
644           CeedChk(ierr);
645           a += size*Q*blksize; // Advance the pointer by the size of the output
646         }
647       }
648       // Apply QFunction
649       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
650       CeedChk(ierr);
651     }
652   }
653 
654   // Un-set output Qvecs to prevent accidental overwrite of Assembled
655   for (CeedInt out=0; out<numoutputfields; out++) {
656     // Get output vector
657     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
658     CeedChk(ierr);
659     // Check if active output
660     if (vec == CEED_VECTOR_ACTIVE) {
661       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
662                          NULL); CeedChk(ierr);
663     }
664   }
665 
666   // Restore input arrays
667   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
668          opinputfields, true, impl); CeedChk(ierr);
669 
670   // Output blocked restriction
671   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
672   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
673   CeedElemRestriction blkrstr;
674   ierr = CeedElemRestrictionCreateBlockedStrided(ceed, numelements, Q, blksize,
675          numelements*Q,
676          numactivein*numactiveout,
677          strides, &blkrstr);
678   CeedChk(ierr);
679   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled,
680                                   request); CeedChk(ierr);
681 
682   // Cleanup
683   for (CeedInt i=0; i<numactivein; i++) {
684     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
685   }
686   ierr = CeedFree(&activein); CeedChk(ierr);
687   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
688   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
689 
690   return 0;
691 }
692 
693 //------------------------------------------------------------------------------
694 // Operator Destroy
695 //------------------------------------------------------------------------------
696 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
697   int ierr;
698   CeedOperator_Blocked *impl;
699   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
700 
701   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
702     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
703     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
704   }
705   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
706   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
707   ierr = CeedFree(&impl->edata); CeedChk(ierr);
708   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
709 
710   for (CeedInt i=0; i<impl->numein; i++) {
711     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
712     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
713   }
714   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
715   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
716 
717   for (CeedInt i=0; i<impl->numeout; i++) {
718     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
719     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
720   }
721   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
722   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
723 
724   ierr = CeedFree(&impl); CeedChk(ierr);
725   return 0;
726 }
727 
728 //------------------------------------------------------------------------------
729 // Operator Create
730 //------------------------------------------------------------------------------
731 int CeedOperatorCreate_Blocked(CeedOperator op) {
732   int ierr;
733   Ceed ceed;
734   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
735   CeedOperator_Blocked *impl;
736 
737   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
738   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
739 
740   ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction",
741                                 CeedOperatorAssembleLinearQFunction_Blocked);
742   CeedChk(ierr);
743   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
744                                 CeedOperatorApply_Blocked); CeedChk(ierr);
745   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
746                                 CeedOperatorDestroy_Blocked); CeedChk(ierr);
747   return 0;
748 }
749 //------------------------------------------------------------------------------
750