xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision cfa59c5b0c41ee3c7ca5285b7ce2579e871e5f29)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed.h>
18 #include <ceed-backend.h>
19 #include <stdbool.h>
20 #include <stddef.h>
21 #include <stdint.h>
22 #include "ceed-blocked.h"
23 
24 //------------------------------------------------------------------------------
25 // Setup Input/Output Fields
26 //------------------------------------------------------------------------------
27 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
28     CeedOperator op, bool inOrOut,
29     CeedElemRestriction *blkrestr,
30     CeedVector *fullevecs, CeedVector *evecs,
31     CeedVector *qvecs, CeedInt starte,
32     CeedInt numfields, CeedInt Q) {
33   CeedInt dim, ierr, ncomp, size, P;
34   Ceed ceed;
35   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
36   CeedBasis basis;
37   CeedElemRestriction r;
38   CeedOperatorField *opfields;
39   CeedQFunctionField *qffields;
40   if (inOrOut) {
41     ierr = CeedOperatorGetFields(op, NULL, &opfields);
42     CeedChkBackend(ierr);
43     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
44     CeedChkBackend(ierr);
45   } else {
46     ierr = CeedOperatorGetFields(op, &opfields, NULL);
47     CeedChkBackend(ierr);
48     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
49     CeedChkBackend(ierr);
50   }
51   const CeedInt blksize = 8;
52 
53   // Loop over fields
54   for (CeedInt i=0; i<numfields; i++) {
55     CeedEvalMode emode;
56     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChkBackend(ierr);
57 
58     if (emode != CEED_EVAL_WEIGHT) {
59       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
60       CeedChkBackend(ierr);
61       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
62       CeedInt nelem, elemsize, lsize, compstride;
63       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChkBackend(ierr);
64       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChkBackend(ierr);
65       ierr = CeedElemRestrictionGetLVectorSize(r, &lsize); CeedChkBackend(ierr);
66       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChkBackend(ierr);
67 
68       bool strided;
69       ierr = CeedElemRestrictionIsStrided(r, &strided); CeedChkBackend(ierr);
70       if (strided) {
71         CeedInt strides[3];
72         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
73         ierr = CeedElemRestrictionCreateBlockedStrided(ceed, nelem, elemsize,
74                blksize, ncomp, lsize, strides, &blkrestr[i+starte]);
75         CeedChkBackend(ierr);
76       } else {
77         const CeedInt *offsets = NULL;
78         ierr = CeedElemRestrictionGetOffsets(r, CEED_MEM_HOST, &offsets);
79         CeedChkBackend(ierr);
80         ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChkBackend(ierr);
81         ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize,
82                                                 blksize, ncomp, compstride,
83                                                 lsize, CEED_MEM_HOST,
84                                                 CEED_COPY_VALUES, offsets,
85                                                 &blkrestr[i+starte]);
86         CeedChkBackend(ierr);
87         ierr = CeedElemRestrictionRestoreOffsets(r, &offsets); CeedChkBackend(ierr);
88       }
89       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
90                                              &fullevecs[i+starte]);
91       CeedChkBackend(ierr);
92     }
93 
94     switch(emode) {
95     case CEED_EVAL_NONE:
96       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChkBackend(ierr);
97       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChkBackend(ierr);
98       break;
99     case CEED_EVAL_INTERP:
100       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChkBackend(ierr);
101       ierr = CeedElemRestrictionGetElementSize(r, &P);
102       CeedChkBackend(ierr);
103       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChkBackend(ierr);
104       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChkBackend(ierr);
105       break;
106     case CEED_EVAL_GRAD:
107       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChkBackend(ierr);
108       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChkBackend(ierr);
109       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
110       ierr = CeedElemRestrictionGetElementSize(r, &P);
111       CeedChkBackend(ierr);
112       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]);
113       CeedChkBackend(ierr);
114       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChkBackend(ierr);
115       break;
116     case CEED_EVAL_WEIGHT: // Only on input fields
117       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChkBackend(ierr);
118       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChkBackend(ierr);
119       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
120                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]);
121       CeedChkBackend(ierr);
122 
123       break;
124     case CEED_EVAL_DIV:
125       break; // Not implemented
126     case CEED_EVAL_CURL:
127       break; // Not implemented
128     }
129   }
130   return CEED_ERROR_SUCCESS;
131 }
132 
133 //------------------------------------------------------------------------------
134 // Setup Operator
135 //------------------------------------------------------------------------------
136 static int CeedOperatorSetup_Blocked(CeedOperator op) {
137   int ierr;
138   bool setupdone;
139   ierr = CeedOperatorIsSetupDone(op, &setupdone); CeedChkBackend(ierr);
140   if (setupdone) return CEED_ERROR_SUCCESS;
141   Ceed ceed;
142   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
143   CeedOperator_Blocked *impl;
144   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
145   CeedQFunction qf;
146   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
147   CeedInt Q, numinputfields, numoutputfields;
148   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
149   ierr = CeedQFunctionIsIdentity(qf, &impl->identityqf); CeedChkBackend(ierr);
150   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
151   CeedChkBackend(ierr);
152   CeedOperatorField *opinputfields, *opoutputfields;
153   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
154   CeedChkBackend(ierr);
155   CeedQFunctionField *qfinputfields, *qfoutputfields;
156   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
157   CeedChkBackend(ierr);
158 
159   // Allocate
160   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
161   CeedChkBackend(ierr);
162   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
163   CeedChkBackend(ierr);
164   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
165   CeedChkBackend(ierr);
166 
167   ierr = CeedCalloc(16, &impl->inputstate); CeedChkBackend(ierr);
168   ierr = CeedCalloc(16, &impl->evecsin); CeedChkBackend(ierr);
169   ierr = CeedCalloc(16, &impl->evecsout); CeedChkBackend(ierr);
170   ierr = CeedCalloc(16, &impl->qvecsin); CeedChkBackend(ierr);
171   ierr = CeedCalloc(16, &impl->qvecsout); CeedChkBackend(ierr);
172 
173   impl->numein = numinputfields; impl->numeout = numoutputfields;
174 
175   // Set up infield and outfield pointer arrays
176   // Infields
177   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr,
178                                          impl->evecs, impl->evecsin,
179                                          impl->qvecsin, 0,
180                                          numinputfields, Q);
181   CeedChkBackend(ierr);
182   // Outfields
183   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr,
184                                          impl->evecs, impl->evecsout,
185                                          impl->qvecsout, numinputfields,
186                                          numoutputfields, Q);
187   CeedChkBackend(ierr);
188 
189   // Identity QFunctions
190   if (impl->identityqf) {
191     CeedEvalMode inmode, outmode;
192     CeedQFunctionField *infields, *outfields;
193     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChkBackend(ierr);
194 
195     for (CeedInt i=0; i<numinputfields; i++) {
196       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
197       CeedChkBackend(ierr);
198       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
199       CeedChkBackend(ierr);
200 
201       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChkBackend(ierr);
202       impl->qvecsout[i] = impl->qvecsin[i];
203       ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChkBackend(ierr);
204     }
205   }
206 
207   ierr = CeedOperatorSetSetupDone(op); CeedChkBackend(ierr);
208 
209   return CEED_ERROR_SUCCESS;
210 }
211 
212 //------------------------------------------------------------------------------
213 // Setup Operator Inputs
214 //------------------------------------------------------------------------------
215 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields,
216     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
217     CeedVector invec, bool skipactive, CeedOperator_Blocked *impl,
218     CeedRequest *request) {
219   CeedInt ierr;
220   CeedEvalMode emode;
221   CeedVector vec;
222   uint64_t state;
223 
224   for (CeedInt i=0; i<numinputfields; i++) {
225     // Get input vector
226     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr);
227     if (vec == CEED_VECTOR_ACTIVE) {
228       if (skipactive)
229         continue;
230       else
231         vec = invec;
232     }
233 
234     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
235     CeedChkBackend(ierr);
236     if (emode == CEED_EVAL_WEIGHT) { // Skip
237     } else {
238       // Restrict
239       ierr = CeedVectorGetState(vec, &state); CeedChkBackend(ierr);
240       if (state != impl->inputstate[i] || vec == invec) {
241         ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
242                                         vec, impl->evecs[i], request);
243         CeedChkBackend(ierr);
244         impl->inputstate[i] = state;
245       }
246       // Get evec
247       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
248                                     (const CeedScalar **) &impl->edata[i]);
249       CeedChkBackend(ierr);
250     }
251   }
252   return CEED_ERROR_SUCCESS;
253 }
254 
255 //------------------------------------------------------------------------------
256 // Input Basis Action
257 //------------------------------------------------------------------------------
258 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
259     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
260     CeedInt numinputfields, CeedInt blksize, bool skipactive,
261     CeedOperator_Blocked *impl) {
262   CeedInt ierr;
263   CeedInt dim, elemsize, size;
264   CeedElemRestriction Erestrict;
265   CeedEvalMode emode;
266   CeedBasis basis;
267 
268   for (CeedInt i=0; i<numinputfields; i++) {
269     // Skip active input
270     if (skipactive) {
271       CeedVector vec;
272       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr);
273       if (vec == CEED_VECTOR_ACTIVE)
274         continue;
275     }
276 
277     // Get elemsize, emode, size
278     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
279     CeedChkBackend(ierr);
280     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
281     CeedChkBackend(ierr);
282     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
283     CeedChkBackend(ierr);
284     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChkBackend(ierr);
285     // Basis action
286     switch(emode) {
287     case CEED_EVAL_NONE:
288       ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
289                                 CEED_USE_POINTER,
290                                 &impl->edata[i][e*Q*size]); CeedChkBackend(ierr);
291       break;
292     case CEED_EVAL_INTERP:
293       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
294       CeedChkBackend(ierr);
295       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
296                                 CEED_USE_POINTER,
297                                 &impl->edata[i][e*elemsize*size]);
298       CeedChkBackend(ierr);
299       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
300                             CEED_EVAL_INTERP, impl->evecsin[i],
301                             impl->qvecsin[i]); CeedChkBackend(ierr);
302       break;
303     case CEED_EVAL_GRAD:
304       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
305       CeedChkBackend(ierr);
306       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
307       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
308                                 CEED_USE_POINTER,
309                                 &impl->edata[i][e*elemsize*size/dim]);
310       CeedChkBackend(ierr);
311       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
312                             CEED_EVAL_GRAD, impl->evecsin[i],
313                             impl->qvecsin[i]); CeedChkBackend(ierr);
314       break;
315     case CEED_EVAL_WEIGHT:
316       break;  // No action
317     // LCOV_EXCL_START
318     case CEED_EVAL_DIV:
319     case CEED_EVAL_CURL: {
320       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
321       CeedChkBackend(ierr);
322       Ceed ceed;
323       ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr);
324       return CeedError(ceed, CEED_ERROR_BACKEND,
325                        "Ceed evaluation mode not implemented");
326       // LCOV_EXCL_STOP
327     }
328     }
329   }
330   return CEED_ERROR_SUCCESS;
331 }
332 
333 //------------------------------------------------------------------------------
334 // Output Basis Action
335 //------------------------------------------------------------------------------
336 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
337     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
338     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
339     CeedOperator op, CeedOperator_Blocked *impl) {
340   CeedInt ierr;
341   CeedInt dim, elemsize, size;
342   CeedElemRestriction Erestrict;
343   CeedEvalMode emode;
344   CeedBasis basis;
345 
346   for (CeedInt i=0; i<numoutputfields; i++) {
347     // Get elemsize, emode, size
348     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
349     CeedChkBackend(ierr);
350     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
351     CeedChkBackend(ierr);
352     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
353     CeedChkBackend(ierr);
354     ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size);
355     CeedChkBackend(ierr);
356     // Basis action
357     switch(emode) {
358     case CEED_EVAL_NONE:
359       break; // No action
360     case CEED_EVAL_INTERP:
361       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
362       CeedChkBackend(ierr);
363       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
364                                 CEED_USE_POINTER,
365                                 &impl->edata[i + numinputfields][e*elemsize*size]);
366       CeedChkBackend(ierr);
367       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
368                             CEED_EVAL_INTERP, impl->qvecsout[i],
369                             impl->evecsout[i]); CeedChkBackend(ierr);
370       break;
371     case CEED_EVAL_GRAD:
372       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
373       CeedChkBackend(ierr);
374       ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr);
375       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
376                                 CEED_USE_POINTER,
377                                 &impl->edata[i + numinputfields][e*elemsize*size/dim]);
378       CeedChkBackend(ierr);
379       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
380                             CEED_EVAL_GRAD, impl->qvecsout[i],
381                             impl->evecsout[i]); CeedChkBackend(ierr);
382       break;
383     // LCOV_EXCL_START
384     case CEED_EVAL_WEIGHT: {
385       Ceed ceed;
386       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
387       return CeedError(ceed, CEED_ERROR_BACKEND,
388                        "CEED_EVAL_WEIGHT cannot be an output "
389                        "evaluation mode");
390     }
391     case CEED_EVAL_DIV:
392     case CEED_EVAL_CURL: {
393       Ceed ceed;
394       ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
395       return CeedError(ceed, CEED_ERROR_BACKEND,
396                        "Ceed evaluation mode not implemented");
397       // LCOV_EXCL_STOP
398     }
399     }
400   }
401   return CEED_ERROR_SUCCESS;
402 }
403 
404 //------------------------------------------------------------------------------
405 // Restore Input Vectors
406 //------------------------------------------------------------------------------
407 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields,
408     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
409     bool skipactive, CeedOperator_Blocked *impl) {
410   CeedInt ierr;
411   CeedEvalMode emode;
412 
413   for (CeedInt i=0; i<numinputfields; i++) {
414     // Skip active inputs
415     if (skipactive) {
416       CeedVector vec;
417       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr);
418       if (vec == CEED_VECTOR_ACTIVE)
419         continue;
420     }
421     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
422     CeedChkBackend(ierr);
423     if (emode == CEED_EVAL_WEIGHT) { // Skip
424     } else {
425       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
426                                         (const CeedScalar **) &impl->edata[i]);
427       CeedChkBackend(ierr);
428     }
429   }
430   return CEED_ERROR_SUCCESS;
431 }
432 
433 //------------------------------------------------------------------------------
434 // Operator Apply
435 //------------------------------------------------------------------------------
436 static int CeedOperatorApplyAdd_Blocked(CeedOperator op, CeedVector invec,
437                                         CeedVector outvec,
438                                         CeedRequest *request) {
439   int ierr;
440   CeedOperator_Blocked *impl;
441   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
442   const CeedInt blksize = 8;
443   CeedInt Q, numinputfields, numoutputfields, numelements, size;
444   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChkBackend(ierr);
445   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
446   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
447   CeedQFunction qf;
448   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
449   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
450   CeedChkBackend(ierr);
451   CeedOperatorField *opinputfields, *opoutputfields;
452   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
453   CeedChkBackend(ierr);
454   CeedQFunctionField *qfinputfields, *qfoutputfields;
455   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
456   CeedChkBackend(ierr);
457   CeedEvalMode emode;
458   CeedVector vec;
459 
460   // Setup
461   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
462 
463   // Input Evecs and Restriction
464   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
465                                          opinputfields, invec, false, impl,
466                                          request); CeedChkBackend(ierr);
467 
468   // Output Evecs
469   for (CeedInt i=0; i<numoutputfields; i++) {
470     ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST,
471                               &impl->edata[i + numinputfields]); CeedChkBackend(ierr);
472   }
473 
474   // Loop through elements
475   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
476     // Output pointers
477     for (CeedInt i=0; i<numoutputfields; i++) {
478       ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
479       CeedChkBackend(ierr);
480       if (emode == CEED_EVAL_NONE) {
481         ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size);
482         CeedChkBackend(ierr);
483         ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
484                                   CEED_USE_POINTER,
485                                   &impl->edata[i + numinputfields][e*Q*size]);
486         CeedChkBackend(ierr);
487       }
488     }
489 
490     // Input basis apply
491     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
492                                           numinputfields, blksize, false, impl);
493     CeedChkBackend(ierr);
494 
495     // Q function
496     if (!impl->identityqf) {
497       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
498       CeedChkBackend(ierr);
499     }
500 
501     // Output basis apply
502     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields,
503                                            blksize, numinputfields,
504                                            numoutputfields, op, impl);
505     CeedChkBackend(ierr);
506   }
507 
508   // Output restriction
509   for (CeedInt i=0; i<numoutputfields; i++) {
510     // Restore evec
511     ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein],
512                                   &impl->edata[i + numinputfields]); CeedChkBackend(ierr);
513     // Get output vector
514     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec);
515     CeedChkBackend(ierr);
516     // Active
517     if (vec == CEED_VECTOR_ACTIVE)
518       vec = outvec;
519     // Restrict
520     ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein],
521                                     CEED_TRANSPOSE, impl->evecs[i+impl->numein],
522                                     vec, request); CeedChkBackend(ierr);
523 
524   }
525 
526   // Restore input arrays
527   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
528          opinputfields, false, impl); CeedChkBackend(ierr);
529 
530   return CEED_ERROR_SUCCESS;
531 }
532 
533 //------------------------------------------------------------------------------
534 // Assemble Linear QFunction
535 //------------------------------------------------------------------------------
536 static int CeedOperatorLinearAssembleQFunction_Blocked(CeedOperator op,
537     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
538   int ierr;
539   CeedOperator_Blocked *impl;
540   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
541   const CeedInt blksize = 8;
542   CeedInt Q, numinputfields, numoutputfields, numelements, size;
543   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChkBackend(ierr);
544   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChkBackend(ierr);
545   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
546   CeedQFunction qf;
547   ierr = CeedOperatorGetQFunction(op, &qf); CeedChkBackend(ierr);
548   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
549   CeedChkBackend(ierr);
550   CeedOperatorField *opinputfields, *opoutputfields;
551   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
552   CeedChkBackend(ierr);
553   CeedQFunctionField *qfinputfields, *qfoutputfields;
554   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
555   CeedChkBackend(ierr);
556   CeedVector vec, lvec;
557   CeedInt numactivein = 0, numactiveout = 0;
558   CeedVector *activein = NULL;
559   CeedScalar *a, *tmp;
560   Ceed ceed;
561   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
562 
563   // Setup
564   ierr = CeedOperatorSetup_Blocked(op); CeedChkBackend(ierr);
565 
566   // Check for identity
567   if (impl->identityqf)
568     // LCOV_EXCL_START
569     return CeedError(ceed, CEED_ERROR_BACKEND,
570                      "Assembling identity qfunctions not supported");
571   // LCOV_EXCL_STOP
572 
573   // Input Evecs and Restriction
574   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
575                                          opinputfields, NULL, true, impl,
576                                          request); CeedChkBackend(ierr);
577 
578   // Count number of active input fields
579   for (CeedInt i=0; i<numinputfields; i++) {
580     // Get input vector
581     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChkBackend(ierr);
582     // Check if active input
583     if (vec == CEED_VECTOR_ACTIVE) {
584       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChkBackend(ierr);
585       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChkBackend(ierr);
586       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
587       CeedChkBackend(ierr);
588       ierr = CeedRealloc(numactivein + size, &activein); CeedChkBackend(ierr);
589       for (CeedInt field=0; field<size; field++) {
590         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
591         CeedChkBackend(ierr);
592         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
593                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
594         CeedChkBackend(ierr);
595       }
596       numactivein += size;
597       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChkBackend(ierr);
598     }
599   }
600 
601   // Count number of active output fields
602   for (CeedInt i=0; i<numoutputfields; i++) {
603     // Get output vector
604     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec);
605     CeedChkBackend(ierr);
606     // Check if active output
607     if (vec == CEED_VECTOR_ACTIVE) {
608       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size);
609       CeedChkBackend(ierr);
610       numactiveout += size;
611     }
612   }
613 
614   // Check sizes
615   if (!numactivein || !numactiveout)
616     // LCOV_EXCL_START
617     return CeedError(ceed, CEED_ERROR_BACKEND,
618                      "Cannot assemble QFunction without active inputs "
619                      "and outputs");
620   // LCOV_EXCL_STOP
621 
622   // Setup lvec
623   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
624                           &lvec); CeedChkBackend(ierr);
625   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChkBackend(ierr);
626 
627   // Create output restriction
628   CeedInt strides[3] = {1, Q, numactivein *numactiveout*Q};
629   ierr = CeedElemRestrictionCreateStrided(ceed, numelements, Q,
630                                           numactivein*numactiveout,
631                                           numactivein*numactiveout*numelements*Q,
632                                           strides, rstr); CeedChkBackend(ierr);
633   // Create assembled vector
634   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
635                           assembled); CeedChkBackend(ierr);
636 
637   // Loop through elements
638   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
639     // Input basis apply
640     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
641                                           numinputfields, blksize, true, impl);
642     CeedChkBackend(ierr);
643 
644     // Assemble QFunction
645     for (CeedInt in=0; in<numactivein; in++) {
646       // Set Inputs
647       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChkBackend(ierr);
648       if (numactivein > 1) {
649         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
650                                   0.0); CeedChkBackend(ierr);
651       }
652       // Set Outputs
653       for (CeedInt out=0; out<numoutputfields; out++) {
654         // Get output vector
655         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
656         CeedChkBackend(ierr);
657         // Check if active output
658         if (vec == CEED_VECTOR_ACTIVE) {
659           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
660                              CEED_USE_POINTER, a); CeedChkBackend(ierr);
661           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
662           CeedChkBackend(ierr);
663           a += size*Q*blksize; // Advance the pointer by the size of the output
664         }
665       }
666       // Apply QFunction
667       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
668       CeedChkBackend(ierr);
669     }
670   }
671 
672   // Un-set output Qvecs to prevent accidental overwrite of Assembled
673   for (CeedInt out=0; out<numoutputfields; out++) {
674     // Get output vector
675     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
676     CeedChkBackend(ierr);
677     // Check if active output
678     if (vec == CEED_VECTOR_ACTIVE) {
679       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
680                          NULL); CeedChkBackend(ierr);
681     }
682   }
683 
684   // Restore input arrays
685   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
686          opinputfields, true, impl); CeedChkBackend(ierr);
687 
688   // Output blocked restriction
689   ierr = CeedVectorRestoreArray(lvec, &a); CeedChkBackend(ierr);
690   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChkBackend(ierr);
691   CeedElemRestriction blkrstr;
692   ierr = CeedElemRestrictionCreateBlockedStrided(ceed, numelements, Q, blksize,
693          numactivein*numactiveout, numactivein*numactiveout*numelements*Q,
694          strides, &blkrstr); CeedChkBackend(ierr);
695   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled,
696                                   request); CeedChkBackend(ierr);
697 
698   // Cleanup
699   for (CeedInt i=0; i<numactivein; i++) {
700     ierr = CeedVectorDestroy(&activein[i]); CeedChkBackend(ierr);
701   }
702   ierr = CeedFree(&activein); CeedChkBackend(ierr);
703   ierr = CeedVectorDestroy(&lvec); CeedChkBackend(ierr);
704   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChkBackend(ierr);
705 
706   return CEED_ERROR_SUCCESS;
707 }
708 
709 //------------------------------------------------------------------------------
710 // Operator Destroy
711 //------------------------------------------------------------------------------
712 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
713   int ierr;
714   CeedOperator_Blocked *impl;
715   ierr = CeedOperatorGetData(op, &impl); CeedChkBackend(ierr);
716 
717   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
718     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChkBackend(ierr);
719     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChkBackend(ierr);
720   }
721   ierr = CeedFree(&impl->blkrestr); CeedChkBackend(ierr);
722   ierr = CeedFree(&impl->evecs); CeedChkBackend(ierr);
723   ierr = CeedFree(&impl->edata); CeedChkBackend(ierr);
724   ierr = CeedFree(&impl->inputstate); CeedChkBackend(ierr);
725 
726   for (CeedInt i=0; i<impl->numein; i++) {
727     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChkBackend(ierr);
728     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChkBackend(ierr);
729   }
730   ierr = CeedFree(&impl->evecsin); CeedChkBackend(ierr);
731   ierr = CeedFree(&impl->qvecsin); CeedChkBackend(ierr);
732 
733   for (CeedInt i=0; i<impl->numeout; i++) {
734     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChkBackend(ierr);
735     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChkBackend(ierr);
736   }
737   ierr = CeedFree(&impl->evecsout); CeedChkBackend(ierr);
738   ierr = CeedFree(&impl->qvecsout); CeedChkBackend(ierr);
739 
740   ierr = CeedFree(&impl); CeedChkBackend(ierr);
741   return CEED_ERROR_SUCCESS;
742 }
743 
744 //------------------------------------------------------------------------------
745 // Operator Create
746 //------------------------------------------------------------------------------
747 int CeedOperatorCreate_Blocked(CeedOperator op) {
748   int ierr;
749   Ceed ceed;
750   ierr = CeedOperatorGetCeed(op, &ceed); CeedChkBackend(ierr);
751   CeedOperator_Blocked *impl;
752 
753   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
754   ierr = CeedOperatorSetData(op, impl); CeedChkBackend(ierr);
755 
756   ierr = CeedSetBackendFunction(ceed, "Operator", op, "LinearAssembleQFunction",
757                                 CeedOperatorLinearAssembleQFunction_Blocked);
758   CeedChkBackend(ierr);
759   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
760                                 CeedOperatorApplyAdd_Blocked); CeedChkBackend(ierr);
761   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
762                                 CeedOperatorDestroy_Blocked); CeedChkBackend(ierr);
763   return CEED_ERROR_SUCCESS;
764 }
765 //------------------------------------------------------------------------------
766