xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision a7b7f929ab476a681f8a846f95c06992dbac3dd3)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include "ceed-blocked.h"
18 #include "../ref/ceed-ref.h"
19 
20 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
21   int ierr;
22   CeedOperator_Blocked *impl;
23   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
24 
25   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
26     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
27     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
28   }
29   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
30   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
31   ierr = CeedFree(&impl->edata); CeedChk(ierr);
32   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
33 
34   for (CeedInt i=0; i<impl->numein; i++) {
35     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
36     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
37   }
38   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
39   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
40 
41   for (CeedInt i=0; i<impl->numeout; i++) {
42     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
43     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
44   }
45   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
46   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
47 
48   ierr = CeedFree(&impl); CeedChk(ierr);
49   return 0;
50 }
51 
52 /*
53   Setup infields or outfields
54  */
55 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
56     CeedOperator op, bool inOrOut,
57     CeedElemRestriction *blkrestr,
58     CeedVector *fullevecs, CeedVector *evecs,
59     CeedVector *qvecs, CeedInt starte,
60     CeedInt numfields, CeedInt Q) {
61   CeedInt dim, ierr, ncomp, size, P;
62   Ceed ceed;
63   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
64   CeedBasis basis;
65   CeedElemRestriction r;
66   CeedOperatorField *opfields;
67   CeedQFunctionField *qffields;
68   if (inOrOut) {
69     ierr = CeedOperatorGetFields(op, NULL, &opfields);
70     CeedChk(ierr);
71     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
72     CeedChk(ierr);
73   } else {
74     ierr = CeedOperatorGetFields(op, &opfields, NULL);
75     CeedChk(ierr);
76     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
77     CeedChk(ierr);
78   }
79   const CeedInt blksize = 8;
80 
81   // Loop over fields
82   for (CeedInt i=0; i<numfields; i++) {
83     CeedEvalMode emode;
84     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
85 
86     if (emode != CEED_EVAL_WEIGHT) {
87       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
88       CeedChk(ierr);
89       CeedElemRestriction_Ref *data;
90       ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr);
91       Ceed ceed;
92       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
93       CeedInt nelem, elemsize, nnodes;
94       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
95       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
96       ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
97       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
98       ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize,
99                                               blksize, nnodes, ncomp,
100                                               CEED_MEM_HOST, CEED_COPY_VALUES,
101                                               data->indices, &blkrestr[i+starte]);
102       CeedChk(ierr);
103       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
104                                              &fullevecs[i+starte]);
105       CeedChk(ierr);
106     }
107 
108     switch(emode) {
109     case CEED_EVAL_NONE:
110       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
111       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
112       break;
113     case CEED_EVAL_INTERP:
114       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
115       ierr = CeedElemRestrictionGetElementSize(r, &P);
116       CeedChk(ierr);
117       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
118       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
119       break;
120     case CEED_EVAL_GRAD:
121       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
122       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
123       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
124       ierr = CeedElemRestrictionGetElementSize(r, &P);
125       CeedChk(ierr);
126       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
127       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
128       break;
129     case CEED_EVAL_WEIGHT: // Only on input fields
130       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
131       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
132       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
133                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]);
134       CeedChk(ierr);
135 
136       break;
137     case CEED_EVAL_DIV:
138       break; // Not implemented
139     case CEED_EVAL_CURL:
140       break; // Not implemented
141     }
142   }
143   return 0;
144 }
145 
146 /*
147   CeedOperator needs to connect all the named fields (be they active or passive)
148   to the named inputs and outputs of its CeedQFunction.
149  */
150 static int CeedOperatorSetup_Blocked(CeedOperator op) {
151   int ierr;
152   bool setupdone;
153   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
154   if (setupdone) return 0;
155   Ceed ceed;
156   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
157   CeedOperator_Blocked *impl;
158   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
159   CeedQFunction qf;
160   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
161   CeedInt Q, numinputfields, numoutputfields;
162   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
163   ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr);
164   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
165   CeedChk(ierr);
166   CeedOperatorField *opinputfields, *opoutputfields;
167   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
168   CeedChk(ierr);
169   CeedQFunctionField *qfinputfields, *qfoutputfields;
170   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
171   CeedChk(ierr);
172 
173   // Allocate
174   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
175   CeedChk(ierr);
176   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
177   CeedChk(ierr);
178   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
179   CeedChk(ierr);
180 
181   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
182   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
183   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
184   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
185   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
186 
187   impl->numein = numinputfields; impl->numeout = numoutputfields;
188 
189   // Set up infield and outfield pointer arrays
190   // Infields
191   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr,
192                                          impl->evecs, impl->evecsin,
193                                          impl->qvecsin, 0,
194                                          numinputfields, Q);
195   CeedChk(ierr);
196   // Outfields
197   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr,
198                                          impl->evecs, impl->evecsout,
199                                          impl->qvecsout, numinputfields,
200                                          numoutputfields, Q);
201   CeedChk(ierr);
202 
203   // Identity QFunctions
204   if (impl->identityqf) {
205     CeedEvalMode inmode, outmode;
206     CeedQFunctionField *infields, *outfields;
207     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr);
208 
209     for (CeedInt i=0; i<numinputfields; i++) {
210       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
211       CeedChk(ierr);
212       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
213       CeedChk(ierr);
214 
215       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
216       impl->qvecsout[i] = impl->qvecsin[i];
217       ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr);
218     }
219   }
220 
221   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
222 
223   return 0;
224 }
225 
226 // Setup Input fields
227 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields,
228     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
229     CeedVector invec, bool skipactive, CeedOperator_Blocked *impl,
230     CeedRequest *request) {
231   CeedInt ierr;
232   CeedEvalMode emode;
233   CeedVector vec;
234   CeedTransposeMode lmode;
235   uint64_t state;
236 
237   for (CeedInt i=0; i<numinputfields; i++) {
238     // Get input vector
239     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
240     if (vec == CEED_VECTOR_ACTIVE) {
241       if (skipactive)
242         continue;
243       else
244         vec = invec;
245     }
246 
247     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
248     CeedChk(ierr);
249     if (emode == CEED_EVAL_WEIGHT) { // Skip
250     } else {
251       // Restrict
252       ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
253       if (state != impl->inputstate[i] || vec == invec) {
254         ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode);
255         CeedChk(ierr);
256         ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
257                                         lmode, vec, impl->evecs[i],
258                                         request); CeedChk(ierr); CeedChk(ierr);
259         impl->inputstate[i] = state;
260       }
261       // Get evec
262       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
263                                     (const CeedScalar **) &impl->edata[i]);
264       CeedChk(ierr);
265     }
266   }
267   return 0;
268 }
269 
270 // Input basis action
271 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
272     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
273     CeedInt numinputfields, CeedInt blksize, bool skipactive,
274     CeedOperator_Blocked *impl) {
275   CeedInt ierr;
276   CeedInt dim, elemsize, size;
277   CeedElemRestriction Erestrict;
278   CeedEvalMode emode;
279   CeedBasis basis;
280 
281   for (CeedInt i=0; i<numinputfields; i++) {
282     // Skip active input
283     if (skipactive) {
284       CeedVector vec;
285       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
286       if (vec == CEED_VECTOR_ACTIVE)
287         continue;
288     }
289 
290     // Get elemsize, emode, size
291     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
292     CeedChk(ierr);
293     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
294     CeedChk(ierr);
295     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
296     CeedChk(ierr);
297     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
298     // Basis action
299     switch(emode) {
300     case CEED_EVAL_NONE:
301       ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
302                                 CEED_USE_POINTER,
303                                 &impl->edata[i][e*Q*size]); CeedChk(ierr);
304       break;
305     case CEED_EVAL_INTERP:
306       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
307       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
308                                 CEED_USE_POINTER,
309                                 &impl->edata[i][e*elemsize*size]);
310       CeedChk(ierr);
311       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
312                             CEED_EVAL_INTERP, impl->evecsin[i],
313                             impl->qvecsin[i]); CeedChk(ierr);
314       break;
315     case CEED_EVAL_GRAD:
316       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
317       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
318       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
319                                 CEED_USE_POINTER,
320                                 &impl->edata[i][e*elemsize*size/dim]);
321       CeedChk(ierr);
322       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
323                             CEED_EVAL_GRAD, impl->evecsin[i],
324                             impl->qvecsin[i]); CeedChk(ierr);
325       break;
326     case CEED_EVAL_WEIGHT:
327       break;  // No action
328     case CEED_EVAL_DIV:
329     case CEED_EVAL_CURL: {
330       // LCOV_EXCL_START
331       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
332       CeedChk(ierr);
333       Ceed ceed;
334       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
335       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
336       // LCOV_EXCL_STOP
337       break; // Not implemented
338     }
339     }
340   }
341   return 0;
342 }
343 
344 // Output basis action
345 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
346     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
347     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
348     CeedOperator op, CeedOperator_Blocked *impl) {
349   CeedInt ierr;
350   CeedInt dim, elemsize, size;
351   CeedElemRestriction Erestrict;
352   CeedEvalMode emode;
353   CeedBasis basis;
354 
355   for (CeedInt i=0; i<numoutputfields; i++) {
356     // Get elemsize, emode, size
357     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
358     CeedChk(ierr);
359     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
360     CeedChk(ierr);
361     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
362     CeedChk(ierr);
363     ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
364     // Basis action
365     switch(emode) {
366     case CEED_EVAL_NONE:
367       break; // No action
368     case CEED_EVAL_INTERP:
369       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
370       CeedChk(ierr);
371       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
372                                 CEED_USE_POINTER,
373                                 &impl->edata[i + numinputfields][e*elemsize*size]);
374       CeedChk(ierr);
375       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
376                             CEED_EVAL_INTERP, impl->qvecsout[i],
377                             impl->evecsout[i]); CeedChk(ierr);
378       break;
379     case CEED_EVAL_GRAD:
380       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
381       CeedChk(ierr);
382       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
383       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
384                                 CEED_USE_POINTER,
385                                 &impl->edata[i + numinputfields][e*elemsize*size/dim]);
386       CeedChk(ierr);
387       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
388                             CEED_EVAL_GRAD, impl->qvecsout[i],
389                             impl->evecsout[i]); CeedChk(ierr);
390       break;
391     case CEED_EVAL_WEIGHT: {
392       // LCOV_EXCL_START
393       Ceed ceed;
394       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
395       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
396                        "evaluation mode");
397       // LCOV_EXCL_STOP
398       break; // Should not occur
399     }
400     case CEED_EVAL_DIV:
401     case CEED_EVAL_CURL: {
402       // LCOV_EXCL_START
403       Ceed ceed;
404       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
405       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
406       // LCOV_EXCL_STOP
407       break; // Not implemented
408     }
409     }
410   }
411   return 0;
412 }
413 
414 // Restore Inputs
415 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields,
416     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
417     bool skipactive, CeedOperator_Blocked *impl) {
418   CeedInt ierr;
419   CeedEvalMode emode;
420 
421   for (CeedInt i=0; i<numinputfields; i++) {
422     // Skip active inputs
423     if (skipactive) {
424       CeedVector vec;
425       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
426       if (vec == CEED_VECTOR_ACTIVE)
427         continue;
428     }
429     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
430     CeedChk(ierr);
431     if (emode == CEED_EVAL_WEIGHT) { // Skip
432     } else {
433       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
434                                         (const CeedScalar **) &impl->edata[i]);
435       CeedChk(ierr);
436     }
437   }
438   return 0;
439 }
440 
441 // Apply Ceed Operator
442 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec,
443                                      CeedVector outvec,
444                                      CeedRequest *request) {
445   int ierr;
446   CeedOperator_Blocked *impl;
447   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
448   const CeedInt blksize = 8;
449   CeedInt Q, numinputfields, numoutputfields, numelements, size;
450   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
451   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
452   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
453   CeedQFunction qf;
454   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
455   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
456   CeedChk(ierr);
457   CeedTransposeMode lmode;
458   CeedOperatorField *opinputfields, *opoutputfields;
459   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
460   CeedChk(ierr);
461   CeedQFunctionField *qfinputfields, *qfoutputfields;
462   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
463   CeedChk(ierr);
464   CeedEvalMode emode;
465   CeedVector vec;
466 
467   // Setup
468   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
469 
470   // Input Evecs and Restriction
471   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
472                                          opinputfields, invec, false, impl,
473                                          request); CeedChk(ierr);
474 
475   // Output Evecs
476   for (CeedInt i=0; i<numoutputfields; i++) {
477     ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST,
478                               &impl->edata[i + numinputfields]); CeedChk(ierr);
479   }
480 
481   // Loop through elements
482   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
483     // Output pointers
484     for (CeedInt i=0; i<numoutputfields; i++) {
485       ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
486       CeedChk(ierr);
487       if (emode == CEED_EVAL_NONE) {
488         ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size);
489         CeedChk(ierr);
490         ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
491                                   CEED_USE_POINTER,
492                                   &impl->edata[i + numinputfields][e*Q*size]);
493         CeedChk(ierr);
494       }
495     }
496 
497     // Input basis apply
498     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
499                                           numinputfields, blksize, false, impl);
500     CeedChk(ierr);
501 
502     // Q function
503     if (!impl->identityqf) {
504       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
505       CeedChk(ierr);
506     }
507 
508     // Output basis apply
509     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields,
510                                            blksize, numinputfields,
511                                            numoutputfields, op, impl);
512     CeedChk(ierr);
513   }
514 
515   // Zero lvecs
516   for (CeedInt i=0; i<numoutputfields; i++) {
517     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
518     if (vec == CEED_VECTOR_ACTIVE) {
519       if (!impl->add) {
520         vec = outvec;
521         ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
522       }
523     } else {
524       ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
525     }
526   }
527   impl->add = false;
528 
529   // Output restriction
530   for (CeedInt i=0; i<numoutputfields; i++) {
531     // Restore evec
532     ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein],
533                                   &impl->edata[i + numinputfields]); CeedChk(ierr);
534     // Get output vector
535     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
536     // Active
537     if (vec == CEED_VECTOR_ACTIVE)
538       vec = outvec;
539     // Restrict
540     ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); CeedChk(ierr);
541     ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE,
542                                     lmode, impl->evecs[i+impl->numein], vec,
543                                     request); CeedChk(ierr);
544 
545   }
546 
547   // Restore input arrays
548   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
549          opinputfields, false, impl); CeedChk(ierr);
550 
551   return 0;
552 }
553 
554 // Assemble Linear QFunction
555 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op,
556     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
557   int ierr;
558   CeedOperator_Blocked *impl;
559   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
560   const CeedInt blksize = 8;
561   CeedInt Q, numinputfields, numoutputfields, numelements, size;
562   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
563   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
564   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
565   CeedQFunction qf;
566   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
567   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
568   CeedChk(ierr);
569   CeedOperatorField *opinputfields, *opoutputfields;
570   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
571   CeedChk(ierr);
572   CeedQFunctionField *qfinputfields, *qfoutputfields;
573   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
574   CeedChk(ierr);
575   CeedVector vec, lvec;
576   CeedInt numactivein = 0, numactiveout = 0;
577   CeedVector *activein = NULL;
578   CeedScalar *a, *tmp;
579   Ceed ceed;
580   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
581 
582   // Setup
583   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
584 
585   // Check for identity
586   if (impl->identityqf)
587     // LCOV_EXCL_START
588     return CeedError(ceed, 1, "Assembling identity qfunctions not supported");
589   // LCOV_EXCL_STOP
590 
591   // Input Evecs and Restriction
592   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
593                                          opinputfields, NULL, true, impl,
594                                          request); CeedChk(ierr);
595 
596   // Count number of active input fields
597   for (CeedInt i=0; i<numinputfields; i++) {
598     // Get input vector
599     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
600     // Check if active input
601     if (vec == CEED_VECTOR_ACTIVE) {
602       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
603       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
604       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
605       CeedChk(ierr);
606       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
607       for (CeedInt field=0; field<size; field++) {
608         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
609         CeedChk(ierr);
610         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
611                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
612         CeedChk(ierr);
613       }
614       numactivein += size;
615       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
616     }
617   }
618 
619   // Count number of active output fields
620   for (CeedInt i=0; i<numoutputfields; i++) {
621     // Get output vector
622     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
623     // Check if active output
624     if (vec == CEED_VECTOR_ACTIVE) {
625       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
626       numactiveout += size;
627     }
628   }
629 
630   // Check sizes
631   if (!numactivein || !numactiveout)
632     // LCOV_EXCL_START
633     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
634                      "and outputs");
635   // LCOV_EXCL_STOP
636 
637   // Setup lvec
638   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
639                           &lvec); CeedChk(ierr);
640   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
641 
642   // Create output restriction
643   ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q,
644          numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr);
645   // Create assembled vector
646   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
647                           assembled); CeedChk(ierr);
648 
649   // Loop through elements
650   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
651     // Input basis apply
652     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
653                                           numinputfields, blksize, true, impl);
654     CeedChk(ierr);
655 
656     // Assemble QFunction
657     for (CeedInt in=0; in<numactivein; in++) {
658       // Set Inputs
659       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
660       if (numactivein > 1) {
661         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
662                                   0.0); CeedChk(ierr);
663       }
664       // Set Outputs
665       for (CeedInt out=0; out<numoutputfields; out++) {
666         // Get output vector
667         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
668         CeedChk(ierr);
669         // Check if active output
670         if (vec == CEED_VECTOR_ACTIVE) {
671           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
672                              CEED_USE_POINTER, a); CeedChk(ierr);
673           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
674           CeedChk(ierr);
675           a += size*Q*blksize; // Advance the pointer by the size of the output
676         }
677       }
678       // Apply QFunction
679       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
680       CeedChk(ierr);
681     }
682   }
683 
684   // Un-set output Qvecs to prevent accidental overwrite of Assembled
685   for (CeedInt out=0; out<numoutputfields; out++) {
686     // Get output vector
687     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
688     CeedChk(ierr);
689     // Check if active output
690     if (vec == CEED_VECTOR_ACTIVE) {
691       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
692                          NULL); CeedChk(ierr);
693     }
694   }
695 
696   // Restore input arrays
697   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
698          opinputfields, true, impl); CeedChk(ierr);
699 
700   // Output blocked restriction
701   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
702   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
703   CeedElemRestriction blkrstr;
704   ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize,
705                                           numelements*Q,
706                                           numactivein*numactiveout,
707                                           CEED_MEM_HOST, CEED_COPY_VALUES,
708                                           NULL, &blkrstr); CeedChk(ierr);
709   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE,
710                                   lvec, *assembled, request); CeedChk(ierr);
711 
712   // Cleanup
713   for (CeedInt i=0; i<numactivein; i++) {
714     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
715   }
716   ierr = CeedFree(&activein); CeedChk(ierr);
717   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
718   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
719 
720   return 0;
721 }
722 
723 int CeedOperatorCreate_Blocked(CeedOperator op) {
724   int ierr;
725   Ceed ceed;
726   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
727   CeedOperator_Blocked *impl;
728 
729   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
730   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
731 
732   ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction",
733                                 CeedOperatorAssembleLinearQFunction_Blocked);
734   CeedChk(ierr);
735   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply",
736                                 CeedOperatorApply_Blocked); CeedChk(ierr);
737   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
738                                 CeedOperatorDestroy_Blocked); CeedChk(ierr);
739   return 0;
740 }
741