xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision ed768a3cf06a2a740bbfe20134286531e89dc9b1)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include "ceed-blocked.h"
18 #include "../ref/ceed-ref.h"
19 
20 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
21   int ierr;
22   CeedOperator_Blocked *impl;
23   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
24 
25   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
26     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
27     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
28   }
29   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
30   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
31   ierr = CeedFree(&impl->edata); CeedChk(ierr);
32   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
33 
34   for (CeedInt i=0; i<impl->numein; i++) {
35     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
36     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
37   }
38   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
39   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
40 
41   for (CeedInt i=0; i<impl->numeout; i++) {
42     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
43     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
44   }
45   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
46   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
47 
48   ierr = CeedFree(&impl); CeedChk(ierr);
49   return 0;
50 }
51 
52 /*
53   Setup infields or outfields
54  */
55 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
56     CeedOperator op, bool inOrOut,
57     CeedElemRestriction *blkrestr,
58     CeedVector *fullevecs, CeedVector *evecs,
59     CeedVector *qvecs, CeedInt starte,
60     CeedInt numfields, CeedInt Q) {
61   CeedInt dim, ierr, ncomp, size, P;
62   Ceed ceed;
63   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
64   CeedBasis basis;
65   CeedElemRestriction r;
66   CeedOperatorField *opfields;
67   CeedQFunctionField *qffields;
68   if (inOrOut) {
69     ierr = CeedOperatorGetFields(op, NULL, &opfields);
70     CeedChk(ierr);
71     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
72     CeedChk(ierr);
73   } else {
74     ierr = CeedOperatorGetFields(op, &opfields, NULL);
75     CeedChk(ierr);
76     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
77     CeedChk(ierr);
78   }
79   const CeedInt blksize = 8;
80 
81   // Loop over fields
82   for (CeedInt i=0; i<numfields; i++) {
83     CeedEvalMode emode;
84     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
85 
86     if (emode != CEED_EVAL_WEIGHT) {
87       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
88       CeedChk(ierr);
89       CeedElemRestriction_Ref *data;
90       ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr);
91       Ceed ceed;
92       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
93       CeedInt nelem, elemsize, nnodes;
94       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
95       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
96       ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
97       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
98       ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize,
99                                               blksize, nnodes, ncomp,
100                                               CEED_MEM_HOST, CEED_COPY_VALUES,
101                                               data->indices, &blkrestr[i+starte]);
102       CeedChk(ierr);
103       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
104                                              &fullevecs[i+starte]);
105       CeedChk(ierr);
106     }
107 
108     switch(emode) {
109     case CEED_EVAL_NONE:
110       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
111       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
112       break;
113     case CEED_EVAL_INTERP:
114       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
115       ierr = CeedElemRestrictionGetElementSize(r, &P);
116       CeedChk(ierr);
117       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
118       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
119       break;
120     case CEED_EVAL_GRAD:
121       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
122       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
123       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
124       ierr = CeedElemRestrictionGetElementSize(r, &P);
125       CeedChk(ierr);
126       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
127       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
128       break;
129     case CEED_EVAL_WEIGHT: // Only on input fields
130       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
131       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
132       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
133                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]);
134       CeedChk(ierr);
135 
136       break;
137     case CEED_EVAL_DIV:
138       break; // Not implemented
139     case CEED_EVAL_CURL:
140       break; // Not implemented
141     }
142   }
143   return 0;
144 }
145 
146 /*
147   CeedOperator needs to connect all the named fields (be they active or passive)
148   to the named inputs and outputs of its CeedQFunction.
149  */
150 static int CeedOperatorSetup_Blocked(CeedOperator op) {
151   int ierr;
152   bool setupdone;
153   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
154   if (setupdone) return 0;
155   Ceed ceed;
156   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
157   CeedOperator_Blocked *impl;
158   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
159   CeedQFunction qf;
160   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
161   CeedInt Q, numinputfields, numoutputfields;
162   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
163   ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr);
164   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
165   CeedChk(ierr);
166   CeedOperatorField *opinputfields, *opoutputfields;
167   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
168   CeedChk(ierr);
169   CeedQFunctionField *qfinputfields, *qfoutputfields;
170   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
171   CeedChk(ierr);
172 
173   // Allocate
174   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
175   CeedChk(ierr);
176   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
177   CeedChk(ierr);
178   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
179   CeedChk(ierr);
180 
181   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
182   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
183   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
184   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
185   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
186 
187   impl->numein = numinputfields; impl->numeout = numoutputfields;
188 
189   // Set up infield and outfield pointer arrays
190   // Infields
191   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr,
192                                          impl->evecs, impl->evecsin,
193                                          impl->qvecsin, 0,
194                                          numinputfields, Q);
195   CeedChk(ierr);
196   // Outfields
197   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr,
198                                          impl->evecs, impl->evecsout,
199                                          impl->qvecsout, numinputfields,
200                                          numoutputfields, Q);
201   CeedChk(ierr);
202 
203   // Identity QFunctions
204   if (impl->identityqf) {
205     CeedEvalMode inmode, outmode;
206     CeedQFunctionField *infields, *outfields;
207     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr);
208 
209     for (CeedInt i=0; i<numinputfields; i++) {
210       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
211       CeedChk(ierr);
212       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
213       CeedChk(ierr);
214 
215       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
216       impl->qvecsout[i] = impl->qvecsin[i];
217       ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr);
218     }
219   }
220 
221   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
222 
223   return 0;
224 }
225 
226 // Setup Input fields
227 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields,
228     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
229     CeedVector invec, bool skipactive, CeedOperator_Blocked *impl,
230     CeedRequest *request) {
231   CeedInt ierr;
232   CeedEvalMode emode;
233   CeedVector vec;
234   CeedTransposeMode lmode;
235   uint64_t state;
236 
237   for (CeedInt i=0; i<numinputfields; i++) {
238     // Get input vector
239     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
240     if (vec == CEED_VECTOR_ACTIVE) {
241       if (skipactive)
242         continue;
243       else
244         vec = invec;
245     }
246 
247     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
248     CeedChk(ierr);
249     if (emode == CEED_EVAL_WEIGHT) { // Skip
250     } else {
251       // Restrict
252       ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
253       if (state != impl->inputstate[i] || vec == invec) {
254         ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode);
255         CeedChk(ierr);
256         ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
257                                         lmode, vec, impl->evecs[i],
258                                         request); CeedChk(ierr); CeedChk(ierr);
259         impl->inputstate[i] = state;
260       }
261       // Get evec
262       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
263                                     (const CeedScalar **) &impl->edata[i]);
264       CeedChk(ierr);
265     }
266   }
267   return 0;
268 }
269 
270 // Input basis action
271 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
272     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
273     CeedInt numinputfields, CeedInt blksize, bool skipactive,
274     CeedOperator_Blocked *impl) {
275   CeedInt ierr;
276   CeedInt dim, elemsize, size;
277   CeedElemRestriction Erestrict;
278   CeedEvalMode emode;
279   CeedBasis basis;
280 
281   for (CeedInt i=0; i<numinputfields; i++) {
282     // Skip active input
283     if (skipactive) {
284       CeedVector vec;
285       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
286       if (vec == CEED_VECTOR_ACTIVE)
287         continue;
288     }
289 
290     // Get elemsize, emode, size
291     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
292     CeedChk(ierr);
293     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
294     CeedChk(ierr);
295     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
296     CeedChk(ierr);
297     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
298     // Basis action
299     switch(emode) {
300     case CEED_EVAL_NONE:
301       ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
302                                 CEED_USE_POINTER,
303                                 &impl->edata[i][e*Q*size]); CeedChk(ierr);
304       break;
305     case CEED_EVAL_INTERP:
306       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
307       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
308                                 CEED_USE_POINTER,
309                                 &impl->edata[i][e*elemsize*size]);
310       CeedChk(ierr);
311       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
312                             CEED_EVAL_INTERP, impl->evecsin[i],
313                             impl->qvecsin[i]); CeedChk(ierr);
314       break;
315     case CEED_EVAL_GRAD:
316       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
317       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
318       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
319                                 CEED_USE_POINTER,
320                                 &impl->edata[i][e*elemsize*size/dim]);
321       CeedChk(ierr);
322       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
323                             CEED_EVAL_GRAD, impl->evecsin[i],
324                             impl->qvecsin[i]); CeedChk(ierr);
325       break;
326     case CEED_EVAL_WEIGHT:
327       break;  // No action
328     case CEED_EVAL_DIV:
329     case CEED_EVAL_CURL: {
330       // LCOV_EXCL_START
331       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
332       CeedChk(ierr);
333       Ceed ceed;
334       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
335       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
336       // LCOV_EXCL_STOP
337       break; // Not implemented
338     }
339     }
340   }
341   return 0;
342 }
343 
344 // Output basis action
345 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
346     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
347     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
348     CeedOperator op, CeedOperator_Blocked *impl) {
349   CeedInt ierr;
350   CeedInt dim, elemsize, size;
351   CeedElemRestriction Erestrict;
352   CeedEvalMode emode;
353   CeedBasis basis;
354 
355   for (CeedInt i=0; i<numoutputfields; i++) {
356     // Get elemsize, emode, size
357     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
358     CeedChk(ierr);
359     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
360     CeedChk(ierr);
361     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
362     CeedChk(ierr);
363     ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
364     // Basis action
365     switch(emode) {
366     case CEED_EVAL_NONE:
367       break; // No action
368     case CEED_EVAL_INTERP:
369       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
370       CeedChk(ierr);
371       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
372                                 CEED_USE_POINTER,
373                                 &impl->edata[i + numinputfields][e*elemsize*size]);
374       CeedChk(ierr);
375       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
376                             CEED_EVAL_INTERP, impl->qvecsout[i],
377                             impl->evecsout[i]); CeedChk(ierr);
378       break;
379     case CEED_EVAL_GRAD:
380       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
381       CeedChk(ierr);
382       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
383       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
384                                 CEED_USE_POINTER,
385                                 &impl->edata[i + numinputfields][e*elemsize*size/dim]);
386       CeedChk(ierr);
387       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
388                             CEED_EVAL_GRAD, impl->qvecsout[i],
389                             impl->evecsout[i]); CeedChk(ierr);
390       break;
391     case CEED_EVAL_WEIGHT: {
392       // LCOV_EXCL_START
393       Ceed ceed;
394       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
395       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
396                        "evaluation mode");
397       // LCOV_EXCL_STOP
398       break; // Should not occur
399     }
400     case CEED_EVAL_DIV:
401     case CEED_EVAL_CURL: {
402       // LCOV_EXCL_START
403       Ceed ceed;
404       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
405       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
406       // LCOV_EXCL_STOP
407       break; // Not implemented
408     }
409     }
410   }
411   return 0;
412 }
413 
414 // Restore Inputs
415 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields,
416     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
417     bool skipactive, CeedOperator_Blocked *impl) {
418   CeedInt ierr;
419   CeedEvalMode emode;
420 
421   for (CeedInt i=0; i<numinputfields; i++) {
422     // Skip active inputs
423     if (skipactive) {
424       CeedVector vec;
425       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
426       if (vec == CEED_VECTOR_ACTIVE)
427         continue;
428     }
429     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
430     CeedChk(ierr);
431     if (emode == CEED_EVAL_WEIGHT) { // Skip
432     } else {
433       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
434                                         (const CeedScalar **) &impl->edata[i]);
435       CeedChk(ierr);
436     }
437   }
438   return 0;
439 }
440 
441 // Apply Ceed Operator
442 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec,
443                                      CeedVector outvec,
444                                      CeedRequest *request) {
445   int ierr;
446   CeedOperator_Blocked *impl;
447   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
448   const CeedInt blksize = 8;
449   CeedInt Q, numinputfields, numoutputfields, numelements, size;
450   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
451   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
452   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
453   CeedQFunction qf;
454   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
455   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
456   CeedChk(ierr);
457   CeedTransposeMode lmode;
458   CeedOperatorField *opinputfields, *opoutputfields;
459   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
460   CeedChk(ierr);
461   CeedQFunctionField *qfinputfields, *qfoutputfields;
462   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
463   CeedChk(ierr);
464   CeedEvalMode emode;
465   CeedVector vec;
466 
467   // Setup
468   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
469 
470   // Input Evecs and Restriction
471   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
472                                          opinputfields, invec, false, impl,
473                                          request); CeedChk(ierr);
474 
475   // Output Evecs
476   for (CeedInt i=0; i<numoutputfields; i++) {
477     ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST,
478                               &impl->edata[i + numinputfields]); CeedChk(ierr);
479   }
480 
481   // Loop through elements
482   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
483     // Output pointers
484     for (CeedInt i=0; i<numoutputfields; i++) {
485       ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
486       CeedChk(ierr);
487       if (emode == CEED_EVAL_NONE) {
488         ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size);
489         CeedChk(ierr);
490         ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
491                                   CEED_USE_POINTER,
492                                   &impl->edata[i + numinputfields][e*Q*size]);
493         CeedChk(ierr);
494       }
495     }
496 
497     // Input basis apply
498     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
499                                           numinputfields, blksize, false, impl);
500     CeedChk(ierr);
501 
502     // Q function
503     if (!impl->identityqf) {
504       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
505       CeedChk(ierr);
506     }
507 
508     // Output basis apply
509     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields,
510                                            blksize, numinputfields,
511                                            numoutputfields, op, impl);
512     CeedChk(ierr);
513   }
514 
515   // Output restriction
516   for (CeedInt i=0; i<numoutputfields; i++) {
517     // Restore evec
518     ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein],
519                                   &impl->edata[i + numinputfields]); CeedChk(ierr);
520     // Get output vector
521     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
522     // Active
523     if (vec == CEED_VECTOR_ACTIVE)
524       vec = outvec;
525     // Restrict
526     ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); CeedChk(ierr);
527     ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE,
528                                     lmode, impl->evecs[i+impl->numein], vec,
529                                     request); CeedChk(ierr);
530 
531   }
532 
533   // Restore input arrays
534   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
535          opinputfields, false, impl); CeedChk(ierr);
536 
537   return 0;
538 }
539 
540 // Assemble Linear QFunction
541 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op,
542     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
543   int ierr;
544   CeedOperator_Blocked *impl;
545   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
546   const CeedInt blksize = 8;
547   CeedInt Q, numinputfields, numoutputfields, numelements, size;
548   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
549   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
550   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
551   CeedQFunction qf;
552   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
553   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
554   CeedChk(ierr);
555   CeedOperatorField *opinputfields, *opoutputfields;
556   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
557   CeedChk(ierr);
558   CeedQFunctionField *qfinputfields, *qfoutputfields;
559   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
560   CeedChk(ierr);
561   CeedVector vec, lvec;
562   CeedInt numactivein = 0, numactiveout = 0;
563   CeedVector *activein = NULL;
564   CeedScalar *a, *tmp;
565   Ceed ceed;
566   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
567 
568   // Setup
569   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
570 
571   // Check for identity
572   if (impl->identityqf)
573     // LCOV_EXCL_START
574     return CeedError(ceed, 1, "Assembling identity qfunctions not supported");
575   // LCOV_EXCL_STOP
576 
577   // Input Evecs and Restriction
578   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
579                                          opinputfields, NULL, true, impl,
580                                          request); CeedChk(ierr);
581 
582   // Count number of active input fields
583   for (CeedInt i=0; i<numinputfields; i++) {
584     // Get input vector
585     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
586     // Check if active input
587     if (vec == CEED_VECTOR_ACTIVE) {
588       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
589       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
590       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
591       CeedChk(ierr);
592       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
593       for (CeedInt field=0; field<size; field++) {
594         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
595         CeedChk(ierr);
596         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
597                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
598         CeedChk(ierr);
599       }
600       numactivein += size;
601       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
602     }
603   }
604 
605   // Count number of active output fields
606   for (CeedInt i=0; i<numoutputfields; i++) {
607     // Get output vector
608     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
609     // Check if active output
610     if (vec == CEED_VECTOR_ACTIVE) {
611       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
612       numactiveout += size;
613     }
614   }
615 
616   // Check sizes
617   if (!numactivein || !numactiveout)
618     // LCOV_EXCL_START
619     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
620                      "and outputs");
621   // LCOV_EXCL_STOP
622 
623   // Setup lvec
624   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
625                           &lvec); CeedChk(ierr);
626   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
627 
628   // Create output restriction
629   ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q,
630          numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr);
631   // Create assembled vector
632   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
633                           assembled); CeedChk(ierr);
634 
635   // Loop through elements
636   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
637     // Input basis apply
638     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
639                                           numinputfields, blksize, true, impl);
640     CeedChk(ierr);
641 
642     // Assemble QFunction
643     for (CeedInt in=0; in<numactivein; in++) {
644       // Set Inputs
645       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
646       if (numactivein > 1) {
647         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
648                                   0.0); CeedChk(ierr);
649       }
650       // Set Outputs
651       for (CeedInt out=0; out<numoutputfields; out++) {
652         // Get output vector
653         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
654         CeedChk(ierr);
655         // Check if active output
656         if (vec == CEED_VECTOR_ACTIVE) {
657           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
658                              CEED_USE_POINTER, a); CeedChk(ierr);
659           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
660           CeedChk(ierr);
661           a += size*Q*blksize; // Advance the pointer by the size of the output
662         }
663       }
664       // Apply QFunction
665       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
666       CeedChk(ierr);
667     }
668   }
669 
670   // Un-set output Qvecs to prevent accidental overwrite of Assembled
671   for (CeedInt out=0; out<numoutputfields; out++) {
672     // Get output vector
673     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
674     CeedChk(ierr);
675     // Check if active output
676     if (vec == CEED_VECTOR_ACTIVE) {
677       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
678                          NULL); CeedChk(ierr);
679     }
680   }
681 
682   // Restore input arrays
683   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
684          opinputfields, true, impl); CeedChk(ierr);
685 
686   // Output blocked restriction
687   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
688   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
689   CeedElemRestriction blkrstr;
690   ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize,
691                                           numelements*Q,
692                                           numactivein*numactiveout,
693                                           CEED_MEM_HOST, CEED_COPY_VALUES,
694                                           NULL, &blkrstr); CeedChk(ierr);
695   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE,
696                                   lvec, *assembled, request); CeedChk(ierr);
697 
698   // Cleanup
699   for (CeedInt i=0; i<numactivein; i++) {
700     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
701   }
702   ierr = CeedFree(&activein); CeedChk(ierr);
703   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
704   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
705 
706   return 0;
707 }
708 
709 int CeedOperatorCreate_Blocked(CeedOperator op) {
710   int ierr;
711   Ceed ceed;
712   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
713   CeedOperator_Blocked *impl;
714 
715   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
716   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
717 
718   ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction",
719                                 CeedOperatorAssembleLinearQFunction_Blocked);
720   CeedChk(ierr);
721   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
722                                 CeedOperatorApply_Blocked); CeedChk(ierr);
723   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
724                                 CeedOperatorDestroy_Blocked); CeedChk(ierr);
725   return 0;
726 }
727