xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision 16911fdad6ca4b1dd37f9d3206958ee664667dbe)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include "ceed-blocked.h"
18 #include "../ref/ceed-ref.h"
19 
20 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
21   int ierr;
22   CeedOperator_Blocked *impl;
23   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
24 
25   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
26     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
27     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
28   }
29   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
30   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
31   ierr = CeedFree(&impl->edata); CeedChk(ierr);
32   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
33 
34   for (CeedInt i=0; i<impl->numein; i++) {
35     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
36     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
37   }
38   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
39   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
40 
41   for (CeedInt i=0; i<impl->numeout; i++) {
42     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
43     if (!impl->identityqf) {
44       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
45     }
46   }
47   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
48   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
49 
50   ierr = CeedFree(&impl); CeedChk(ierr);
51   return 0;
52 }
53 
54 /*
55   Setup infields or outfields
56  */
57 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
58     CeedOperator op, bool inOrOut,
59     CeedElemRestriction *blkrestr,
60     CeedVector *fullevecs, CeedVector *evecs,
61     CeedVector *qvecs, CeedInt starte,
62     CeedInt numfields, CeedInt Q) {
63   CeedInt dim, ierr, ncomp, size, P;
64   Ceed ceed;
65   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
66   CeedBasis basis;
67   CeedElemRestriction r;
68   CeedOperatorField *opfields;
69   CeedQFunctionField *qffields;
70   if (inOrOut) {
71     ierr = CeedOperatorGetFields(op, NULL, &opfields);
72     CeedChk(ierr);
73     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
74     CeedChk(ierr);
75   } else {
76     ierr = CeedOperatorGetFields(op, &opfields, NULL);
77     CeedChk(ierr);
78     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
79     CeedChk(ierr);
80   }
81   const CeedInt blksize = 8;
82 
83   // Loop over fields
84   for (CeedInt i=0; i<numfields; i++) {
85     CeedEvalMode emode;
86     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
87 
88     if (emode != CEED_EVAL_WEIGHT) {
89       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
90       CeedChk(ierr);
91       CeedElemRestriction_Ref *data;
92       ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr);
93       Ceed ceed;
94       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
95       CeedInt nelem, elemsize, nnodes;
96       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
97       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
98       ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
99       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
100       ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize,
101                                               blksize, nnodes, ncomp,
102                                               CEED_MEM_HOST, CEED_COPY_VALUES,
103                                               data->indices, &blkrestr[i+starte]);
104       CeedChk(ierr);
105       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
106                                              &fullevecs[i+starte]);
107       CeedChk(ierr);
108     }
109 
110     switch(emode) {
111     case CEED_EVAL_NONE:
112       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
113       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
114       break;
115     case CEED_EVAL_INTERP:
116       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
117       ierr = CeedElemRestrictionGetElementSize(r, &P);
118       CeedChk(ierr);
119       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
120       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
121       break;
122     case CEED_EVAL_GRAD:
123       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
124       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
125       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
126       ierr = CeedElemRestrictionGetElementSize(r, &P);
127       CeedChk(ierr);
128       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
129       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
130       break;
131     case CEED_EVAL_WEIGHT: // Only on input fields
132       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
133       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
134       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
135                             CEED_EVAL_WEIGHT, NULL, qvecs[i]); CeedChk(ierr);
136 
137       break;
138     case CEED_EVAL_DIV:
139       break; // Not implemented
140     case CEED_EVAL_CURL:
141       break; // Not implemented
142     }
143   }
144   return 0;
145 }
146 
147 /*
148   CeedOperator needs to connect all the named fields (be they active or passive)
149   to the named inputs and outputs of its CeedQFunction.
150  */
151 static int CeedOperatorSetup_Blocked(CeedOperator op) {
152   int ierr;
153   bool setupdone;
154   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
155   if (setupdone) return 0;
156   Ceed ceed;
157   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
158   CeedOperator_Blocked *impl;
159   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
160   CeedQFunction qf;
161   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
162   CeedInt Q, numinputfields, numoutputfields;
163   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
164   ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr);
165   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
166   CeedChk(ierr);
167   CeedOperatorField *opinputfields, *opoutputfields;
168   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
169   CeedChk(ierr);
170   CeedQFunctionField *qfinputfields, *qfoutputfields;
171   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
172   CeedChk(ierr);
173 
174   // Allocate
175   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
176   CeedChk(ierr);
177   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
178   CeedChk(ierr);
179   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
180   CeedChk(ierr);
181 
182   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
183   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
184   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
185   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
186   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
187 
188   impl->numein = numinputfields; impl->numeout = numoutputfields;
189 
190   // Set up infield and outfield pointer arrays
191   // Infields
192   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr,
193                                          impl->evecs, impl->evecsin,
194                                          impl->qvecsin, 0,
195                                          numinputfields, Q);
196   CeedChk(ierr);
197   // Outfields
198   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr,
199                                          impl->evecs, impl->evecsout,
200                                          impl->qvecsout, numinputfields,
201                                          numoutputfields, Q);
202   CeedChk(ierr);
203 
204   // Identity QFunctions
205   if (impl->identityqf) {
206     CeedEvalMode inmode, outmode;
207     CeedQFunctionField *infields, *outfields;
208     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr);
209 
210     for (CeedInt i=0; i<numinputfields; i++) {
211       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
212       CeedChk(ierr);
213       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
214       CeedChk(ierr);
215 
216       if (inmode == CEED_EVAL_NONE && outmode == CEED_EVAL_NONE)
217        // LCOV_EXCL_START
218         return CeedError(ceed, 1, "CEED_EVAL_NONE for a matching input and "
219                          "output does not make sense with identity QFunction");
220       // LCOV_EXCL_STOP
221 
222       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
223       impl->qvecsout[i] = impl->qvecsin[i];
224     }
225   }
226 
227   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
228 
229   return 0;
230 }
231 
232 // Setup Input fields
233 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields,
234     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
235     CeedVector invec, bool skipactive, CeedOperator_Blocked *impl,
236     CeedRequest *request) {
237   CeedInt ierr;
238   CeedEvalMode emode;
239   CeedVector vec;
240   CeedTransposeMode lmode;
241   uint64_t state;
242 
243   for (CeedInt i=0; i<numinputfields; i++) {
244     // Get input vector
245     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
246     if (vec == CEED_VECTOR_ACTIVE) {
247       if (skipactive)
248         continue;
249       else
250         vec = invec;
251     }
252 
253     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
254     CeedChk(ierr);
255     if (emode == CEED_EVAL_WEIGHT) { // Skip
256     } else {
257       // Restrict
258       ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
259       if (state != impl->inputstate[i] || vec == invec) {
260         ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode);
261         CeedChk(ierr);
262         ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
263                                         lmode, vec, impl->evecs[i],
264                                         request); CeedChk(ierr); CeedChk(ierr);
265         impl->inputstate[i] = state;
266       }
267       // Get evec
268       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
269                                     (const CeedScalar **) &impl->edata[i]);
270       CeedChk(ierr);
271     }
272   }
273   return 0;
274 }
275 
276 // Input basis action
277 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
278     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
279     CeedInt numinputfields, CeedInt blksize, bool skipactive,
280     CeedOperator_Blocked *impl) {
281   CeedInt ierr;
282   CeedInt dim, elemsize, size;
283   CeedElemRestriction Erestrict;
284   CeedEvalMode emode;
285   CeedBasis basis;
286 
287   for (CeedInt i=0; i<numinputfields; i++) {
288     // Skip active input
289     if (skipactive) {
290       CeedVector vec;
291       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
292       if (vec == CEED_VECTOR_ACTIVE)
293         continue;
294     }
295 
296     // Get elemsize, emode, size
297     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
298     CeedChk(ierr);
299     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
300     CeedChk(ierr);
301     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
302     CeedChk(ierr);
303     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
304     // Basis action
305     switch(emode) {
306     case CEED_EVAL_NONE:
307       ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
308                                 CEED_USE_POINTER,
309                                 &impl->edata[i][e*Q*size]); CeedChk(ierr);
310       break;
311     case CEED_EVAL_INTERP:
312       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
313       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
314                                 CEED_USE_POINTER,
315                                 &impl->edata[i][e*elemsize*size]);
316       CeedChk(ierr);
317       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
318                             CEED_EVAL_INTERP, impl->evecsin[i],
319                             impl->qvecsin[i]); CeedChk(ierr);
320       break;
321     case CEED_EVAL_GRAD:
322       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
323       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
324       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
325                                 CEED_USE_POINTER,
326                                 &impl->edata[i][e*elemsize*size/dim]);
327       CeedChk(ierr);
328       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
329                             CEED_EVAL_GRAD, impl->evecsin[i],
330                             impl->qvecsin[i]); CeedChk(ierr);
331       break;
332     case CEED_EVAL_WEIGHT:
333       break;  // No action
334     case CEED_EVAL_DIV:
335     case CEED_EVAL_CURL: {
336       // LCOV_EXCL_START
337       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
338       CeedChk(ierr);
339       Ceed ceed;
340       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
341       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
342       // LCOV_EXCL_STOP
343       break; // Not implemented
344     }
345     }
346   }
347   return 0;
348 }
349 
350 // Output basis action
351 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
352     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
353     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
354     CeedOperator op, CeedOperator_Blocked *impl) {
355   CeedInt ierr;
356   CeedInt dim, elemsize, size;
357   CeedElemRestriction Erestrict;
358   CeedEvalMode emode;
359   CeedBasis basis;
360 
361   for (CeedInt i=0; i<numoutputfields; i++) {
362     // Get elemsize, emode, size
363     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
364     CeedChk(ierr);
365     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
366     CeedChk(ierr);
367     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
368     CeedChk(ierr);
369     ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
370     // Basis action
371     switch(emode) {
372     case CEED_EVAL_NONE:
373       break; // No action
374     case CEED_EVAL_INTERP:
375       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
376       CeedChk(ierr);
377       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
378                                 CEED_USE_POINTER,
379                                 &impl->edata[i + numinputfields][e*elemsize*size]);
380       CeedChk(ierr);
381       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
382                             CEED_EVAL_INTERP, impl->qvecsout[i],
383                             impl->evecsout[i]); CeedChk(ierr);
384       break;
385     case CEED_EVAL_GRAD:
386       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
387       CeedChk(ierr);
388       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
389       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
390                                 CEED_USE_POINTER,
391                                 &impl->edata[i + numinputfields][e*elemsize*size/dim]);
392       CeedChk(ierr);
393       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
394                             CEED_EVAL_GRAD, impl->qvecsout[i],
395                             impl->evecsout[i]); CeedChk(ierr);
396       break;
397     case CEED_EVAL_WEIGHT: {
398       // LCOV_EXCL_START
399       Ceed ceed;
400       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
401       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
402                        "evaluation mode");
403       // LCOV_EXCL_STOP
404       break; // Should not occur
405     }
406     case CEED_EVAL_DIV:
407     case CEED_EVAL_CURL: {
408       // LCOV_EXCL_START
409       Ceed ceed;
410       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
411       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
412       // LCOV_EXCL_STOP
413       break; // Not implemented
414     }
415     }
416   }
417   return 0;
418 }
419 
420 // Restore Inputs
421 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields,
422     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
423     bool skipactive, CeedOperator_Blocked *impl) {
424   CeedInt ierr;
425   CeedEvalMode emode;
426 
427   for (CeedInt i=0; i<numinputfields; i++) {
428     // Skip active inputs
429     if (skipactive) {
430       CeedVector vec;
431       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
432       if (vec == CEED_VECTOR_ACTIVE)
433         continue;
434     }
435     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
436     CeedChk(ierr);
437     if (emode == CEED_EVAL_WEIGHT) { // Skip
438     } else {
439       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
440                                         (const CeedScalar **) &impl->edata[i]);
441       CeedChk(ierr);
442     }
443   }
444   return 0;
445 }
446 
447 // Apply Ceed Operator
448 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec,
449                                      CeedVector outvec,
450                                      CeedRequest *request) {
451   int ierr;
452   CeedOperator_Blocked *impl;
453   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
454   const CeedInt blksize = 8;
455   CeedInt Q, numinputfields, numoutputfields, numelements, size;
456   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
457   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
458   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
459   CeedQFunction qf;
460   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
461   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
462   CeedChk(ierr);
463   CeedTransposeMode lmode;
464   CeedOperatorField *opinputfields, *opoutputfields;
465   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
466   CeedChk(ierr);
467   CeedQFunctionField *qfinputfields, *qfoutputfields;
468   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
469   CeedChk(ierr);
470   CeedEvalMode emode;
471   CeedVector vec;
472 
473   // Setup
474   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
475 
476   // Input Evecs and Restriction
477   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
478                                          opinputfields, invec, false, impl,
479                                          request); CeedChk(ierr);
480 
481   // Output Evecs
482   for (CeedInt i=0; i<numoutputfields; i++) {
483     ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST,
484                               &impl->edata[i + numinputfields]); CeedChk(ierr);
485   }
486 
487   // Loop through elements
488   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
489     // Output pointers
490     for (CeedInt i=0; i<numoutputfields; i++) {
491       ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
492       CeedChk(ierr);
493       if (emode == CEED_EVAL_NONE) {
494         ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size);
495         CeedChk(ierr);
496         ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
497                                   CEED_USE_POINTER,
498                                   &impl->edata[i + numinputfields][e*Q*size]);
499         CeedChk(ierr);
500       }
501     }
502 
503     // Input basis apply
504     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
505                                           numinputfields, blksize, false, impl);
506     CeedChk(ierr);
507 
508     // Q function
509     if (!impl->identityqf) {
510       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
511       CeedChk(ierr);
512     }
513 
514     // Output basis apply
515     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields,
516                                            blksize, numinputfields,
517                                            numoutputfields, op, impl);
518     CeedChk(ierr);
519   }
520 
521   // Zero lvecs
522   for (CeedInt i=0; i<numoutputfields; i++) {
523     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
524     if (vec == CEED_VECTOR_ACTIVE) {
525       if (!impl->add) {
526         vec = outvec;
527         ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
528       }
529     } else {
530       ierr = CeedVectorSetValue(vec, 0.0); CeedChk(ierr);
531     }
532   }
533   impl->add = false;
534 
535   // Output restriction
536   for (CeedInt i=0; i<numoutputfields; i++) {
537     // Restore evec
538     ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein],
539                                   &impl->edata[i + numinputfields]); CeedChk(ierr);
540     // Get output vector
541     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
542     // Active
543     if (vec == CEED_VECTOR_ACTIVE)
544       vec = outvec;
545     // Restrict
546     ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); CeedChk(ierr);
547     ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE,
548                                     lmode, impl->evecs[i+impl->numein], vec,
549                                     request); CeedChk(ierr);
550 
551   }
552 
553   // Restore input arrays
554   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
555          opinputfields, false, impl); CeedChk(ierr);
556 
557   return 0;
558 }
559 
560 // Assemble Linear QFunction
561 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op,
562     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
563   int ierr;
564   CeedOperator_Blocked *impl;
565   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
566   const CeedInt blksize = 8;
567   CeedInt Q, numinputfields, numoutputfields, numelements, size;
568   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
569   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
570   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
571   CeedQFunction qf;
572   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
573   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
574   CeedChk(ierr);
575   CeedOperatorField *opinputfields, *opoutputfields;
576   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
577   CeedChk(ierr);
578   CeedQFunctionField *qfinputfields, *qfoutputfields;
579   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
580   CeedChk(ierr);
581   CeedVector vec, lvec;
582   CeedInt numactivein = 0, numactiveout = 0;
583   CeedVector *activein = NULL;
584   CeedScalar *a, *tmp;
585   Ceed ceed;
586   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
587 
588   // Setup
589   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
590 
591   // Check for identity
592   if (impl->identityqf)
593    // LCOV_EXCL_START
594     return CeedError(ceed, 1, "Assembling identity qfunction does not make sense");
595   // LCOV_EXCL_STOP
596 
597   // Input Evecs and Restriction
598   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
599                                          opinputfields, NULL, true, impl,
600                                          request); CeedChk(ierr);
601 
602   // Count number of active input fields
603   for (CeedInt i=0; i<numinputfields; i++) {
604     // Get input vector
605     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
606     // Check if active input
607     if (vec == CEED_VECTOR_ACTIVE) {
608       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
609       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
610       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
611       CeedChk(ierr);
612       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
613       for (CeedInt field=0; field<size; field++) {
614         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
615         CeedChk(ierr);
616         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
617                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
618         CeedChk(ierr);
619       }
620       numactivein += size;
621       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
622     }
623   }
624 
625   // Count number of active output fields
626   for (CeedInt i=0; i<numoutputfields; i++) {
627     // Get output vector
628     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
629     // Check if active output
630     if (vec == CEED_VECTOR_ACTIVE) {
631       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
632       numactiveout += size;
633     }
634   }
635 
636   // Check sizes
637   if (!numactivein || !numactiveout)
638     // LCOV_EXCL_START
639     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
640                      "and outputs");
641   // LCOV_EXCL_STOP
642 
643   // Setup lvec
644   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
645                           &lvec); CeedChk(ierr);
646   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
647 
648   // Create output restriction
649   ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q,
650          numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr);
651   // Create assembled vector
652   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
653                           assembled); CeedChk(ierr);
654 
655   // Loop through elements
656   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
657     // Input basis apply
658     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
659                                           numinputfields, blksize, true, impl);
660     CeedChk(ierr);
661 
662     // Assemble QFunction
663     for (CeedInt in=0; in<numactivein; in++) {
664       // Set Inputs
665       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
666       if (numactivein > 1) {
667         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
668                                   0.0); CeedChk(ierr);
669       }
670       // Set Outputs
671       for (CeedInt out=0; out<numoutputfields; out++) {
672         // Get output vector
673         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
674         CeedChk(ierr);
675         // Check if active output
676         if (vec == CEED_VECTOR_ACTIVE) {
677           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
678                              CEED_USE_POINTER, a); CeedChk(ierr);
679           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
680           CeedChk(ierr);
681           a += size*Q*blksize; // Advance the pointer by the size of the output
682         }
683       }
684       // Apply QFunction
685       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
686       CeedChk(ierr);
687     }
688   }
689 
690   // Un-set output Qvecs to prevent accidental overwrite of Assembled
691   for (CeedInt out=0; out<numoutputfields; out++) {
692     // Get output vector
693     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
694     CeedChk(ierr);
695     // Check if active output
696     if (vec == CEED_VECTOR_ACTIVE) {
697       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
698                          NULL); CeedChk(ierr);
699     }
700   }
701 
702   // Restore input arrays
703   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
704          opinputfields, true, impl); CeedChk(ierr);
705 
706   // Output blocked restriction
707   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
708   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
709   CeedElemRestriction blkrstr;
710   ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize,
711                                           numelements*Q,
712                                           numactivein*numactiveout,
713                                           CEED_MEM_HOST, CEED_COPY_VALUES,
714                                           NULL, &blkrstr); CeedChk(ierr);
715   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE,
716                                   lvec, *assembled, request); CeedChk(ierr);
717 
718   // Cleanup
719   for (CeedInt i=0; i<numactivein; i++) {
720     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
721   }
722   ierr = CeedFree(&activein); CeedChk(ierr);
723   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
724   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
725 
726   return 0;
727 }
728 
729 int CeedOperatorCreate_Blocked(CeedOperator op) {
730   int ierr;
731   Ceed ceed;
732   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
733   CeedOperator_Blocked *impl;
734 
735   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
736   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
737 
738   ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction",
739                                 CeedOperatorAssembleLinearQFunction_Blocked);
740   CeedChk(ierr);
741   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Apply",
742                                 CeedOperatorApply_Blocked); CeedChk(ierr);
743   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
744                                 CeedOperatorDestroy_Blocked); CeedChk(ierr);
745   return 0;
746 }
747