xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision 39b2de37682296be8460181179eb4e44de5cc3de)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include "ceed-blocked.h"
18 #include "../ref/ceed-ref.h"
19 
20 //------------------------------------------------------------------------------
21 // Setup Input/Output Fields
22 //------------------------------------------------------------------------------
23 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
24     CeedOperator op, bool inOrOut,
25     CeedElemRestriction *blkrestr,
26     CeedVector *fullevecs, CeedVector *evecs,
27     CeedVector *qvecs, CeedInt starte,
28     CeedInt numfields, CeedInt Q) {
29   CeedInt dim, ierr, ncomp, size, P;
30   Ceed ceed;
31   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
32   CeedBasis basis;
33   CeedElemRestriction r;
34   CeedOperatorField *opfields;
35   CeedQFunctionField *qffields;
36   if (inOrOut) {
37     ierr = CeedOperatorGetFields(op, NULL, &opfields);
38     CeedChk(ierr);
39     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
40     CeedChk(ierr);
41   } else {
42     ierr = CeedOperatorGetFields(op, &opfields, NULL);
43     CeedChk(ierr);
44     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
45     CeedChk(ierr);
46   }
47   const CeedInt blksize = 8;
48 
49   // Loop over fields
50   for (CeedInt i=0; i<numfields; i++) {
51     CeedEvalMode emode;
52     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
53 
54     if (emode != CEED_EVAL_WEIGHT) {
55       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
56       CeedChk(ierr);
57       CeedElemRestriction_Ref *data;
58       ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr);
59       Ceed ceed;
60       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
61       CeedInt nelem, elemsize, nnodes;
62       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
63       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
64       ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
65       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
66       ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize,
67                                               blksize, nnodes, ncomp,
68                                               CEED_MEM_HOST, CEED_COPY_VALUES,
69                                               data->indices, &blkrestr[i+starte]);
70       CeedChk(ierr);
71       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
72                                              &fullevecs[i+starte]);
73       CeedChk(ierr);
74     }
75 
76     switch(emode) {
77     case CEED_EVAL_NONE:
78       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
79       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
80       break;
81     case CEED_EVAL_INTERP:
82       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
83       ierr = CeedElemRestrictionGetElementSize(r, &P);
84       CeedChk(ierr);
85       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
86       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
87       break;
88     case CEED_EVAL_GRAD:
89       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
90       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
91       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
92       ierr = CeedElemRestrictionGetElementSize(r, &P);
93       CeedChk(ierr);
94       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
95       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
96       break;
97     case CEED_EVAL_WEIGHT: // Only on input fields
98       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
99       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
100       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
101                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]);
102       CeedChk(ierr);
103 
104       break;
105     case CEED_EVAL_DIV:
106       break; // Not implemented
107     case CEED_EVAL_CURL:
108       break; // Not implemented
109     }
110   }
111   return 0;
112 }
113 
114 //------------------------------------------------------------------------------
115 // Setup Operator
116 //------------------------------------------------------------------------------
117 static int CeedOperatorSetup_Blocked(CeedOperator op) {
118   int ierr;
119   bool setupdone;
120   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
121   if (setupdone) return 0;
122   Ceed ceed;
123   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
124   CeedOperator_Blocked *impl;
125   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
126   CeedQFunction qf;
127   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
128   CeedInt Q, numinputfields, numoutputfields;
129   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
130   ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr);
131   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
132   CeedChk(ierr);
133   CeedOperatorField *opinputfields, *opoutputfields;
134   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
135   CeedChk(ierr);
136   CeedQFunctionField *qfinputfields, *qfoutputfields;
137   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
138   CeedChk(ierr);
139 
140   // Allocate
141   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
142   CeedChk(ierr);
143   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
144   CeedChk(ierr);
145   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
146   CeedChk(ierr);
147 
148   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
149   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
150   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
151   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
152   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
153 
154   impl->numein = numinputfields; impl->numeout = numoutputfields;
155 
156   // Set up infield and outfield pointer arrays
157   // Infields
158   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr,
159                                          impl->evecs, impl->evecsin,
160                                          impl->qvecsin, 0,
161                                          numinputfields, Q);
162   CeedChk(ierr);
163   // Outfields
164   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr,
165                                          impl->evecs, impl->evecsout,
166                                          impl->qvecsout, numinputfields,
167                                          numoutputfields, Q);
168   CeedChk(ierr);
169 
170   // Identity QFunctions
171   if (impl->identityqf) {
172     CeedEvalMode inmode, outmode;
173     CeedQFunctionField *infields, *outfields;
174     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr);
175 
176     for (CeedInt i=0; i<numinputfields; i++) {
177       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
178       CeedChk(ierr);
179       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
180       CeedChk(ierr);
181 
182       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
183       impl->qvecsout[i] = impl->qvecsin[i];
184       ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr);
185     }
186   }
187 
188   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
189 
190   return 0;
191 }
192 
193 //------------------------------------------------------------------------------
194 // Setup Operator Inputs
195 //------------------------------------------------------------------------------
196 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields,
197     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
198     CeedVector invec, bool skipactive, CeedOperator_Blocked *impl,
199     CeedRequest *request) {
200   CeedInt ierr;
201   CeedEvalMode emode;
202   CeedVector vec;
203   CeedTransposeMode lmode;
204   uint64_t state;
205 
206   for (CeedInt i=0; i<numinputfields; i++) {
207     // Get input vector
208     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
209     if (vec == CEED_VECTOR_ACTIVE) {
210       if (skipactive)
211         continue;
212       else
213         vec = invec;
214     }
215 
216     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
217     CeedChk(ierr);
218     if (emode == CEED_EVAL_WEIGHT) { // Skip
219     } else {
220       // Restrict
221       ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
222       if (state != impl->inputstate[i] || vec == invec) {
223         ierr = CeedOperatorFieldGetLMode(opinputfields[i], &lmode);
224         CeedChk(ierr);
225         ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
226                                         lmode, vec, impl->evecs[i],
227                                         request); CeedChk(ierr); CeedChk(ierr);
228         impl->inputstate[i] = state;
229       }
230       // Get evec
231       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
232                                     (const CeedScalar **) &impl->edata[i]);
233       CeedChk(ierr);
234     }
235   }
236   return 0;
237 }
238 
239 //------------------------------------------------------------------------------
240 // Input Basis Action
241 //------------------------------------------------------------------------------
242 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
243     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
244     CeedInt numinputfields, CeedInt blksize, bool skipactive,
245     CeedOperator_Blocked *impl) {
246   CeedInt ierr;
247   CeedInt dim, elemsize, size;
248   CeedElemRestriction Erestrict;
249   CeedEvalMode emode;
250   CeedBasis basis;
251 
252   for (CeedInt i=0; i<numinputfields; i++) {
253     // Skip active input
254     if (skipactive) {
255       CeedVector vec;
256       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
257       if (vec == CEED_VECTOR_ACTIVE)
258         continue;
259     }
260 
261     // Get elemsize, emode, size
262     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
263     CeedChk(ierr);
264     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
265     CeedChk(ierr);
266     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
267     CeedChk(ierr);
268     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
269     // Basis action
270     switch(emode) {
271     case CEED_EVAL_NONE:
272       ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
273                                 CEED_USE_POINTER,
274                                 &impl->edata[i][e*Q*size]); CeedChk(ierr);
275       break;
276     case CEED_EVAL_INTERP:
277       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
278       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
279                                 CEED_USE_POINTER,
280                                 &impl->edata[i][e*elemsize*size]);
281       CeedChk(ierr);
282       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
283                             CEED_EVAL_INTERP, impl->evecsin[i],
284                             impl->qvecsin[i]); CeedChk(ierr);
285       break;
286     case CEED_EVAL_GRAD:
287       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
288       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
289       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
290                                 CEED_USE_POINTER,
291                                 &impl->edata[i][e*elemsize*size/dim]);
292       CeedChk(ierr);
293       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
294                             CEED_EVAL_GRAD, impl->evecsin[i],
295                             impl->qvecsin[i]); CeedChk(ierr);
296       break;
297     case CEED_EVAL_WEIGHT:
298       break;  // No action
299     case CEED_EVAL_DIV:
300     case CEED_EVAL_CURL: {
301       // LCOV_EXCL_START
302       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
303       CeedChk(ierr);
304       Ceed ceed;
305       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
306       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
307       // LCOV_EXCL_STOP
308       break; // Not implemented
309     }
310     }
311   }
312   return 0;
313 }
314 
315 //------------------------------------------------------------------------------
316 // Output Basis Action
317 //------------------------------------------------------------------------------
318 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
319     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
320     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
321     CeedOperator op, CeedOperator_Blocked *impl) {
322   CeedInt ierr;
323   CeedInt dim, elemsize, size;
324   CeedElemRestriction Erestrict;
325   CeedEvalMode emode;
326   CeedBasis basis;
327 
328   for (CeedInt i=0; i<numoutputfields; i++) {
329     // Get elemsize, emode, size
330     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
331     CeedChk(ierr);
332     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
333     CeedChk(ierr);
334     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
335     CeedChk(ierr);
336     ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
337     // Basis action
338     switch(emode) {
339     case CEED_EVAL_NONE:
340       break; // No action
341     case CEED_EVAL_INTERP:
342       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
343       CeedChk(ierr);
344       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
345                                 CEED_USE_POINTER,
346                                 &impl->edata[i + numinputfields][e*elemsize*size]);
347       CeedChk(ierr);
348       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
349                             CEED_EVAL_INTERP, impl->qvecsout[i],
350                             impl->evecsout[i]); CeedChk(ierr);
351       break;
352     case CEED_EVAL_GRAD:
353       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
354       CeedChk(ierr);
355       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
356       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
357                                 CEED_USE_POINTER,
358                                 &impl->edata[i + numinputfields][e*elemsize*size/dim]);
359       CeedChk(ierr);
360       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
361                             CEED_EVAL_GRAD, impl->qvecsout[i],
362                             impl->evecsout[i]); CeedChk(ierr);
363       break;
364     case CEED_EVAL_WEIGHT: {
365       // LCOV_EXCL_START
366       Ceed ceed;
367       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
368       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
369                        "evaluation mode");
370       // LCOV_EXCL_STOP
371       break; // Should not occur
372     }
373     case CEED_EVAL_DIV:
374     case CEED_EVAL_CURL: {
375       // LCOV_EXCL_START
376       Ceed ceed;
377       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
378       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
379       // LCOV_EXCL_STOP
380       break; // Not implemented
381     }
382     }
383   }
384   return 0;
385 }
386 
387 //------------------------------------------------------------------------------
388 // Restore Input Vectors
389 //------------------------------------------------------------------------------
390 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields,
391     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
392     bool skipactive, CeedOperator_Blocked *impl) {
393   CeedInt ierr;
394   CeedEvalMode emode;
395 
396   for (CeedInt i=0; i<numinputfields; i++) {
397     // Skip active inputs
398     if (skipactive) {
399       CeedVector vec;
400       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
401       if (vec == CEED_VECTOR_ACTIVE)
402         continue;
403     }
404     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
405     CeedChk(ierr);
406     if (emode == CEED_EVAL_WEIGHT) { // Skip
407     } else {
408       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
409                                         (const CeedScalar **) &impl->edata[i]);
410       CeedChk(ierr);
411     }
412   }
413   return 0;
414 }
415 
416 //------------------------------------------------------------------------------
417 // Operator Apply
418 //------------------------------------------------------------------------------
419 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec,
420                                      CeedVector outvec,
421                                      CeedRequest *request) {
422   int ierr;
423   CeedOperator_Blocked *impl;
424   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
425   const CeedInt blksize = 8;
426   CeedInt Q, numinputfields, numoutputfields, numelements, size;
427   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
428   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
429   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
430   CeedQFunction qf;
431   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
432   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
433   CeedChk(ierr);
434   CeedTransposeMode lmode;
435   CeedOperatorField *opinputfields, *opoutputfields;
436   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
437   CeedChk(ierr);
438   CeedQFunctionField *qfinputfields, *qfoutputfields;
439   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
440   CeedChk(ierr);
441   CeedEvalMode emode;
442   CeedVector vec;
443 
444   // Setup
445   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
446 
447   // Input Evecs and Restriction
448   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
449                                          opinputfields, invec, false, impl,
450                                          request); CeedChk(ierr);
451 
452   // Output Evecs
453   for (CeedInt i=0; i<numoutputfields; i++) {
454     ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST,
455                               &impl->edata[i + numinputfields]); CeedChk(ierr);
456   }
457 
458   // Loop through elements
459   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
460     // Output pointers
461     for (CeedInt i=0; i<numoutputfields; i++) {
462       ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
463       CeedChk(ierr);
464       if (emode == CEED_EVAL_NONE) {
465         ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size);
466         CeedChk(ierr);
467         ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
468                                   CEED_USE_POINTER,
469                                   &impl->edata[i + numinputfields][e*Q*size]);
470         CeedChk(ierr);
471       }
472     }
473 
474     // Input basis apply
475     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
476                                           numinputfields, blksize, false, impl);
477     CeedChk(ierr);
478 
479     // Q function
480     if (!impl->identityqf) {
481       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
482       CeedChk(ierr);
483     }
484 
485     // Output basis apply
486     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields,
487                                            blksize, numinputfields,
488                                            numoutputfields, op, impl);
489     CeedChk(ierr);
490   }
491 
492   // Output restriction
493   for (CeedInt i=0; i<numoutputfields; i++) {
494     // Restore evec
495     ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein],
496                                   &impl->edata[i + numinputfields]); CeedChk(ierr);
497     // Get output vector
498     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
499     // Active
500     if (vec == CEED_VECTOR_ACTIVE)
501       vec = outvec;
502     // Restrict
503     ierr = CeedOperatorFieldGetLMode(opoutputfields[i], &lmode); CeedChk(ierr);
504     ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE,
505                                     lmode, impl->evecs[i+impl->numein], vec,
506                                     request); CeedChk(ierr);
507 
508   }
509 
510   // Restore input arrays
511   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
512          opinputfields, false, impl); CeedChk(ierr);
513 
514   return 0;
515 }
516 
517 //------------------------------------------------------------------------------
518 // Assemble Linear QFunction
519 //------------------------------------------------------------------------------
520 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op,
521     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
522   int ierr;
523   CeedOperator_Blocked *impl;
524   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
525   const CeedInt blksize = 8;
526   CeedInt Q, numinputfields, numoutputfields, numelements, size;
527   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
528   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
529   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
530   CeedQFunction qf;
531   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
532   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
533   CeedChk(ierr);
534   CeedOperatorField *opinputfields, *opoutputfields;
535   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
536   CeedChk(ierr);
537   CeedQFunctionField *qfinputfields, *qfoutputfields;
538   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
539   CeedChk(ierr);
540   CeedVector vec, lvec;
541   CeedInt numactivein = 0, numactiveout = 0;
542   CeedVector *activein = NULL;
543   CeedScalar *a, *tmp;
544   Ceed ceed;
545   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
546 
547   // Setup
548   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
549 
550   // Check for identity
551   if (impl->identityqf)
552     // LCOV_EXCL_START
553     return CeedError(ceed, 1, "Assembling identity qfunctions not supported");
554   // LCOV_EXCL_STOP
555 
556   // Input Evecs and Restriction
557   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
558                                          opinputfields, NULL, true, impl,
559                                          request); CeedChk(ierr);
560 
561   // Count number of active input fields
562   for (CeedInt i=0; i<numinputfields; i++) {
563     // Get input vector
564     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
565     // Check if active input
566     if (vec == CEED_VECTOR_ACTIVE) {
567       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
568       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
569       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
570       CeedChk(ierr);
571       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
572       for (CeedInt field=0; field<size; field++) {
573         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
574         CeedChk(ierr);
575         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
576                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
577         CeedChk(ierr);
578       }
579       numactivein += size;
580       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
581     }
582   }
583 
584   // Count number of active output fields
585   for (CeedInt i=0; i<numoutputfields; i++) {
586     // Get output vector
587     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
588     // Check if active output
589     if (vec == CEED_VECTOR_ACTIVE) {
590       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
591       numactiveout += size;
592     }
593   }
594 
595   // Check sizes
596   if (!numactivein || !numactiveout)
597     // LCOV_EXCL_START
598     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
599                      "and outputs");
600   // LCOV_EXCL_STOP
601 
602   // Setup lvec
603   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
604                           &lvec); CeedChk(ierr);
605   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
606 
607   // Create output restriction
608   ierr = CeedElemRestrictionCreateIdentity(ceed, numelements, Q,
609          numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr);
610   // Create assembled vector
611   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
612                           assembled); CeedChk(ierr);
613 
614   // Loop through elements
615   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
616     // Input basis apply
617     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
618                                           numinputfields, blksize, true, impl);
619     CeedChk(ierr);
620 
621     // Assemble QFunction
622     for (CeedInt in=0; in<numactivein; in++) {
623       // Set Inputs
624       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
625       if (numactivein > 1) {
626         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
627                                   0.0); CeedChk(ierr);
628       }
629       // Set Outputs
630       for (CeedInt out=0; out<numoutputfields; out++) {
631         // Get output vector
632         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
633         CeedChk(ierr);
634         // Check if active output
635         if (vec == CEED_VECTOR_ACTIVE) {
636           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
637                              CEED_USE_POINTER, a); CeedChk(ierr);
638           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
639           CeedChk(ierr);
640           a += size*Q*blksize; // Advance the pointer by the size of the output
641         }
642       }
643       // Apply QFunction
644       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
645       CeedChk(ierr);
646     }
647   }
648 
649   // Un-set output Qvecs to prevent accidental overwrite of Assembled
650   for (CeedInt out=0; out<numoutputfields; out++) {
651     // Get output vector
652     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
653     CeedChk(ierr);
654     // Check if active output
655     if (vec == CEED_VECTOR_ACTIVE) {
656       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
657                          NULL); CeedChk(ierr);
658     }
659   }
660 
661   // Restore input arrays
662   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
663          opinputfields, true, impl); CeedChk(ierr);
664 
665   // Output blocked restriction
666   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
667   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
668   CeedElemRestriction blkrstr;
669   ierr = CeedElemRestrictionCreateBlocked(ceed, numelements, Q, blksize,
670                                           numelements*Q,
671                                           numactivein*numactiveout,
672                                           CEED_MEM_HOST, CEED_COPY_VALUES,
673                                           NULL, &blkrstr); CeedChk(ierr);
674   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, CEED_NOTRANSPOSE,
675                                   lvec, *assembled, request); CeedChk(ierr);
676 
677   // Cleanup
678   for (CeedInt i=0; i<numactivein; i++) {
679     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
680   }
681   ierr = CeedFree(&activein); CeedChk(ierr);
682   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
683   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
684 
685   return 0;
686 }
687 
688 //------------------------------------------------------------------------------
689 // Operator Destroy
690 //------------------------------------------------------------------------------
691 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
692   int ierr;
693   CeedOperator_Blocked *impl;
694   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
695 
696   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
697     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
698     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
699   }
700   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
701   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
702   ierr = CeedFree(&impl->edata); CeedChk(ierr);
703   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
704 
705   for (CeedInt i=0; i<impl->numein; i++) {
706     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
707     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
708   }
709   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
710   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
711 
712   for (CeedInt i=0; i<impl->numeout; i++) {
713     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
714     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
715   }
716   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
717   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
718 
719   ierr = CeedFree(&impl); CeedChk(ierr);
720   return 0;
721 }
722 
723 //------------------------------------------------------------------------------
724 // Operator Create
725 //------------------------------------------------------------------------------
726 int CeedOperatorCreate_Blocked(CeedOperator op) {
727   int ierr;
728   Ceed ceed;
729   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
730   CeedOperator_Blocked *impl;
731 
732   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
733   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
734 
735   ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction",
736                                 CeedOperatorAssembleLinearQFunction_Blocked);
737   CeedChk(ierr);
738   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
739                                 CeedOperatorApply_Blocked); CeedChk(ierr);
740   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
741                                 CeedOperatorDestroy_Blocked); CeedChk(ierr);
742   return 0;
743 }
744 //------------------------------------------------------------------------------
745