xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision a8d322087fa8f150327cdc2bf14a171452b711ec)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include "ceed-blocked.h"
18 #include "../ref/ceed-ref.h"
19 
20 //------------------------------------------------------------------------------
21 // Setup Input/Output Fields
22 //------------------------------------------------------------------------------
23 static int CeedOperatorSetupFields_Blocked(CeedQFunction qf,
24     CeedOperator op, bool inOrOut,
25     CeedElemRestriction *blkrestr,
26     CeedVector *fullevecs, CeedVector *evecs,
27     CeedVector *qvecs, CeedInt starte,
28     CeedInt numfields, CeedInt Q) {
29   CeedInt dim, ierr, ncomp, size, P;
30   Ceed ceed;
31   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
32   CeedBasis basis;
33   CeedElemRestriction r;
34   CeedOperatorField *opfields;
35   CeedQFunctionField *qffields;
36   if (inOrOut) {
37     ierr = CeedOperatorGetFields(op, NULL, &opfields);
38     CeedChk(ierr);
39     ierr = CeedQFunctionGetFields(qf, NULL, &qffields);
40     CeedChk(ierr);
41   } else {
42     ierr = CeedOperatorGetFields(op, &opfields, NULL);
43     CeedChk(ierr);
44     ierr = CeedQFunctionGetFields(qf, &qffields, NULL);
45     CeedChk(ierr);
46   }
47   const CeedInt blksize = 8;
48 
49   // Loop over fields
50   for (CeedInt i=0; i<numfields; i++) {
51     CeedEvalMode emode;
52     ierr = CeedQFunctionFieldGetEvalMode(qffields[i], &emode); CeedChk(ierr);
53 
54     if (emode != CEED_EVAL_WEIGHT) {
55       ierr = CeedOperatorFieldGetElemRestriction(opfields[i], &r);
56       CeedChk(ierr);
57       CeedElemRestriction_Ref *data;
58       ierr = CeedElemRestrictionGetData(r, (void *)&data); CeedChk(ierr);
59       Ceed ceed;
60       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
61       CeedInt nelem, elemsize, nnodes;
62       CeedTransposeMode lmode;
63       ierr = CeedElemRestrictionGetLMode(r, &lmode); CeedChk(ierr);
64       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
65       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
66       ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
67       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
68       ierr = CeedElemRestrictionCreateBlocked(ceed, lmode, nelem, elemsize,
69                                               blksize, nnodes, ncomp,
70                                               CEED_MEM_HOST, CEED_COPY_VALUES,
71                                               data->indices, &blkrestr[i+starte]);
72       CeedChk(ierr);
73       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starte], NULL,
74                                              &fullevecs[i+starte]);
75       CeedChk(ierr);
76     }
77 
78     switch(emode) {
79     case CEED_EVAL_NONE:
80       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
81       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
82       break;
83     case CEED_EVAL_INTERP:
84       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
85       ierr = CeedElemRestrictionGetElementSize(r, &P);
86       CeedChk(ierr);
87       ierr = CeedVectorCreate(ceed, P*size*blksize, &evecs[i]); CeedChk(ierr);
88       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
89       break;
90     case CEED_EVAL_GRAD:
91       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
92       ierr = CeedQFunctionFieldGetSize(qffields[i], &size); CeedChk(ierr);
93       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
94       ierr = CeedElemRestrictionGetElementSize(r, &P);
95       CeedChk(ierr);
96       ierr = CeedVectorCreate(ceed, P*size/dim*blksize, &evecs[i]); CeedChk(ierr);
97       ierr = CeedVectorCreate(ceed, Q*size*blksize, &qvecs[i]); CeedChk(ierr);
98       break;
99     case CEED_EVAL_WEIGHT: // Only on input fields
100       ierr = CeedOperatorFieldGetBasis(opfields[i], &basis); CeedChk(ierr);
101       ierr = CeedVectorCreate(ceed, Q*blksize, &qvecs[i]); CeedChk(ierr);
102       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
103                             CEED_EVAL_WEIGHT, CEED_VECTOR_NONE, qvecs[i]);
104       CeedChk(ierr);
105 
106       break;
107     case CEED_EVAL_DIV:
108       break; // Not implemented
109     case CEED_EVAL_CURL:
110       break; // Not implemented
111     }
112   }
113   return 0;
114 }
115 
116 //------------------------------------------------------------------------------
117 // Setup Operator
118 //------------------------------------------------------------------------------
119 static int CeedOperatorSetup_Blocked(CeedOperator op) {
120   int ierr;
121   bool setupdone;
122   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
123   if (setupdone) return 0;
124   Ceed ceed;
125   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
126   CeedOperator_Blocked *impl;
127   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
128   CeedQFunction qf;
129   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
130   CeedInt Q, numinputfields, numoutputfields;
131   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
132   ierr = CeedQFunctionGetIdentityStatus(qf, &impl->identityqf); CeedChk(ierr);
133   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
134   CeedChk(ierr);
135   CeedOperatorField *opinputfields, *opoutputfields;
136   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
137   CeedChk(ierr);
138   CeedQFunctionField *qfinputfields, *qfoutputfields;
139   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
140   CeedChk(ierr);
141 
142   // Allocate
143   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->blkrestr);
144   CeedChk(ierr);
145   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->evecs);
146   CeedChk(ierr);
147   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->edata);
148   CeedChk(ierr);
149 
150   ierr = CeedCalloc(16, &impl->inputstate); CeedChk(ierr);
151   ierr = CeedCalloc(16, &impl->evecsin); CeedChk(ierr);
152   ierr = CeedCalloc(16, &impl->evecsout); CeedChk(ierr);
153   ierr = CeedCalloc(16, &impl->qvecsin); CeedChk(ierr);
154   ierr = CeedCalloc(16, &impl->qvecsout); CeedChk(ierr);
155 
156   impl->numein = numinputfields; impl->numeout = numoutputfields;
157 
158   // Set up infield and outfield pointer arrays
159   // Infields
160   ierr = CeedOperatorSetupFields_Blocked(qf, op, 0, impl->blkrestr,
161                                          impl->evecs, impl->evecsin,
162                                          impl->qvecsin, 0,
163                                          numinputfields, Q);
164   CeedChk(ierr);
165   // Outfields
166   ierr = CeedOperatorSetupFields_Blocked(qf, op, 1, impl->blkrestr,
167                                          impl->evecs, impl->evecsout,
168                                          impl->qvecsout, numinputfields,
169                                          numoutputfields, Q);
170   CeedChk(ierr);
171 
172   // Identity QFunctions
173   if (impl->identityqf) {
174     CeedEvalMode inmode, outmode;
175     CeedQFunctionField *infields, *outfields;
176     ierr = CeedQFunctionGetFields(qf, &infields, &outfields); CeedChk(ierr);
177 
178     for (CeedInt i=0; i<numinputfields; i++) {
179       ierr = CeedQFunctionFieldGetEvalMode(infields[i], &inmode);
180       CeedChk(ierr);
181       ierr = CeedQFunctionFieldGetEvalMode(outfields[i], &outmode);
182       CeedChk(ierr);
183 
184       ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
185       impl->qvecsout[i] = impl->qvecsin[i];
186       ierr = CeedVectorAddReference(impl->qvecsin[i]); CeedChk(ierr);
187     }
188   }
189 
190   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
191 
192   return 0;
193 }
194 
195 //------------------------------------------------------------------------------
196 // Setup Operator Inputs
197 //------------------------------------------------------------------------------
198 static inline int CeedOperatorSetupInputs_Blocked(CeedInt numinputfields,
199     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
200     CeedVector invec, bool skipactive, CeedOperator_Blocked *impl,
201     CeedRequest *request) {
202   CeedInt ierr;
203   CeedEvalMode emode;
204   CeedVector vec;
205   uint64_t state;
206 
207   for (CeedInt i=0; i<numinputfields; i++) {
208     // Get input vector
209     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
210     if (vec == CEED_VECTOR_ACTIVE) {
211       if (skipactive)
212         continue;
213       else
214         vec = invec;
215     }
216 
217     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
218     CeedChk(ierr);
219     if (emode == CEED_EVAL_WEIGHT) { // Skip
220     } else {
221       // Restrict
222       ierr = CeedVectorGetState(vec, &state); CeedChk(ierr);
223       if (state != impl->inputstate[i] || vec == invec) {
224         ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
225                                         vec, impl->evecs[i], request);
226         CeedChk(ierr);
227         impl->inputstate[i] = state;
228       }
229       // Get evec
230       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
231                                     (const CeedScalar **) &impl->edata[i]);
232       CeedChk(ierr);
233     }
234   }
235   return 0;
236 }
237 
238 //------------------------------------------------------------------------------
239 // Input Basis Action
240 //------------------------------------------------------------------------------
241 static inline int CeedOperatorInputBasis_Blocked(CeedInt e, CeedInt Q,
242     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
243     CeedInt numinputfields, CeedInt blksize, bool skipactive,
244     CeedOperator_Blocked *impl) {
245   CeedInt ierr;
246   CeedInt dim, elemsize, size;
247   CeedElemRestriction Erestrict;
248   CeedEvalMode emode;
249   CeedBasis basis;
250 
251   for (CeedInt i=0; i<numinputfields; i++) {
252     // Skip active input
253     if (skipactive) {
254       CeedVector vec;
255       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
256       if (vec == CEED_VECTOR_ACTIVE)
257         continue;
258     }
259 
260     // Get elemsize, emode, size
261     ierr = CeedOperatorFieldGetElemRestriction(opinputfields[i], &Erestrict);
262     CeedChk(ierr);
263     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
264     CeedChk(ierr);
265     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
266     CeedChk(ierr);
267     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
268     // Basis action
269     switch(emode) {
270     case CEED_EVAL_NONE:
271       ierr = CeedVectorSetArray(impl->qvecsin[i], CEED_MEM_HOST,
272                                 CEED_USE_POINTER,
273                                 &impl->edata[i][e*Q*size]); CeedChk(ierr);
274       break;
275     case CEED_EVAL_INTERP:
276       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
277       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
278                                 CEED_USE_POINTER,
279                                 &impl->edata[i][e*elemsize*size]);
280       CeedChk(ierr);
281       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
282                             CEED_EVAL_INTERP, impl->evecsin[i],
283                             impl->qvecsin[i]); CeedChk(ierr);
284       break;
285     case CEED_EVAL_GRAD:
286       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis); CeedChk(ierr);
287       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
288       ierr = CeedVectorSetArray(impl->evecsin[i], CEED_MEM_HOST,
289                                 CEED_USE_POINTER,
290                                 &impl->edata[i][e*elemsize*size/dim]);
291       CeedChk(ierr);
292       ierr = CeedBasisApply(basis, blksize, CEED_NOTRANSPOSE,
293                             CEED_EVAL_GRAD, impl->evecsin[i],
294                             impl->qvecsin[i]); CeedChk(ierr);
295       break;
296     case CEED_EVAL_WEIGHT:
297       break;  // No action
298     case CEED_EVAL_DIV:
299     case CEED_EVAL_CURL: {
300       // LCOV_EXCL_START
301       ierr = CeedOperatorFieldGetBasis(opinputfields[i], &basis);
302       CeedChk(ierr);
303       Ceed ceed;
304       ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr);
305       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
306       // LCOV_EXCL_STOP
307       break; // Not implemented
308     }
309     }
310   }
311   return 0;
312 }
313 
314 //------------------------------------------------------------------------------
315 // Output Basis Action
316 //------------------------------------------------------------------------------
317 static inline int CeedOperatorOutputBasis_Blocked(CeedInt e, CeedInt Q,
318     CeedQFunctionField *qfoutputfields, CeedOperatorField *opoutputfields,
319     CeedInt blksize, CeedInt numinputfields, CeedInt numoutputfields,
320     CeedOperator op, CeedOperator_Blocked *impl) {
321   CeedInt ierr;
322   CeedInt dim, elemsize, size;
323   CeedElemRestriction Erestrict;
324   CeedEvalMode emode;
325   CeedBasis basis;
326 
327   for (CeedInt i=0; i<numoutputfields; i++) {
328     // Get elemsize, emode, size
329     ierr = CeedOperatorFieldGetElemRestriction(opoutputfields[i], &Erestrict);
330     CeedChk(ierr);
331     ierr = CeedElemRestrictionGetElementSize(Erestrict, &elemsize);
332     CeedChk(ierr);
333     ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
334     CeedChk(ierr);
335     ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
336     // Basis action
337     switch(emode) {
338     case CEED_EVAL_NONE:
339       break; // No action
340     case CEED_EVAL_INTERP:
341       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
342       CeedChk(ierr);
343       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
344                                 CEED_USE_POINTER,
345                                 &impl->edata[i + numinputfields][e*elemsize*size]);
346       CeedChk(ierr);
347       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
348                             CEED_EVAL_INTERP, impl->qvecsout[i],
349                             impl->evecsout[i]); CeedChk(ierr);
350       break;
351     case CEED_EVAL_GRAD:
352       ierr = CeedOperatorFieldGetBasis(opoutputfields[i], &basis);
353       CeedChk(ierr);
354       ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr);
355       ierr = CeedVectorSetArray(impl->evecsout[i], CEED_MEM_HOST,
356                                 CEED_USE_POINTER,
357                                 &impl->edata[i + numinputfields][e*elemsize*size/dim]);
358       CeedChk(ierr);
359       ierr = CeedBasisApply(basis, blksize, CEED_TRANSPOSE,
360                             CEED_EVAL_GRAD, impl->qvecsout[i],
361                             impl->evecsout[i]); CeedChk(ierr);
362       break;
363     case CEED_EVAL_WEIGHT: {
364       // LCOV_EXCL_START
365       Ceed ceed;
366       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
367       return CeedError(ceed, 1, "CEED_EVAL_WEIGHT cannot be an output "
368                        "evaluation mode");
369       // LCOV_EXCL_STOP
370       break; // Should not occur
371     }
372     case CEED_EVAL_DIV:
373     case CEED_EVAL_CURL: {
374       // LCOV_EXCL_START
375       Ceed ceed;
376       ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
377       return CeedError(ceed, 1, "Ceed evaluation mode not implemented");
378       // LCOV_EXCL_STOP
379       break; // Not implemented
380     }
381     }
382   }
383   return 0;
384 }
385 
386 //------------------------------------------------------------------------------
387 // Restore Input Vectors
388 //------------------------------------------------------------------------------
389 static inline int CeedOperatorRestoreInputs_Blocked(CeedInt numinputfields,
390     CeedQFunctionField *qfinputfields, CeedOperatorField *opinputfields,
391     bool skipactive, CeedOperator_Blocked *impl) {
392   CeedInt ierr;
393   CeedEvalMode emode;
394 
395   for (CeedInt i=0; i<numinputfields; i++) {
396     // Skip active inputs
397     if (skipactive) {
398       CeedVector vec;
399       ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
400       if (vec == CEED_VECTOR_ACTIVE)
401         continue;
402     }
403     ierr = CeedQFunctionFieldGetEvalMode(qfinputfields[i], &emode);
404     CeedChk(ierr);
405     if (emode == CEED_EVAL_WEIGHT) { // Skip
406     } else {
407       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
408                                         (const CeedScalar **) &impl->edata[i]);
409       CeedChk(ierr);
410     }
411   }
412   return 0;
413 }
414 
415 //------------------------------------------------------------------------------
416 // Operator Apply
417 //------------------------------------------------------------------------------
418 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec,
419                                      CeedVector outvec,
420                                      CeedRequest *request) {
421   int ierr;
422   CeedOperator_Blocked *impl;
423   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
424   const CeedInt blksize = 8;
425   CeedInt Q, numinputfields, numoutputfields, numelements, size;
426   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
427   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
428   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
429   CeedQFunction qf;
430   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
431   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
432   CeedChk(ierr);
433   CeedOperatorField *opinputfields, *opoutputfields;
434   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
435   CeedChk(ierr);
436   CeedQFunctionField *qfinputfields, *qfoutputfields;
437   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
438   CeedChk(ierr);
439   CeedEvalMode emode;
440   CeedVector vec;
441 
442   // Setup
443   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
444 
445   // Input Evecs and Restriction
446   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
447                                          opinputfields, invec, false, impl,
448                                          request); CeedChk(ierr);
449 
450   // Output Evecs
451   for (CeedInt i=0; i<numoutputfields; i++) {
452     ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST,
453                               &impl->edata[i + numinputfields]); CeedChk(ierr);
454   }
455 
456   // Loop through elements
457   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
458     // Output pointers
459     for (CeedInt i=0; i<numoutputfields; i++) {
460       ierr = CeedQFunctionFieldGetEvalMode(qfoutputfields[i], &emode);
461       CeedChk(ierr);
462       if (emode == CEED_EVAL_NONE) {
463         ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size);
464         CeedChk(ierr);
465         ierr = CeedVectorSetArray(impl->qvecsout[i], CEED_MEM_HOST,
466                                   CEED_USE_POINTER,
467                                   &impl->edata[i + numinputfields][e*Q*size]);
468         CeedChk(ierr);
469       }
470     }
471 
472     // Input basis apply
473     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
474                                           numinputfields, blksize, false, impl);
475     CeedChk(ierr);
476 
477     // Q function
478     if (!impl->identityqf) {
479       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
480       CeedChk(ierr);
481     }
482 
483     // Output basis apply
484     ierr = CeedOperatorOutputBasis_Blocked(e, Q, qfoutputfields, opoutputfields,
485                                            blksize, numinputfields,
486                                            numoutputfields, op, impl);
487     CeedChk(ierr);
488   }
489 
490   // Output restriction
491   for (CeedInt i=0; i<numoutputfields; i++) {
492     // Restore evec
493     ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein],
494                                   &impl->edata[i + numinputfields]); CeedChk(ierr);
495     // Get output vector
496     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
497     // Active
498     if (vec == CEED_VECTOR_ACTIVE)
499       vec = outvec;
500     // Restrict
501     ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein],
502                                     CEED_TRANSPOSE, impl->evecs[i+impl->numein],
503                                     vec, request); CeedChk(ierr);
504 
505   }
506 
507   // Restore input arrays
508   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
509          opinputfields, false, impl); CeedChk(ierr);
510 
511   return 0;
512 }
513 
514 //------------------------------------------------------------------------------
515 // Assemble Linear QFunction
516 //------------------------------------------------------------------------------
517 static int CeedOperatorAssembleLinearQFunction_Blocked(CeedOperator op,
518     CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request) {
519   int ierr;
520   CeedOperator_Blocked *impl;
521   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
522   const CeedInt blksize = 8;
523   CeedInt Q, numinputfields, numoutputfields, numelements, size;
524   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
525   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
526   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
527   CeedQFunction qf;
528   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
529   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
530   CeedChk(ierr);
531   CeedOperatorField *opinputfields, *opoutputfields;
532   ierr = CeedOperatorGetFields(op, &opinputfields, &opoutputfields);
533   CeedChk(ierr);
534   CeedQFunctionField *qfinputfields, *qfoutputfields;
535   ierr = CeedQFunctionGetFields(qf, &qfinputfields, &qfoutputfields);
536   CeedChk(ierr);
537   CeedVector vec, lvec;
538   CeedInt numactivein = 0, numactiveout = 0;
539   CeedVector *activein = NULL;
540   CeedScalar *a, *tmp;
541   Ceed ceed;
542   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
543 
544   // Setup
545   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
546 
547   // Check for identity
548   if (impl->identityqf)
549     // LCOV_EXCL_START
550     return CeedError(ceed, 1, "Assembling identity qfunctions not supported");
551   // LCOV_EXCL_STOP
552 
553   // Input Evecs and Restriction
554   ierr = CeedOperatorSetupInputs_Blocked(numinputfields, qfinputfields,
555                                          opinputfields, NULL, true, impl,
556                                          request); CeedChk(ierr);
557 
558   // Count number of active input fields
559   for (CeedInt i=0; i<numinputfields; i++) {
560     // Get input vector
561     ierr = CeedOperatorFieldGetVector(opinputfields[i], &vec); CeedChk(ierr);
562     // Check if active input
563     if (vec == CEED_VECTOR_ACTIVE) {
564       ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChk(ierr);
565       ierr = CeedVectorSetValue(impl->qvecsin[i], 0.0); CeedChk(ierr);
566       ierr = CeedVectorGetArray(impl->qvecsin[i], CEED_MEM_HOST, &tmp);
567       CeedChk(ierr);
568       ierr = CeedRealloc(numactivein + size, &activein); CeedChk(ierr);
569       for (CeedInt field=0; field<size; field++) {
570         ierr = CeedVectorCreate(ceed, Q*blksize, &activein[numactivein+field]);
571         CeedChk(ierr);
572         ierr = CeedVectorSetArray(activein[numactivein+field], CEED_MEM_HOST,
573                                   CEED_USE_POINTER, &tmp[field*Q*blksize]);
574         CeedChk(ierr);
575       }
576       numactivein += size;
577       ierr = CeedVectorRestoreArray(impl->qvecsin[i], &tmp); CeedChk(ierr);
578     }
579   }
580 
581   // Count number of active output fields
582   for (CeedInt i=0; i<numoutputfields; i++) {
583     // Get output vector
584     ierr = CeedOperatorFieldGetVector(opoutputfields[i], &vec); CeedChk(ierr);
585     // Check if active output
586     if (vec == CEED_VECTOR_ACTIVE) {
587       ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChk(ierr);
588       numactiveout += size;
589     }
590   }
591 
592   // Check sizes
593   if (!numactivein || !numactiveout)
594     // LCOV_EXCL_START
595     return CeedError(ceed, 1, "Cannot assemble QFunction without active inputs "
596                      "and outputs");
597   // LCOV_EXCL_STOP
598 
599   // Setup lvec
600   ierr = CeedVectorCreate(ceed, nblks*blksize*Q*numactivein*numactiveout,
601                           &lvec); CeedChk(ierr);
602   ierr = CeedVectorGetArray(lvec, CEED_MEM_HOST, &a); CeedChk(ierr);
603 
604   // Create output restriction
605   CeedTransposeMode lmode = CEED_NOTRANSPOSE;
606   ierr = CeedElemRestrictionCreateIdentity(ceed, lmode, numelements, Q,
607          numelements*Q, numactivein*numactiveout, rstr); CeedChk(ierr);
608   // Create assembled vector
609   ierr = CeedVectorCreate(ceed, numelements*Q*numactivein*numactiveout,
610                           assembled); CeedChk(ierr);
611 
612   // Loop through elements
613   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
614     // Input basis apply
615     ierr = CeedOperatorInputBasis_Blocked(e, Q, qfinputfields, opinputfields,
616                                           numinputfields, blksize, true, impl);
617     CeedChk(ierr);
618 
619     // Assemble QFunction
620     for (CeedInt in=0; in<numactivein; in++) {
621       // Set Inputs
622       ierr = CeedVectorSetValue(activein[in], 1.0); CeedChk(ierr);
623       if (numactivein > 1) {
624         ierr = CeedVectorSetValue(activein[(in+numactivein-1)%numactivein],
625                                   0.0); CeedChk(ierr);
626       }
627       // Set Outputs
628       for (CeedInt out=0; out<numoutputfields; out++) {
629         // Get output vector
630         ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
631         CeedChk(ierr);
632         // Check if active output
633         if (vec == CEED_VECTOR_ACTIVE) {
634           CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST,
635                              CEED_USE_POINTER, a); CeedChk(ierr);
636           ierr = CeedQFunctionFieldGetSize(qfoutputfields[out], &size);
637           CeedChk(ierr);
638           a += size*Q*blksize; // Advance the pointer by the size of the output
639         }
640       }
641       // Apply QFunction
642       ierr = CeedQFunctionApply(qf, Q*blksize, impl->qvecsin, impl->qvecsout);
643       CeedChk(ierr);
644     }
645   }
646 
647   // Un-set output Qvecs to prevent accidental overwrite of Assembled
648   for (CeedInt out=0; out<numoutputfields; out++) {
649     // Get output vector
650     ierr = CeedOperatorFieldGetVector(opoutputfields[out], &vec);
651     CeedChk(ierr);
652     // Check if active output
653     if (vec == CEED_VECTOR_ACTIVE) {
654       CeedVectorSetArray(impl->qvecsout[out], CEED_MEM_HOST, CEED_COPY_VALUES,
655                          NULL); CeedChk(ierr);
656     }
657   }
658 
659   // Restore input arrays
660   ierr = CeedOperatorRestoreInputs_Blocked(numinputfields, qfinputfields,
661          opinputfields, true, impl); CeedChk(ierr);
662 
663   // Output blocked restriction
664   ierr = CeedVectorRestoreArray(lvec, &a); CeedChk(ierr);
665   ierr = CeedVectorSetValue(*assembled, 0.0); CeedChk(ierr);
666   CeedElemRestriction blkrstr;
667   ierr = CeedElemRestrictionCreateBlocked(ceed, lmode, numelements, Q, blksize,
668                                           numelements*Q,
669                                           numactivein*numactiveout,
670                                           CEED_MEM_HOST, CEED_COPY_VALUES,
671                                           NULL, &blkrstr); CeedChk(ierr);
672   ierr = CeedElemRestrictionApply(blkrstr, CEED_TRANSPOSE, lvec, *assembled,
673                                   request); CeedChk(ierr);
674 
675   // Cleanup
676   for (CeedInt i=0; i<numactivein; i++) {
677     ierr = CeedVectorDestroy(&activein[i]); CeedChk(ierr);
678   }
679   ierr = CeedFree(&activein); CeedChk(ierr);
680   ierr = CeedVectorDestroy(&lvec); CeedChk(ierr);
681   ierr = CeedElemRestrictionDestroy(&blkrstr); CeedChk(ierr);
682 
683   return 0;
684 }
685 
686 //------------------------------------------------------------------------------
687 // Operator Destroy
688 //------------------------------------------------------------------------------
689 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
690   int ierr;
691   CeedOperator_Blocked *impl;
692   ierr = CeedOperatorGetData(op, (void *)&impl); CeedChk(ierr);
693 
694   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
695     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
696     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
697   }
698   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
699   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
700   ierr = CeedFree(&impl->edata); CeedChk(ierr);
701   ierr = CeedFree(&impl->inputstate); CeedChk(ierr);
702 
703   for (CeedInt i=0; i<impl->numein; i++) {
704     ierr = CeedVectorDestroy(&impl->evecsin[i]); CeedChk(ierr);
705     ierr = CeedVectorDestroy(&impl->qvecsin[i]); CeedChk(ierr);
706   }
707   ierr = CeedFree(&impl->evecsin); CeedChk(ierr);
708   ierr = CeedFree(&impl->qvecsin); CeedChk(ierr);
709 
710   for (CeedInt i=0; i<impl->numeout; i++) {
711     ierr = CeedVectorDestroy(&impl->evecsout[i]); CeedChk(ierr);
712     ierr = CeedVectorDestroy(&impl->qvecsout[i]); CeedChk(ierr);
713   }
714   ierr = CeedFree(&impl->evecsout); CeedChk(ierr);
715   ierr = CeedFree(&impl->qvecsout); CeedChk(ierr);
716 
717   ierr = CeedFree(&impl); CeedChk(ierr);
718   return 0;
719 }
720 
721 //------------------------------------------------------------------------------
722 // Operator Create
723 //------------------------------------------------------------------------------
724 int CeedOperatorCreate_Blocked(CeedOperator op) {
725   int ierr;
726   Ceed ceed;
727   ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
728   CeedOperator_Blocked *impl;
729 
730   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
731   ierr = CeedOperatorSetData(op, (void *)&impl); CeedChk(ierr);
732 
733   ierr = CeedSetBackendFunction(ceed, "Operator", op, "AssembleLinearQFunction",
734                                 CeedOperatorAssembleLinearQFunction_Blocked);
735   CeedChk(ierr);
736   ierr = CeedSetBackendFunction(ceed, "Operator", op, "ApplyAdd",
737                                 CeedOperatorApply_Blocked); CeedChk(ierr);
738   ierr = CeedSetBackendFunction(ceed, "Operator", op, "Destroy",
739                                 CeedOperatorDestroy_Blocked); CeedChk(ierr);
740   return 0;
741 }
742 //------------------------------------------------------------------------------
743