xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision ba4bcd917872bbba2ca5b7fccd7fd5f16d3aa2e4)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed-impl.h>
18 #include <string.h>
19 #include "ceed-blocked.h"
20 #include "../ref/ceed-ref.h"
21 
22 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
23   CeedOperator_Blocked *impl = op->data;
24   int ierr;
25 
26   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
27     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
28     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
29   }
30   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
31   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
32   ierr = CeedFree(&impl->edata); CeedChk(ierr);
33 
34   for (CeedInt i=0; i<impl->numqin+impl->numqout; i++) {
35     ierr = CeedFree(&impl->qdata_alloc[i]); CeedChk(ierr);
36   }
37   ierr = CeedFree(&impl->qdata_alloc); CeedChk(ierr);
38   ierr = CeedFree(&impl->qdata); CeedChk(ierr);
39 
40   ierr = CeedFree(&impl->indata); CeedChk(ierr);
41   ierr = CeedFree(&impl->outdata); CeedChk(ierr);
42 
43   ierr = CeedFree(&op->data); CeedChk(ierr);
44   return 0;
45 }
46 
47 /*
48   Setup infields or outfields
49  */
50 static int CeedOperatorSetupFields_Blocked(struct CeedQFunctionField qfields[16],
51                                        struct CeedOperatorField ofields[16],
52                                        CeedElemRestriction *blkrestr,
53                                        CeedVector *evecs, CeedScalar **qdata,
54                                        CeedScalar **qdata_alloc, CeedScalar **indata,
55                                        CeedInt starti, CeedInt startq,
56                                        CeedInt numfields, CeedInt Q) {
57   CeedInt dim, ierr, iq=startq, ncomp;
58   const CeedInt blksize = 8;
59 
60   // Loop over fields
61   for (CeedInt i=0; i<numfields; i++) {
62     CeedEvalMode emode = qfields[i].emode;
63 
64     if (emode != CEED_EVAL_WEIGHT) {
65       CeedElemRestriction r = ofields[i].Erestrict;
66       CeedElemRestriction_Ref *data = r->data;
67       ierr = CeedElemRestrictionCreateBlocked(r->ceed, r->nelem, r->elemsize,
68                                               blksize, r->ndof, r->ncomp,
69                                               CEED_MEM_HOST, CEED_COPY_VALUES,
70                                               data->indices, &blkrestr[i+starti]);
71       CeedChk(ierr);
72       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starti], NULL,
73                                              &evecs[i+starti]);
74       CeedChk(ierr);
75     }
76 
77     switch(emode) {
78     case CEED_EVAL_NONE:
79       break; // No action
80     case CEED_EVAL_INTERP:
81       ncomp = qfields[i].ncomp;
82       ierr = CeedMalloc(Q*ncomp*blksize, &qdata_alloc[iq]); CeedChk(ierr);
83       qdata[i + starti] = qdata_alloc[iq];
84       iq++;
85       break;
86     case CEED_EVAL_GRAD:
87       ncomp = qfields[i].ncomp;
88       dim = ofields[i].basis->dim;
89       ierr = CeedMalloc(Q*ncomp*dim*blksize, &qdata_alloc[iq]); CeedChk(ierr);
90       qdata[i + starti] = qdata_alloc[iq];
91       iq++;
92       break;
93     case CEED_EVAL_WEIGHT: // Only on input fields
94       ierr = CeedMalloc(Q*blksize, &qdata_alloc[iq]); CeedChk(ierr);
95       ierr = CeedBasisApply(ofields[iq].basis, blksize, CEED_NOTRANSPOSE,
96                             CEED_EVAL_WEIGHT, NULL, qdata_alloc[iq]); CeedChk(ierr);
97       qdata[i] = qdata_alloc[iq];
98       indata[i] = qdata[i];
99       iq++;
100       break;
101     case CEED_EVAL_DIV:
102       break; // Not implimented
103     case CEED_EVAL_CURL:
104       break; // Not implimented
105     }
106   }
107   return 0;
108 }
109 
110 /*
111   CeedOperator needs to connect all the named fields (be they active or passive)
112   to the named inputs and outputs of its CeedQFunction.
113  */
114 static int CeedOperatorSetup_Blocked(CeedOperator op) {
115   if (op->setupdone) return 0;
116   CeedOperator_Blocked *impl = op->data;
117   CeedQFunction qf = op->qf;
118   CeedInt Q = op->numqpoints, numinputfields, numoutputfields;
119   int ierr;
120 
121   // Count infield and outfield array sizes and evectors
122   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
123   CeedChk(ierr);
124   impl->numein = numinputfields;
125   for (CeedInt i=0; i<numinputfields; i++) {
126     CeedEvalMode emode = qf->inputfields[i].emode;
127     impl->numqin += !!(emode & CEED_EVAL_INTERP) + !!(emode & CEED_EVAL_GRAD) +
128                     !!(emode & CEED_EVAL_WEIGHT);
129   }
130   impl->numeout = numoutputfields;
131   for (CeedInt i=0; i<numoutputfields; i++) {
132     CeedEvalMode emode = qf->outputfields[i].emode;
133     impl->numqout += !!(emode & CEED_EVAL_INTERP) + !!(emode & CEED_EVAL_GRAD);
134   }
135 
136   // Allocate
137   ierr = CeedCalloc(impl->numein + impl->numeout, &impl->blkrestr);
138   CeedChk(ierr);
139   ierr = CeedCalloc(impl->numein + impl->numeout, &impl->evecs);
140   CeedChk(ierr);
141   ierr = CeedCalloc(impl->numein + impl->numeout, &impl->edata);
142   CeedChk(ierr);
143 
144   ierr = CeedCalloc(impl->numqin + impl->numqout, &impl->qdata_alloc);
145   CeedChk(ierr);
146   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->qdata);
147   CeedChk(ierr);
148 
149   ierr = CeedCalloc(16, &impl->indata); CeedChk(ierr);
150   ierr = CeedCalloc(16, &impl->outdata); CeedChk(ierr);
151   // Set up infield and outfield pointer arrays
152   // Infields
153   ierr = CeedOperatorSetupFields_Blocked(qf->inputfields, op->inputfields,
154                                      impl->blkrestr, impl->evecs,
155                                      impl->qdata, impl->qdata_alloc,
156                                      impl->indata, 0,
157                                      0, numinputfields, Q);
158   CeedChk(ierr);
159   // Outfields
160   ierr = CeedOperatorSetupFields_Blocked(qf->outputfields, op->outputfields,
161                                      impl->blkrestr, impl->evecs,
162                                      impl->qdata, impl->qdata_alloc,
163                                      impl->indata, numinputfields,
164                                      impl->numqin, numoutputfields, Q);
165   CeedChk(ierr);
166   // Input Qvecs
167   for (CeedInt i=0; i<numinputfields; i++) {
168     CeedEvalMode emode = qf->inputfields[i].emode;
169     if ((emode != CEED_EVAL_NONE) && (emode != CEED_EVAL_WEIGHT))
170       impl->indata[i] =  impl->qdata[i];
171   }
172   // Output Qvecs
173   for (CeedInt i=0; i<numoutputfields; i++) {
174     CeedEvalMode emode = qf->outputfields[i].emode;
175     if (emode != CEED_EVAL_NONE)
176       impl->outdata[i] =  impl->qdata[i + numinputfields];
177   }
178 
179   op->setupdone = 1;
180 
181   return 0;
182 }
183 
184 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec,
185                                  CeedVector outvec, CeedRequest *request) {
186   CeedOperator_Blocked *impl = op->data;
187   const CeedInt blksize = 8;
188   CeedInt Q = op->numqpoints, elemsize, numinputfields, numoutputfields,
189           nblks = (op->numelements/blksize) + !!(op->numelements%blksize);
190   int ierr;
191   CeedQFunction qf = op->qf;
192   CeedTransposeMode lmode = CEED_NOTRANSPOSE;
193 
194   // Setup
195   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
196   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
197   CeedChk(ierr);
198 
199   // Input Evecs and Restriction
200   for (CeedInt i=0; i<numinputfields; i++) {
201     CeedEvalMode emode = qf->inputfields[i].emode;
202     if (emode == CEED_EVAL_WEIGHT) { // Skip
203     } else {
204       // Active
205       // Restrict
206       if (op->inputfields[i].vec == CEED_VECTOR_ACTIVE) {
207         ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
208                                         lmode, invec, impl->evecs[i],
209                                         request); CeedChk(ierr); CeedChk(ierr);
210       } else {
211         // Passive
212         // Restrict
213         ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
214                                         lmode, op->inputfields[i].vec, impl->evecs[i],
215                                         request); CeedChk(ierr);
216       }
217       // Get evec
218       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
219                                     (const CeedScalar **) &impl->edata[i]);
220       CeedChk(ierr);
221     }
222   }
223 
224   // Output Evecs
225   for (CeedInt i=0; i<numoutputfields; i++) {
226     ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST,
227                               &impl->edata[i + numinputfields]); CeedChk(ierr);
228   }
229 
230   // Loop through elements
231   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
232     // Input basis apply if needed
233     for (CeedInt i=0; i<numinputfields; i++) {
234       // Get elemsize, emode, ncomp
235       elemsize = op->inputfields[i].Erestrict->elemsize;
236       CeedEvalMode emode = qf->inputfields[i].emode;
237       CeedInt ncomp = qf->inputfields[i].ncomp;
238       // Basis action
239       switch(emode) {
240       case CEED_EVAL_NONE:
241         impl->indata[i] = &impl->edata[i][e*Q*ncomp];
242         break;
243       case CEED_EVAL_INTERP:
244         ierr = CeedBasisApply(op->inputfields[i].basis, blksize, CEED_NOTRANSPOSE,
245                               CEED_EVAL_INTERP, &impl->edata[i][e*elemsize*ncomp], impl->qdata[i]);
246         CeedChk(ierr);
247         break;
248       case CEED_EVAL_GRAD:
249         ierr = CeedBasisApply(op->inputfields[i].basis, blksize, CEED_NOTRANSPOSE,
250                               CEED_EVAL_GRAD, &impl->edata[i][e*elemsize*ncomp], impl->qdata[i]);
251         CeedChk(ierr);
252         break;
253       case CEED_EVAL_WEIGHT:
254         break;  // No action
255       case CEED_EVAL_DIV:
256         break; // Not implimented
257       case CEED_EVAL_CURL:
258         break; // Not implimented
259       }
260     }
261 
262     // Output pointers
263     for (CeedInt i=0; i<numoutputfields; i++) {
264       CeedEvalMode emode = qf->outputfields[i].emode;
265       if (emode == CEED_EVAL_NONE) {
266         CeedInt ncomp = qf->outputfields[i].ncomp;
267         impl->outdata[i] = &impl->edata[i + numinputfields][e*Q*ncomp];
268       }
269     }
270     // Q function
271     ierr = CeedQFunctionApply(op->qf, Q*blksize,
272                               (const CeedScalar * const*) impl->indata,
273                               impl->outdata); CeedChk(ierr);
274 
275     // Output basis apply if needed
276     for (CeedInt i=0; i<numoutputfields; i++) {
277       // Get elemsize, emode, ncomp
278       elemsize = op->outputfields[i].Erestrict->elemsize;
279       CeedInt ncomp = qf->outputfields[i].ncomp;
280       CeedEvalMode emode = qf->outputfields[i].emode;
281       // Basis action
282       switch(emode) {
283       case CEED_EVAL_NONE:
284         break; // No action
285       case CEED_EVAL_INTERP:
286         ierr = CeedBasisApply(op->outputfields[i].basis, blksize, CEED_TRANSPOSE,
287                               CEED_EVAL_INTERP, impl->outdata[i],
288                               &impl->edata[i + numinputfields][e*elemsize*ncomp]);
289         CeedChk(ierr);
290         break;
291       case CEED_EVAL_GRAD:
292         ierr = CeedBasisApply(op->outputfields[i].basis, blksize, CEED_TRANSPOSE,
293                               CEED_EVAL_GRAD,
294                               impl->outdata[i], &impl->edata[i + numinputfields][e*elemsize*ncomp]);
295         CeedChk(ierr);
296         break;
297       case CEED_EVAL_WEIGHT:
298         return CeedError(op->ceed, 1,
299                          "CEED_EVAL_WEIGHT cannot be an output evaluation mode");
300         break; // Should not occur
301       case CEED_EVAL_DIV:
302         break; // Not implimented
303       case CEED_EVAL_CURL:
304         break; // Not implimented
305       }
306     }
307   }
308 
309   // Zero lvecs
310   ierr = CeedVectorSetValue(outvec, 0.0); CeedChk(ierr);
311   for (CeedInt i=0; i<qf->numoutputfields; i++)
312     if (op->outputfields[i].vec != CEED_VECTOR_ACTIVE) {
313       ierr = CeedVectorSetValue(op->outputfields[i].vec, 0.0); CeedChk(ierr);
314     }
315 
316   // Output restriction
317   for (CeedInt i=0; i<numoutputfields; i++) {
318     // Restore evec
319     ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein],
320                                   &impl->edata[i + numinputfields]); CeedChk(ierr);
321     // Active
322     if (op->outputfields[i].vec == CEED_VECTOR_ACTIVE) {
323       // Restrict
324       ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE,
325                                       lmode, impl->evecs[i+impl->numein], outvec, request); CeedChk(ierr);
326     } else {
327       // Passive
328       // Restrict
329       ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE,
330                                       lmode, impl->evecs[i+impl->numein], op->outputfields[i].vec,
331                                       request); CeedChk(ierr);
332     }
333   }
334 
335   // Restore input arrays
336   for (CeedInt i=0; i<numinputfields; i++) {
337     CeedEvalMode emode = qf->inputfields[i].emode;
338     if (emode == CEED_EVAL_WEIGHT) { // Skip
339     } else {
340       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
341                                         (const CeedScalar **) &impl->edata[i]); CeedChk(ierr);
342     }
343   }
344 
345   return 0;
346 }
347 
348 int CeedOperatorCreate_Blocked(CeedOperator op) {
349   CeedOperator_Blocked *impl;
350   int ierr;
351 
352   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
353   op->data = impl;
354   op->Destroy = CeedOperatorDestroy_Blocked;
355   op->Apply = CeedOperatorApply_Blocked;
356   return 0;
357 }
358