xref: /libCEED/backends/blocked/ceed-blocked-operator.c (revision d863ab9ba6a0d47c58445a35d35b36efba07fc93)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <string.h>
18 #include "ceed-blocked.h"
19 #include "../ref/ceed-ref.h"
20 
21 static int CeedOperatorDestroy_Blocked(CeedOperator op) {
22   int ierr;
23   CeedOperator_Blocked *impl;
24   ierr = CeedOperatorGetData(op, (void*)&impl); CeedChk(ierr);
25 
26   for (CeedInt i=0; i<impl->numein+impl->numeout; i++) {
27     ierr = CeedElemRestrictionDestroy(&impl->blkrestr[i]); CeedChk(ierr);
28     ierr = CeedVectorDestroy(&impl->evecs[i]); CeedChk(ierr);
29   }
30   ierr = CeedFree(&impl->blkrestr); CeedChk(ierr);
31   ierr = CeedFree(&impl->evecs); CeedChk(ierr);
32   ierr = CeedFree(&impl->edata); CeedChk(ierr);
33 
34   for (CeedInt i=0; i<impl->numqin+impl->numqout; i++) {
35     ierr = CeedFree(&impl->qdata_alloc[i]); CeedChk(ierr);
36   }
37   ierr = CeedFree(&impl->qdata_alloc); CeedChk(ierr);
38   ierr = CeedFree(&impl->qdata); CeedChk(ierr);
39 
40   ierr = CeedFree(&impl->indata); CeedChk(ierr);
41   ierr = CeedFree(&impl->outdata); CeedChk(ierr);
42 
43   ierr = CeedFree(&op->data); CeedChk(ierr);
44   return 0;
45 }
46 
47 /*
48   Setup infields or outfields
49  */
50 static int CeedOperatorSetupFields_Blocked(struct CeedQFunctionField qfields[16],
51                                        struct CeedOperatorField ofields[16],
52                                        CeedElemRestriction *blkrestr,
53                                        CeedVector *evecs, CeedScalar **qdata,
54                                        CeedScalar **qdata_alloc, CeedScalar **indata,
55                                        CeedInt starti, CeedInt startq,
56                                        CeedInt numfields, CeedInt Q) {
57   CeedInt dim, ierr, iq=startq, ncomp;
58   const CeedInt blksize = 8;
59 
60   // Loop over fields
61   for (CeedInt i=0; i<numfields; i++) {
62     CeedEvalMode emode = qfields[i].emode;
63 
64     if (emode != CEED_EVAL_WEIGHT) {
65       CeedElemRestriction r = ofields[i].Erestrict;
66       CeedElemRestriction_Ref *data = r->data;
67       Ceed ceed;
68       ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
69       CeedInt nelem, elemsize, ndof, ncomp;
70       ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
71       ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
72       ierr = CeedElemRestrictionGetNumDoF(r, &ndof); CeedChk(ierr);
73       ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
74       ierr = CeedElemRestrictionCreateBlocked(ceed, nelem, elemsize,
75                                               blksize, ndof, ncomp,
76                                               CEED_MEM_HOST, CEED_COPY_VALUES,
77                                               data->indices, &blkrestr[i+starti]);
78       CeedChk(ierr);
79       ierr = CeedElemRestrictionCreateVector(blkrestr[i+starti], NULL,
80                                              &evecs[i+starti]);
81       CeedChk(ierr);
82     }
83 
84     switch(emode) {
85     case CEED_EVAL_NONE:
86       break; // No action
87     case CEED_EVAL_INTERP:
88       ncomp = qfields[i].ncomp;
89       ierr = CeedMalloc(Q*ncomp*blksize, &qdata_alloc[iq]); CeedChk(ierr);
90       qdata[i + starti] = qdata_alloc[iq];
91       iq++;
92       break;
93     case CEED_EVAL_GRAD:
94       ncomp = qfields[i].ncomp;
95       ierr = CeedBasisGetDimension(ofields[i].basis, &dim); CeedChk(ierr);
96       ierr = CeedMalloc(Q*ncomp*dim*blksize, &qdata_alloc[iq]); CeedChk(ierr);
97       qdata[i + starti] = qdata_alloc[iq];
98       iq++;
99       break;
100     case CEED_EVAL_WEIGHT: // Only on input fields
101       ierr = CeedMalloc(Q*blksize, &qdata_alloc[iq]); CeedChk(ierr);
102       ierr = CeedBasisApply(ofields[iq].basis, blksize, CEED_NOTRANSPOSE,
103                             CEED_EVAL_WEIGHT, NULL, qdata_alloc[iq]); CeedChk(ierr);
104       qdata[i] = qdata_alloc[iq];
105       indata[i] = qdata[i];
106       iq++;
107       break;
108     case CEED_EVAL_DIV:
109       break; // Not implimented
110     case CEED_EVAL_CURL:
111       break; // Not implimented
112     }
113   }
114   return 0;
115 }
116 
117 /*
118   CeedOperator needs to connect all the named fields (be they active or passive)
119   to the named inputs and outputs of its CeedQFunction.
120  */
121 static int CeedOperatorSetup_Blocked(CeedOperator op) {
122   int ierr;
123   bool setupdone;
124   ierr = CeedOperatorGetSetupStatus(op, &setupdone); CeedChk(ierr);
125   if (setupdone) return 0;
126   CeedOperator_Blocked *impl;
127   ierr = CeedOperatorGetData(op, (void*)&impl); CeedChk(ierr);
128   CeedQFunction qf;
129   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
130   CeedInt Q, numinputfields, numoutputfields;
131   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
132   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
133   CeedChk(ierr);
134 
135   // Count infield and outfield array sizes and evectors
136   impl->numein = numinputfields;
137   for (CeedInt i=0; i<numinputfields; i++) {
138     CeedEvalMode emode = qf->inputfields[i].emode;
139     impl->numqin += !!(emode & CEED_EVAL_INTERP) + !!(emode & CEED_EVAL_GRAD) +
140                     !!(emode & CEED_EVAL_WEIGHT);
141   }
142   impl->numeout = numoutputfields;
143   for (CeedInt i=0; i<numoutputfields; i++) {
144     CeedEvalMode emode = qf->outputfields[i].emode;
145     impl->numqout += !!(emode & CEED_EVAL_INTERP) + !!(emode & CEED_EVAL_GRAD);
146   }
147 
148   // Allocate
149   ierr = CeedCalloc(impl->numein + impl->numeout, &impl->blkrestr);
150   CeedChk(ierr);
151   ierr = CeedCalloc(impl->numein + impl->numeout, &impl->evecs);
152   CeedChk(ierr);
153   ierr = CeedCalloc(impl->numein + impl->numeout, &impl->edata);
154   CeedChk(ierr);
155 
156   ierr = CeedCalloc(impl->numqin + impl->numqout, &impl->qdata_alloc);
157   CeedChk(ierr);
158   ierr = CeedCalloc(numinputfields + numoutputfields, &impl->qdata);
159   CeedChk(ierr);
160 
161   ierr = CeedCalloc(16, &impl->indata); CeedChk(ierr);
162   ierr = CeedCalloc(16, &impl->outdata); CeedChk(ierr);
163   // Set up infield and outfield pointer arrays
164   // Infields
165   ierr = CeedOperatorSetupFields_Blocked(qf->inputfields, op->inputfields,
166                                      impl->blkrestr, impl->evecs,
167                                      impl->qdata, impl->qdata_alloc,
168                                      impl->indata, 0,
169                                      0, numinputfields, Q);
170   CeedChk(ierr);
171   // Outfields
172   ierr = CeedOperatorSetupFields_Blocked(qf->outputfields, op->outputfields,
173                                      impl->blkrestr, impl->evecs,
174                                      impl->qdata, impl->qdata_alloc,
175                                      impl->indata, numinputfields,
176                                      impl->numqin, numoutputfields, Q);
177   CeedChk(ierr);
178   // Input Qvecs
179   for (CeedInt i=0; i<numinputfields; i++) {
180     CeedEvalMode emode = qf->inputfields[i].emode;
181     if ((emode != CEED_EVAL_NONE) && (emode != CEED_EVAL_WEIGHT))
182       impl->indata[i] =  impl->qdata[i];
183   }
184   // Output Qvecs
185   for (CeedInt i=0; i<numoutputfields; i++) {
186     CeedEvalMode emode = qf->outputfields[i].emode;
187     if (emode != CEED_EVAL_NONE)
188       impl->outdata[i] =  impl->qdata[i + numinputfields];
189   }
190 
191   ierr = CeedOperatorSetSetupDone(op); CeedChk(ierr);
192 
193   return 0;
194 }
195 
196 static int CeedOperatorApply_Blocked(CeedOperator op, CeedVector invec,
197                                  CeedVector outvec, CeedRequest *request) {
198   int ierr;
199   CeedOperator_Blocked *impl;
200   ierr = CeedOperatorGetData(op, (void*)&impl); CeedChk(ierr);
201   const CeedInt blksize = 8;
202   CeedInt Q, elemsize, numinputfields, numoutputfields, numelements;
203   ierr = CeedOperatorGetNumElements(op, &numelements); CeedChk(ierr);
204   ierr = CeedOperatorGetNumQuadraturePoints(op, &Q); CeedChk(ierr);
205   CeedInt nblks = (numelements/blksize) + !!(numelements%blksize);
206   CeedQFunction qf;
207   ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr);
208   ierr= CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields);
209   CeedChk(ierr);
210   CeedTransposeMode lmode = CEED_NOTRANSPOSE;
211 
212   // Setup
213   ierr = CeedOperatorSetup_Blocked(op); CeedChk(ierr);
214 
215   // Input Evecs and Restriction
216   for (CeedInt i=0; i<numinputfields; i++) {
217     CeedEvalMode emode = qf->inputfields[i].emode;
218     if (emode == CEED_EVAL_WEIGHT) { // Skip
219     } else {
220       // Active
221       // Restrict
222       if (op->inputfields[i].vec == CEED_VECTOR_ACTIVE) {
223         ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
224                                         lmode, invec, impl->evecs[i],
225                                         request); CeedChk(ierr); CeedChk(ierr);
226       } else {
227         // Passive
228         // Restrict
229         ierr = CeedElemRestrictionApply(impl->blkrestr[i], CEED_NOTRANSPOSE,
230                                         lmode, op->inputfields[i].vec, impl->evecs[i],
231                                         request); CeedChk(ierr);
232       }
233       // Get evec
234       ierr = CeedVectorGetArrayRead(impl->evecs[i], CEED_MEM_HOST,
235                                     (const CeedScalar **) &impl->edata[i]);
236       CeedChk(ierr);
237     }
238   }
239 
240   // Output Evecs
241   for (CeedInt i=0; i<numoutputfields; i++) {
242     ierr = CeedVectorGetArray(impl->evecs[i+impl->numein], CEED_MEM_HOST,
243                               &impl->edata[i + numinputfields]); CeedChk(ierr);
244   }
245 
246   // Loop through elements
247   for (CeedInt e=0; e<nblks*blksize; e+=blksize) {
248     // Input basis apply if needed
249     for (CeedInt i=0; i<numinputfields; i++) {
250       // Get elemsize, emode, ncomp
251       ierr = CeedElemRestrictionGetElementSize(
252                op->inputfields[i].Erestrict, &elemsize); CeedChk(ierr);
253       CeedEvalMode emode = qf->inputfields[i].emode;
254       CeedInt ncomp = qf->inputfields[i].ncomp;
255       // Basis action
256       switch(emode) {
257       case CEED_EVAL_NONE:
258         impl->indata[i] = &impl->edata[i][e*Q*ncomp];
259         break;
260       case CEED_EVAL_INTERP:
261         ierr = CeedBasisApply(op->inputfields[i].basis, blksize, CEED_NOTRANSPOSE,
262                               CEED_EVAL_INTERP, &impl->edata[i][e*elemsize*ncomp], impl->qdata[i]);
263         CeedChk(ierr);
264         break;
265       case CEED_EVAL_GRAD:
266         ierr = CeedBasisApply(op->inputfields[i].basis, blksize, CEED_NOTRANSPOSE,
267                               CEED_EVAL_GRAD, &impl->edata[i][e*elemsize*ncomp], impl->qdata[i]);
268         CeedChk(ierr);
269         break;
270       case CEED_EVAL_WEIGHT:
271         break;  // No action
272       case CEED_EVAL_DIV:
273         break; // Not implimented
274       case CEED_EVAL_CURL:
275         break; // Not implimented
276       }
277     }
278 
279     // Output pointers
280     for (CeedInt i=0; i<numoutputfields; i++) {
281       CeedEvalMode emode = qf->outputfields[i].emode;
282       if (emode == CEED_EVAL_NONE) {
283         CeedInt ncomp = qf->outputfields[i].ncomp;
284         impl->outdata[i] = &impl->edata[i + numinputfields][e*Q*ncomp];
285       }
286     }
287     // Q function
288     ierr = CeedQFunctionApply(qf, Q*blksize,
289                               (const CeedScalar * const*) impl->indata,
290                               impl->outdata); CeedChk(ierr);
291 
292     // Output basis apply if needed
293     for (CeedInt i=0; i<numoutputfields; i++) {
294       // Get elemsize, emode, ncomp
295       ierr = CeedElemRestrictionGetElementSize(
296                op->outputfields[i].Erestrict, &elemsize); CeedChk(ierr);
297       CeedInt ncomp = qf->outputfields[i].ncomp;
298       CeedEvalMode emode = qf->outputfields[i].emode;
299       // Basis action
300       switch(emode) {
301       case CEED_EVAL_NONE:
302         break; // No action
303       case CEED_EVAL_INTERP:
304         ierr = CeedBasisApply(op->outputfields[i].basis, blksize, CEED_TRANSPOSE,
305                               CEED_EVAL_INTERP, impl->outdata[i],
306                               &impl->edata[i + numinputfields][e*elemsize*ncomp]);
307         CeedChk(ierr);
308         break;
309       case CEED_EVAL_GRAD:
310         ierr = CeedBasisApply(op->outputfields[i].basis, blksize, CEED_TRANSPOSE,
311                               CEED_EVAL_GRAD,
312                               impl->outdata[i], &impl->edata[i + numinputfields][e*elemsize*ncomp]);
313         CeedChk(ierr);
314         break;
315       case CEED_EVAL_WEIGHT: {
316         Ceed ceed;
317         ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr);
318         return CeedError(ceed, 1,
319                          "CEED_EVAL_WEIGHT cannot be an output evaluation mode");
320         break; // Should not occur
321       }
322       case CEED_EVAL_DIV:
323         break; // Not implimented
324       case CEED_EVAL_CURL:
325         break; // Not implimented
326       }
327     }
328   }
329 
330   // Zero lvecs
331   ierr = CeedVectorSetValue(outvec, 0.0); CeedChk(ierr);
332   for (CeedInt i=0; i<numoutputfields; i++)
333     if (op->outputfields[i].vec != CEED_VECTOR_ACTIVE) {
334       ierr = CeedVectorSetValue(op->outputfields[i].vec, 0.0); CeedChk(ierr);
335     }
336 
337   // Output restriction
338   for (CeedInt i=0; i<numoutputfields; i++) {
339     // Restore evec
340     ierr = CeedVectorRestoreArray(impl->evecs[i+impl->numein],
341                                   &impl->edata[i + numinputfields]); CeedChk(ierr);
342     // Active
343     if (op->outputfields[i].vec == CEED_VECTOR_ACTIVE) {
344       // Restrict
345       ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE,
346                                       lmode, impl->evecs[i+impl->numein], outvec, request); CeedChk(ierr);
347     } else {
348       // Passive
349       // Restrict
350       ierr = CeedElemRestrictionApply(impl->blkrestr[i+impl->numein], CEED_TRANSPOSE,
351                                       lmode, impl->evecs[i+impl->numein], op->outputfields[i].vec,
352                                       request); CeedChk(ierr);
353     }
354   }
355 
356   // Restore input arrays
357   for (CeedInt i=0; i<numinputfields; i++) {
358     CeedEvalMode emode = qf->inputfields[i].emode;
359     if (emode == CEED_EVAL_WEIGHT) { // Skip
360     } else {
361       ierr = CeedVectorRestoreArrayRead(impl->evecs[i],
362                                         (const CeedScalar **) &impl->edata[i]); CeedChk(ierr);
363     }
364   }
365 
366   return 0;
367 }
368 
369 int CeedOperatorCreate_Blocked(CeedOperator op) {
370   int ierr;
371   CeedOperator_Blocked *impl;
372 
373   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
374   op->data = impl;
375   op->Destroy = CeedOperatorDestroy_Blocked;
376   op->Apply = CeedOperatorApply_Blocked;
377   return 0;
378 }
379