xref: /libCEED/backends/ref/ceed-ref-restriction.c (revision 5c7b696c8582f667cb0fcf7d02b9ef3156803fee)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include "ceed-ref.h"
18 
19 //------------------------------------------------------------------------------
20 // Core ElemRestriction Apply Code
21 //------------------------------------------------------------------------------
22 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r,
23     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
24     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
25     CeedVector v, CeedRequest *request) {
26   int ierr;
27   CeedElemRestriction_Ref *impl;
28   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);
29   const CeedScalar *uu;
30   CeedScalar *vv;
31   CeedInt nelem, elemsize, voffset;
32   ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
33   ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
34   voffset = start*blksize*elemsize*ncomp;
35 
36   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
37   ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
38   // Restriction from L-vector to E-vector
39   // Perform: v = r * u
40   if (tmode == CEED_NOTRANSPOSE) {
41     // No offsets provided, Identity Restriction
42     if (!impl->offsets) {
43       bool backendstrides;
44       ierr = CeedElemRestrictionGetBackendStridesStatus(r, &backendstrides);
45       CeedChk(ierr);
46       if (backendstrides) {
47         // CPU backend strides are {1, elemsize, elemsize*ncomp}
48         // This if branch is left separate to allow better inlining
49         for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
50           CeedPragmaSIMD
51           for (CeedInt k = 0; k < ncomp; k++)
52             CeedPragmaSIMD
53             for (CeedInt n = 0; n < elemsize; n++)
54               CeedPragmaSIMD
55               for (CeedInt j = 0; j < blksize; j++)
56                 vv[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset]
57                   = uu[n + k*elemsize +
58                          CeedIntMin(e+j, nelem-1)*elemsize*ncomp];
59       } else {
60         // User provided strides
61         CeedInt strides[3];
62         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr);
63         for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
64           CeedPragmaSIMD
65           for (CeedInt k = 0; k < ncomp; k++)
66             CeedPragmaSIMD
67             for (CeedInt n = 0; n < elemsize; n++)
68               CeedPragmaSIMD
69               for (CeedInt j = 0; j < blksize; j++)
70                 vv[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset]
71                   = uu[n*strides[0] + k*strides[1] +
72                                     CeedIntMin(e+j, nelem-1)*strides[2]];
73       }
74     } else {
75       // Offsets provided, standard or blocked restriction
76       // vv has shape [elemsize, ncomp, nelem], row-major
77       // uu has shape [nnodes, ncomp]
78       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
79         CeedPragmaSIMD
80         for (CeedInt k = 0; k < ncomp; k++)
81           CeedPragmaSIMD
82           for (CeedInt i = 0; i < elemsize*blksize; i++)
83             vv[elemsize*(k*blksize+ncomp*e) + i - voffset]
84               = uu[impl->offsets[i+elemsize*e] + k*compstride];
85     }
86   } else {
87     // Restriction from E-vector to L-vector
88     // Performing v += r^T * u
89     // No offsets provided, Identity Restriction
90     if (!impl->offsets) {
91       bool backendstrides;
92       ierr = CeedElemRestrictionGetBackendStridesStatus(r, &backendstrides);
93       CeedChk(ierr);
94       if (backendstrides) {
95         // CPU backend strides are {1, elemsize, elemsize*ncomp}
96         // This if brach is left separate to allow better inlining
97         for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
98           CeedPragmaSIMD
99           for (CeedInt k = 0; k < ncomp; k++)
100             CeedPragmaSIMD
101             for (CeedInt n = 0; n < elemsize; n++)
102               CeedPragmaSIMD
103               for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++)
104                 vv[n + k*elemsize + (e+j)*elemsize*ncomp]
105                 += uu[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset];
106       } else {
107         // User provided strides
108         CeedInt strides[3];
109         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr);
110         for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
111           CeedPragmaSIMD
112           for (CeedInt k = 0; k < ncomp; k++)
113             CeedPragmaSIMD
114             for (CeedInt n = 0; n < elemsize; n++)
115               CeedPragmaSIMD
116               for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++)
117                 vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]]
118                 += uu[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset];
119       }
120     } else {
121       // Offsets provided, standard or blocked restriction
122       // uu has shape [elemsize, ncomp, nelem]
123       // vv has shape [nnodes, ncomp]
124       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
125         for (CeedInt k = 0; k < ncomp; k++)
126           for (CeedInt i = 0; i < elemsize*blksize; i+=blksize)
127             // Iteration bound set to discard padding elements
128             for (CeedInt j = i; j < i+CeedIntMin(blksize, nelem-e); j++)
129               vv[impl->offsets[j+e*elemsize] + k*compstride]
130               += uu[elemsize*(k*blksize+ncomp*e) + j - voffset];
131     }
132   }
133   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
134   ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
135   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
136     *request = NULL;
137   return 0;
138 }
139 
140 //------------------------------------------------------------------------------
141 // ElemRestriction Apply - Common Sizes
142 //------------------------------------------------------------------------------
143 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r,
144     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
145     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
146     CeedVector v, CeedRequest *request) {
147   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, compstride, start, stop,
148          tmode, u, v, request);
149 }
150 
151 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r,
152     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
153     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
154     CeedVector v, CeedRequest *request) {
155   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, tmode,
156          u, v, request);
157 }
158 
159 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r,
160     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
161     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
162     CeedVector v, CeedRequest *request) {
163   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, compstride, start, stop,
164          tmode, u, v, request);
165 }
166 
167 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r,
168     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
169     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
170     CeedVector v, CeedRequest *request) {
171   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, tmode,
172          u, v, request);
173 }
174 
175 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r,
176     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
177     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
178     CeedVector v, CeedRequest *request) {
179   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, compstride, start, stop,
180          tmode, u, v, request);
181 }
182 
183 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r,
184     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
185     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
186     CeedVector v, CeedRequest *request) {
187   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, tmode,
188          u, v, request);
189 }
190 
191 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r,
192     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
193     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
194     CeedVector v, CeedRequest *request) {
195   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, compstride, start, stop,
196          tmode, u, v, request);
197 }
198 
199 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r,
200     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
201     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
202     CeedVector v, CeedRequest *request) {
203   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, tmode,
204          u, v, request);
205 }
206 
207 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r,
208     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
209     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
210     CeedVector v, CeedRequest *request) {
211   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, compstride, start, stop,
212          tmode, u, v, request);
213 }
214 
215 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r,
216     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
217     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
218     CeedVector v, CeedRequest *request) {
219   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, tmode,
220          u, v, request);
221 }
222 
223 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r,
224     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
225     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
226     CeedVector v, CeedRequest *request) {
227   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, compstride, start, stop,
228          tmode, u, v, request);
229 }
230 
231 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r,
232     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
233     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
234     CeedVector v, CeedRequest *request) {
235   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, tmode,
236          u, v, request);
237 }
238 
239 //------------------------------------------------------------------------------
240 // ElemRestriction Apply
241 //------------------------------------------------------------------------------
242 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
243                                         CeedTransposeMode tmode, CeedVector u,
244                                         CeedVector v, CeedRequest *request) {
245   int ierr;
246   CeedInt numblk, blksize, ncomp, compstride;
247   ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr);
248   ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr);
249   ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
250   ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr);
251   CeedElemRestriction_Ref *impl;
252   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);
253 
254   return impl->Apply(r, ncomp, blksize, compstride, 0, numblk, tmode, u, v,
255                      request);
256 }
257 
258 //------------------------------------------------------------------------------
259 // ElemRestriction Apply Block
260 //------------------------------------------------------------------------------
261 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r,
262     CeedInt block, CeedTransposeMode tmode, CeedVector u, CeedVector v,
263     CeedRequest *request) {
264   int ierr;
265   CeedInt blksize, ncomp, compstride;
266   ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr);
267   ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
268   ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr);
269   CeedElemRestriction_Ref *impl;
270   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);
271 
272   return impl->Apply(r, ncomp, blksize, compstride, block, block+1, tmode, u, v,
273                      request);
274 }
275 
276 //------------------------------------------------------------------------------
277 // ElemRestriction Destroy
278 //------------------------------------------------------------------------------
279 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
280   int ierr;
281   CeedElemRestriction_Ref *impl;
282   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);
283 
284   ierr = CeedFree(&impl->offsets_allocated); CeedChk(ierr);
285   ierr = CeedFree(&impl); CeedChk(ierr);
286   return 0;
287 }
288 
289 //------------------------------------------------------------------------------
290 // ElemRestriction Create
291 //------------------------------------------------------------------------------
292 int CeedElemRestrictionCreate_Ref(CeedMemType mtype, CeedCopyMode cmode,
293                                   const CeedInt *offsets,
294                                   CeedElemRestriction r) {
295   int ierr;
296   CeedElemRestriction_Ref *impl;
297   CeedInt nelem, elemsize, numblk, blksize, ncomp, compstride;
298   ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
299   ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
300   ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr);
301   ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr);
302   ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
303   ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr);
304   Ceed ceed;
305   ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
306 
307   if (mtype != CEED_MEM_HOST)
308     // LCOV_EXCL_START
309     return CeedError(ceed, 1, "Only MemType = HOST supported");
310   // LCOV_EXCL_STOP
311   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
312   switch (cmode) {
313   case CEED_COPY_VALUES:
314     ierr = CeedMalloc(nelem*elemsize, &impl->offsets_allocated);
315     CeedChk(ierr);
316     memcpy(impl->offsets_allocated, offsets,
317            nelem * elemsize * sizeof(offsets[0]));
318     impl->offsets = impl->offsets_allocated;
319     break;
320   case CEED_OWN_POINTER:
321     impl->offsets_allocated = (CeedInt *)offsets;
322     impl->offsets = impl->offsets_allocated;
323     break;
324   case CEED_USE_POINTER:
325     impl->offsets = offsets;
326   }
327 
328   ierr = CeedElemRestrictionSetData(r, (void *)&impl); CeedChk(ierr);
329   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply",
330                                 CeedElemRestrictionApply_Ref); CeedChk(ierr);
331   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock",
332                                 CeedElemRestrictionApplyBlock_Ref);
333   CeedChk(ierr);
334   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy",
335                                 CeedElemRestrictionDestroy_Ref); CeedChk(ierr);
336 
337   // Set apply function based upon ncomp, blksize, and compstride
338   CeedInt idx = -1;
339   if (blksize < 10)
340     idx = 100*ncomp + 10*blksize + (compstride == 1);
341   switch (idx) {
342   case 110:
343     impl->Apply = CeedElemRestrictionApply_Ref_110;
344     break;
345   case 111:
346     impl->Apply = CeedElemRestrictionApply_Ref_111;
347     break;
348   case 180:
349     impl->Apply = CeedElemRestrictionApply_Ref_180;
350     break;
351   case 181:
352     impl->Apply = CeedElemRestrictionApply_Ref_181;
353     break;
354   case 310:
355     impl->Apply = CeedElemRestrictionApply_Ref_310;
356     break;
357   case 311:
358     impl->Apply = CeedElemRestrictionApply_Ref_311;
359     break;
360   case 380:
361     impl->Apply = CeedElemRestrictionApply_Ref_380;
362     break;
363   case 381:
364     impl->Apply = CeedElemRestrictionApply_Ref_381;
365     break;
366   case 510:
367     impl->Apply = CeedElemRestrictionApply_Ref_510;
368     break;
369   case 511:
370     impl->Apply = CeedElemRestrictionApply_Ref_511;
371     break;
372   case 580:
373     impl->Apply = CeedElemRestrictionApply_Ref_580;
374     break;
375   case 581:
376     impl->Apply = CeedElemRestrictionApply_Ref_581;
377     break;
378   default:
379     impl->Apply = CeedElemRestrictionApply_Ref_Core;
380     break;
381   }
382 
383   return 0;
384 }
385 //------------------------------------------------------------------------------
386 
387