xref: /libCEED/backends/ref/ceed-ref-restriction.c (revision 68d8d92875d57d0802c0a7b51fab6e206a21432c)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include "ceed-ref.h"
18 
19 //------------------------------------------------------------------------------
20 // Core ElemRestriction Apply Code
21 //------------------------------------------------------------------------------
22 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r,
23     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
24     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
25     CeedVector v, CeedRequest *request) {
26   int ierr;
27   CeedElemRestriction_Ref *impl;
28   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);
29   const CeedScalar *uu;
30   CeedScalar *vv;
31   CeedInt nelem, elemsize, voffset;
32   ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
33   ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
34   voffset = start*blksize*elemsize*ncomp;
35 
36   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
37   ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
38   // Restriction from L-vector to E-vector
39   // Perform: v = r * u
40   if (tmode == CEED_NOTRANSPOSE) {
41     // No offsets provided, Identity Restriction
42     if (!impl->offsets) {
43       bool backendstrides;
44       ierr = CeedElemRestrictionGetBackendStridesStatus(r, &backendstrides);
45       CeedChk(ierr);
46       if (backendstrides) {
47         // CPU backend strides are {1, elemsize, elemsize*ncomp}
48         // This if branch is left separate to allow better inlining
49         for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
50           CeedPragmaSIMD
51           for (CeedInt k = 0; k < ncomp; k++)
52             CeedPragmaSIMD
53             for (CeedInt n = 0; n < elemsize; n++)
54               CeedPragmaSIMD
55               for (CeedInt j = 0; j < blksize; j++)
56                 vv[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset]
57                   = uu[n + k*elemsize +
58                          CeedIntMin(e+j, nelem-1)*elemsize*ncomp];
59       } else {
60         // User provided strides
61         CeedInt strides[3];
62         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr);
63         for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
64           CeedPragmaSIMD
65           for (CeedInt k = 0; k < ncomp; k++)
66             CeedPragmaSIMD
67             for (CeedInt n = 0; n < elemsize; n++)
68               CeedPragmaSIMD
69               for (CeedInt j = 0; j < blksize; j++)
70                 vv[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset]
71                   = uu[n*strides[0] + k*strides[1] +
72                                     CeedIntMin(e+j, nelem-1)*strides[2]];
73       }
74     } else {
75       // Offsets provided, standard or blocked restriction
76       // vv has shape [elemsize, ncomp, nelem], row-major
77       // uu has shape [nnodes, ncomp]
78       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
79         CeedPragmaSIMD
80         for (CeedInt k = 0; k < ncomp; k++)
81           CeedPragmaSIMD
82           for (CeedInt i = 0; i < elemsize*blksize; i++)
83             vv[elemsize*(k*blksize+ncomp*e) + i - voffset]
84               = uu[impl->offsets[i+elemsize*e] + k*compstride];
85     }
86   } else {
87     // Restriction from E-vector to L-vector
88     // Performing v += r^T * u
89     // No offsets provided, Identity Restriction
90     if (!impl->offsets) {
91       bool backendstrides;
92       ierr = CeedElemRestrictionGetBackendStridesStatus(r, &backendstrides);
93       CeedChk(ierr);
94       if (backendstrides) {
95         // CPU backend strides are {1, elemsize, elemsize*ncomp}
96         // This if brach is left separate to allow better inlining
97         for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
98           CeedPragmaSIMD
99           for (CeedInt k = 0; k < ncomp; k++)
100             CeedPragmaSIMD
101             for (CeedInt n = 0; n < elemsize; n++)
102               CeedPragmaSIMD
103               for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++)
104                 vv[n + k*elemsize + (e+j)*elemsize*ncomp]
105                 += uu[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset];
106       } else {
107         // User provided strides
108         CeedInt strides[3];
109         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr);
110         for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
111           CeedPragmaSIMD
112           for (CeedInt k = 0; k < ncomp; k++)
113             CeedPragmaSIMD
114             for (CeedInt n = 0; n < elemsize; n++)
115               CeedPragmaSIMD
116               for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++)
117                 vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]]
118                 += uu[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset];
119       }
120     } else {
121       // Offsets provided, standard or blocked restriction
122       // uu has shape [elemsize, ncomp, nelem]
123       // vv has shape [nnodes, ncomp]
124       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
125         for (CeedInt k = 0; k < ncomp; k++)
126           for (CeedInt i = 0; i < elemsize*blksize; i+=blksize)
127             // Iteration bound set to discard padding elements
128             for (CeedInt j = i; j < i+CeedIntMin(blksize, nelem-e); j++)
129               vv[impl->offsets[j+e*elemsize] + k*compstride]
130               += uu[elemsize*(k*blksize+ncomp*e) + j - voffset];
131     }
132   }
133   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
134   ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
135   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
136     *request = NULL;
137   return 0;
138 }
139 
140 //------------------------------------------------------------------------------
141 // ElemRestriction Apply - Common Sizes
142 //------------------------------------------------------------------------------
143 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r,
144     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
145     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
146     CeedVector v, CeedRequest *request) {
147   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, compstride, start, stop,
148          tmode, u, v, request);
149 }
150 
151 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r,
152     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
153     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
154     CeedVector v, CeedRequest *request) {
155   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, tmode,
156          u, v, request);
157 }
158 
159 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r,
160     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
161     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
162     CeedVector v, CeedRequest *request) {
163   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, compstride, start, stop,
164          tmode, u, v, request);
165 }
166 
167 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r,
168     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
169     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
170     CeedVector v, CeedRequest *request) {
171   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, tmode,
172          u, v, request);
173 }
174 
175 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r,
176     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
177     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
178     CeedVector v, CeedRequest *request) {
179   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, compstride, start, stop,
180          tmode, u, v, request);
181 }
182 
183 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r,
184     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
185     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
186     CeedVector v, CeedRequest *request) {
187   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, tmode,
188          u, v, request);
189 }
190 
191 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r,
192     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
193     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
194     CeedVector v, CeedRequest *request) {
195   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, compstride, start, stop,
196          tmode, u, v, request);
197 }
198 
199 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r,
200     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
201     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
202     CeedVector v, CeedRequest *request) {
203   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, tmode,
204          u, v, request);
205 }
206 
207 // LCOV_EXCL_START
208 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r,
209     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
210     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
211     CeedVector v, CeedRequest *request) {
212   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, compstride, start, stop,
213          tmode, u, v, request);
214 }
215 // LCOV_EXCL_STOP
216 
217 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r,
218     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
219     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
220     CeedVector v, CeedRequest *request) {
221   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, tmode,
222          u, v, request);
223 }
224 
225 // LCOV_EXCL_START
226 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r,
227     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
228     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
229     CeedVector v, CeedRequest *request) {
230   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, compstride, start, stop,
231          tmode, u, v, request);
232 }
233 // LCOV_EXCL_STOP
234 
235 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r,
236     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
237     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
238     CeedVector v, CeedRequest *request) {
239   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, tmode,
240          u, v, request);
241 }
242 
243 //------------------------------------------------------------------------------
244 // ElemRestriction Apply
245 //------------------------------------------------------------------------------
246 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
247                                         CeedTransposeMode tmode, CeedVector u,
248                                         CeedVector v, CeedRequest *request) {
249   int ierr;
250   CeedInt numblk, blksize, ncomp, compstride;
251   ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr);
252   ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr);
253   ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
254   ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr);
255   CeedElemRestriction_Ref *impl;
256   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);
257 
258   return impl->Apply(r, ncomp, blksize, compstride, 0, numblk, tmode, u, v,
259                      request);
260 }
261 
262 //------------------------------------------------------------------------------
263 // ElemRestriction Apply Block
264 //------------------------------------------------------------------------------
265 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r,
266     CeedInt block, CeedTransposeMode tmode, CeedVector u, CeedVector v,
267     CeedRequest *request) {
268   int ierr;
269   CeedInt blksize, ncomp, compstride;
270   ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr);
271   ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
272   ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr);
273   CeedElemRestriction_Ref *impl;
274   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);
275 
276   return impl->Apply(r, ncomp, blksize, compstride, block, block+1, tmode, u, v,
277                      request);
278 }
279 
280 //------------------------------------------------------------------------------
281 // ElemRestriction Destroy
282 //------------------------------------------------------------------------------
283 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
284   int ierr;
285   CeedElemRestriction_Ref *impl;
286   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);
287 
288   ierr = CeedFree(&impl->offsets_allocated); CeedChk(ierr);
289   ierr = CeedFree(&impl); CeedChk(ierr);
290   return 0;
291 }
292 
293 //------------------------------------------------------------------------------
294 // ElemRestriction Create
295 //------------------------------------------------------------------------------
296 int CeedElemRestrictionCreate_Ref(CeedMemType mtype, CeedCopyMode cmode,
297                                   const CeedInt *offsets,
298                                   CeedElemRestriction r) {
299   int ierr;
300   CeedElemRestriction_Ref *impl;
301   CeedInt nelem, elemsize, numblk, blksize, ncomp, compstride;
302   ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
303   ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
304   ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr);
305   ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr);
306   ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
307   ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr);
308   Ceed ceed;
309   ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
310 
311   if (mtype != CEED_MEM_HOST)
312     // LCOV_EXCL_START
313     return CeedError(ceed, 1, "Only MemType = HOST supported");
314   // LCOV_EXCL_STOP
315   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
316 
317   // Check indices for ref or memcheck backends
318   if (offsets) {
319     Ceed parentCeed = ceed, currCeed = NULL;
320     while (parentCeed != currCeed) {
321       currCeed = parentCeed;
322       ierr = CeedGetParent(currCeed, &parentCeed); CeedChk(ierr);
323     }
324     const char *resource;
325     ierr = CeedGetResource(parentCeed, &resource); CeedChk(ierr);
326     if (!strcmp(resource, "/cpu/self/ref/serial")
327         || !strcmp(resource, "/cpu/self/ref/blocked")
328         || !strcmp(resource, "/cpu/self/memcheck/serial")
329         || !strcmp(resource, "/cpu/self/memcheck/blocked")) {
330       CeedInt lsize;
331       ierr = CeedElemRestrictionGetLVectorSize(r, &lsize); CeedChk(ierr);
332 
333       for (CeedInt i = 0; i < nelem*elemsize; i++)
334         if (offsets[i] < 0 || lsize <= offsets[i] + (ncomp - 1) * compstride)
335           // LCOV_EXCL_START
336           return CeedError(ceed, 1, "Restriction offset %d (%d) out of range "
337                            "[0, %d]", i, offsets[i], lsize);
338       // LCOV_EXCL_STOP
339     }
340   }
341 
342   // Offsets data
343   switch (cmode) {
344   case CEED_COPY_VALUES:
345     ierr = CeedMalloc(nelem*elemsize, &impl->offsets_allocated);
346     CeedChk(ierr);
347     memcpy(impl->offsets_allocated, offsets,
348            nelem * elemsize * sizeof(offsets[0]));
349     impl->offsets = impl->offsets_allocated;
350     break;
351   case CEED_OWN_POINTER:
352     impl->offsets_allocated = (CeedInt *)offsets;
353     impl->offsets = impl->offsets_allocated;
354     break;
355   case CEED_USE_POINTER:
356     impl->offsets = offsets;
357   }
358 
359   ierr = CeedElemRestrictionSetData(r, (void *)&impl); CeedChk(ierr);
360   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply",
361                                 CeedElemRestrictionApply_Ref); CeedChk(ierr);
362   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock",
363                                 CeedElemRestrictionApplyBlock_Ref);
364   CeedChk(ierr);
365   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy",
366                                 CeedElemRestrictionDestroy_Ref); CeedChk(ierr);
367 
368   // Set apply function based upon ncomp, blksize, and compstride
369   CeedInt idx = -1;
370   if (blksize < 10)
371     idx = 100*ncomp + 10*blksize + (compstride == 1);
372   switch (idx) {
373   case 110:
374     impl->Apply = CeedElemRestrictionApply_Ref_110;
375     break;
376   case 111:
377     impl->Apply = CeedElemRestrictionApply_Ref_111;
378     break;
379   case 180:
380     impl->Apply = CeedElemRestrictionApply_Ref_180;
381     break;
382   case 181:
383     impl->Apply = CeedElemRestrictionApply_Ref_181;
384     break;
385   case 310:
386     impl->Apply = CeedElemRestrictionApply_Ref_310;
387     break;
388   case 311:
389     impl->Apply = CeedElemRestrictionApply_Ref_311;
390     break;
391   case 380:
392     impl->Apply = CeedElemRestrictionApply_Ref_380;
393     break;
394   case 381:
395     impl->Apply = CeedElemRestrictionApply_Ref_381;
396     break;
397   // LCOV_EXCL_START
398   case 510:
399     impl->Apply = CeedElemRestrictionApply_Ref_510;
400     break;
401   // LCOV_EXCL_STOP
402   case 511:
403     impl->Apply = CeedElemRestrictionApply_Ref_511;
404     break;
405   // LCOV_EXCL_START
406   case 580:
407     impl->Apply = CeedElemRestrictionApply_Ref_580;
408     break;
409   // LCOV_EXCL_STOP
410   case 581:
411     impl->Apply = CeedElemRestrictionApply_Ref_581;
412     break;
413   default:
414     impl->Apply = CeedElemRestrictionApply_Ref_Core;
415     break;
416   }
417 
418   return 0;
419 }
420 //------------------------------------------------------------------------------
421