xref: /libCEED/rust/libceed-sys/c-src/backends/ref/ceed-ref-restriction.c (revision f643baed4eda0a1f36e3d61f29c6ab9103a63892)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include "ceed-ref.h"
18 
19 //------------------------------------------------------------------------------
20 // Core ElemRestriction Apply Code
21 //------------------------------------------------------------------------------
22 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r,
23     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
24     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
25     CeedVector v, CeedRequest *request) {
26   int ierr;
27   CeedElemRestriction_Ref *impl;
28   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);
29   const CeedScalar *uu;
30   CeedScalar *vv;
31   CeedInt nelem, elemsize, voffset;
32   ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
33   ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
34   voffset = start*blksize*elemsize*ncomp;
35 
36   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
37   ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
38   // Restriction from L-vector to E-vector
39   // Perform: v = r * u
40   if (tmode == CEED_NOTRANSPOSE) {
41     // No offsets provided, Identity Restriction
42     if (!impl->offsets) {
43       bool backendstrides;
44       ierr = CeedElemRestrictionHasBackendStrides(r, &backendstrides);
45       CeedChk(ierr);
46       if (backendstrides) {
47         // CPU backend strides are {1, elemsize, elemsize*ncomp}
48         // This if branch is left separate to allow better inlining
49         for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
50           CeedPragmaSIMD
51           for (CeedInt k = 0; k < ncomp; k++)
52             CeedPragmaSIMD
53             for (CeedInt n = 0; n < elemsize; n++)
54               CeedPragmaSIMD
55               for (CeedInt j = 0; j < blksize; j++)
56                 vv[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset]
57                   = uu[n + k*elemsize +
58                          CeedIntMin(e+j, nelem-1)*elemsize*ncomp];
59       } else {
60         // User provided strides
61         CeedInt strides[3];
62         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr);
63         for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
64           CeedPragmaSIMD
65           for (CeedInt k = 0; k < ncomp; k++)
66             CeedPragmaSIMD
67             for (CeedInt n = 0; n < elemsize; n++)
68               CeedPragmaSIMD
69               for (CeedInt j = 0; j < blksize; j++)
70                 vv[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset]
71                   = uu[n*strides[0] + k*strides[1] +
72                                     CeedIntMin(e+j, nelem-1)*strides[2]];
73       }
74     } else {
75       // Offsets provided, standard or blocked restriction
76       // vv has shape [elemsize, ncomp, nelem], row-major
77       // uu has shape [nnodes, ncomp]
78       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
79         CeedPragmaSIMD
80         for (CeedInt k = 0; k < ncomp; k++)
81           CeedPragmaSIMD
82           for (CeedInt i = 0; i < elemsize*blksize; i++)
83             vv[elemsize*(k*blksize+ncomp*e) + i - voffset]
84               = uu[impl->offsets[i+elemsize*e] + k*compstride];
85     }
86   } else {
87     // Restriction from E-vector to L-vector
88     // Performing v += r^T * u
89     // No offsets provided, Identity Restriction
90     if (!impl->offsets) {
91       bool backendstrides;
92       ierr = CeedElemRestrictionHasBackendStrides(r, &backendstrides);
93       CeedChk(ierr);
94       if (backendstrides) {
95         // CPU backend strides are {1, elemsize, elemsize*ncomp}
96         // This if brach is left separate to allow better inlining
97         for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
98           CeedPragmaSIMD
99           for (CeedInt k = 0; k < ncomp; k++)
100             CeedPragmaSIMD
101             for (CeedInt n = 0; n < elemsize; n++)
102               CeedPragmaSIMD
103               for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++)
104                 vv[n + k*elemsize + (e+j)*elemsize*ncomp]
105                 += uu[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset];
106       } else {
107         // User provided strides
108         CeedInt strides[3];
109         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr);
110         for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
111           CeedPragmaSIMD
112           for (CeedInt k = 0; k < ncomp; k++)
113             CeedPragmaSIMD
114             for (CeedInt n = 0; n < elemsize; n++)
115               CeedPragmaSIMD
116               for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++)
117                 vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]]
118                 += uu[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset];
119       }
120     } else {
121       // Offsets provided, standard or blocked restriction
122       // uu has shape [elemsize, ncomp, nelem]
123       // vv has shape [nnodes, ncomp]
124       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
125         for (CeedInt k = 0; k < ncomp; k++)
126           for (CeedInt i = 0; i < elemsize*blksize; i+=blksize)
127             // Iteration bound set to discard padding elements
128             for (CeedInt j = i; j < i+CeedIntMin(blksize, nelem-e); j++)
129               vv[impl->offsets[j+e*elemsize] + k*compstride]
130               += uu[elemsize*(k*blksize+ncomp*e) + j - voffset];
131     }
132   }
133   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
134   ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
135   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
136     *request = NULL;
137   return 0;
138 }
139 
140 //------------------------------------------------------------------------------
141 // ElemRestriction Apply - Common Sizes
142 //------------------------------------------------------------------------------
143 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r,
144     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
145     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
146     CeedVector v, CeedRequest *request) {
147   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, compstride, start, stop,
148          tmode, u, v, request);
149 }
150 
151 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r,
152     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
153     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
154     CeedVector v, CeedRequest *request) {
155   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, tmode,
156          u, v, request);
157 }
158 
159 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r,
160     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
161     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
162     CeedVector v, CeedRequest *request) {
163   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, compstride, start, stop,
164          tmode, u, v, request);
165 }
166 
167 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r,
168     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
169     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
170     CeedVector v, CeedRequest *request) {
171   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, tmode,
172          u, v, request);
173 }
174 
175 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r,
176     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
177     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
178     CeedVector v, CeedRequest *request) {
179   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, compstride, start, stop,
180          tmode, u, v, request);
181 }
182 
183 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r,
184     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
185     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
186     CeedVector v, CeedRequest *request) {
187   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, tmode,
188          u, v, request);
189 }
190 
191 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r,
192     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
193     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
194     CeedVector v, CeedRequest *request) {
195   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, compstride, start, stop,
196          tmode, u, v, request);
197 }
198 
199 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r,
200     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
201     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
202     CeedVector v, CeedRequest *request) {
203   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, tmode,
204          u, v, request);
205 }
206 
207 // LCOV_EXCL_START
208 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r,
209     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
210     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
211     CeedVector v, CeedRequest *request) {
212   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, compstride, start, stop,
213          tmode, u, v, request);
214 }
215 // LCOV_EXCL_STOP
216 
217 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r,
218     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
219     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
220     CeedVector v, CeedRequest *request) {
221   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, tmode,
222          u, v, request);
223 }
224 
225 // LCOV_EXCL_START
226 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r,
227     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
228     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
229     CeedVector v, CeedRequest *request) {
230   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, compstride, start, stop,
231          tmode, u, v, request);
232 }
233 // LCOV_EXCL_STOP
234 
235 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r,
236     const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride,
237     CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u,
238     CeedVector v, CeedRequest *request) {
239   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, tmode,
240          u, v, request);
241 }
242 
243 //------------------------------------------------------------------------------
244 // ElemRestriction Apply
245 //------------------------------------------------------------------------------
246 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
247                                         CeedTransposeMode tmode, CeedVector u,
248                                         CeedVector v, CeedRequest *request) {
249   int ierr;
250   CeedInt numblk, blksize, ncomp, compstride;
251   ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr);
252   ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr);
253   ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
254   ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr);
255   CeedElemRestriction_Ref *impl;
256   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);
257 
258   return impl->Apply(r, ncomp, blksize, compstride, 0, numblk, tmode, u, v,
259                      request);
260 }
261 
262 //------------------------------------------------------------------------------
263 // ElemRestriction Apply Block
264 //------------------------------------------------------------------------------
265 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r,
266     CeedInt block, CeedTransposeMode tmode, CeedVector u, CeedVector v,
267     CeedRequest *request) {
268   int ierr;
269   CeedInt blksize, ncomp, compstride;
270   ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr);
271   ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
272   ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr);
273   CeedElemRestriction_Ref *impl;
274   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);
275 
276   return impl->Apply(r, ncomp, blksize, compstride, block, block+1, tmode, u, v,
277                      request);
278 }
279 
280 //------------------------------------------------------------------------------
281 // ElemRestriction Get Offsets
282 //------------------------------------------------------------------------------
283 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr,
284     CeedMemType mtype, const CeedInt **offsets) {
285   int ierr;
286   CeedElemRestriction_Ref *impl;
287   ierr = CeedElemRestrictionGetData(rstr, (void *)&impl); CeedChk(ierr);
288   Ceed ceed;
289   ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr);
290 
291   if (mtype != CEED_MEM_HOST)
292     // LCOV_EXCL_START
293     return CeedError(ceed, 1, "Can only provide to HOST memory");
294   // LCOV_EXCL_STOP
295 
296   *offsets = impl->offsets;
297   return 0;
298 }
299 
300 //------------------------------------------------------------------------------
301 // ElemRestriction Destroy
302 //------------------------------------------------------------------------------
303 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
304   int ierr;
305   CeedElemRestriction_Ref *impl;
306   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);
307 
308   ierr = CeedFree(&impl->offsets_allocated); CeedChk(ierr);
309   ierr = CeedFree(&impl); CeedChk(ierr);
310   return 0;
311 }
312 
313 //------------------------------------------------------------------------------
314 // ElemRestriction Create
315 //------------------------------------------------------------------------------
316 int CeedElemRestrictionCreate_Ref(CeedMemType mtype, CeedCopyMode cmode,
317                                   const CeedInt *offsets,
318                                   CeedElemRestriction r) {
319   int ierr;
320   CeedElemRestriction_Ref *impl;
321   CeedInt nelem, elemsize, numblk, blksize, ncomp, compstride;
322   ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
323   ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
324   ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr);
325   ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr);
326   ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
327   ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr);
328   Ceed ceed;
329   ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
330 
331   if (mtype != CEED_MEM_HOST)
332     // LCOV_EXCL_START
333     return CeedError(ceed, 1, "Only MemType = HOST supported");
334   // LCOV_EXCL_STOP
335   ierr = CeedCalloc(1, &impl); CeedChk(ierr);
336 
337   // Offsets data
338   bool isStrided;
339   ierr = CeedElemRestrictionIsStrided(r, &isStrided); CeedChk(ierr);
340   if (!isStrided) {
341     // Check indices for ref or memcheck backends
342     Ceed parentCeed = ceed, currCeed = NULL;
343     while (parentCeed != currCeed) {
344       currCeed = parentCeed;
345       ierr = CeedGetParent(currCeed, &parentCeed); CeedChk(ierr);
346     }
347     const char *resource;
348     ierr = CeedGetResource(parentCeed, &resource); CeedChk(ierr);
349     if (!strcmp(resource, "/cpu/self/ref/serial")
350         || !strcmp(resource, "/cpu/self/ref/blocked")
351         || !strcmp(resource, "/cpu/self/memcheck/serial")
352         || !strcmp(resource, "/cpu/self/memcheck/blocked")) {
353       CeedInt lsize;
354       ierr = CeedElemRestrictionGetLVectorSize(r, &lsize); CeedChk(ierr);
355 
356       for (CeedInt i = 0; i < nelem*elemsize; i++)
357         if (offsets[i] < 0 || lsize <= offsets[i] + (ncomp - 1) * compstride)
358           // LCOV_EXCL_START
359           return CeedError(ceed, 1, "Restriction offset %d (%d) out of range "
360                            "[0, %d]", i, offsets[i], lsize);
361       // LCOV_EXCL_STOP
362     }
363 
364     // Copy data
365     switch (cmode) {
366     case CEED_COPY_VALUES:
367       ierr = CeedMalloc(nelem*elemsize, &impl->offsets_allocated);
368       CeedChk(ierr);
369       memcpy(impl->offsets_allocated, offsets,
370              nelem * elemsize * sizeof(offsets[0]));
371       impl->offsets = impl->offsets_allocated;
372       break;
373     case CEED_OWN_POINTER:
374       impl->offsets_allocated = (CeedInt *)offsets;
375       impl->offsets = impl->offsets_allocated;
376       break;
377     case CEED_USE_POINTER:
378       impl->offsets = offsets;
379     }
380   }
381 
382   ierr = CeedElemRestrictionSetData(r, (void *)&impl); CeedChk(ierr);
383   CeedInt layout[3] = {1, elemsize, elemsize*ncomp};
384   ierr = CeedElemRestrictionSetELayout(r, layout); CeedChk(ierr);
385   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply",
386                                 CeedElemRestrictionApply_Ref); CeedChk(ierr);
387   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock",
388                                 CeedElemRestrictionApplyBlock_Ref);
389   CeedChk(ierr);
390   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets",
391                                 CeedElemRestrictionGetOffsets_Ref);
392   CeedChk(ierr);
393   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy",
394                                 CeedElemRestrictionDestroy_Ref); CeedChk(ierr);
395 
396   // Set apply function based upon ncomp, blksize, and compstride
397   CeedInt idx = -1;
398   if (blksize < 10)
399     idx = 100*ncomp + 10*blksize + (compstride == 1);
400   switch (idx) {
401   case 110:
402     impl->Apply = CeedElemRestrictionApply_Ref_110;
403     break;
404   case 111:
405     impl->Apply = CeedElemRestrictionApply_Ref_111;
406     break;
407   case 180:
408     impl->Apply = CeedElemRestrictionApply_Ref_180;
409     break;
410   case 181:
411     impl->Apply = CeedElemRestrictionApply_Ref_181;
412     break;
413   case 310:
414     impl->Apply = CeedElemRestrictionApply_Ref_310;
415     break;
416   case 311:
417     impl->Apply = CeedElemRestrictionApply_Ref_311;
418     break;
419   case 380:
420     impl->Apply = CeedElemRestrictionApply_Ref_380;
421     break;
422   case 381:
423     impl->Apply = CeedElemRestrictionApply_Ref_381;
424     break;
425   // LCOV_EXCL_START
426   case 510:
427     impl->Apply = CeedElemRestrictionApply_Ref_510;
428     break;
429   // LCOV_EXCL_STOP
430   case 511:
431     impl->Apply = CeedElemRestrictionApply_Ref_511;
432     break;
433   // LCOV_EXCL_START
434   case 580:
435     impl->Apply = CeedElemRestrictionApply_Ref_580;
436     break;
437   // LCOV_EXCL_STOP
438   case 581:
439     impl->Apply = CeedElemRestrictionApply_Ref_581;
440     break;
441   default:
442     impl->Apply = CeedElemRestrictionApply_Ref_Core;
443     break;
444   }
445 
446   return 0;
447 }
448 //------------------------------------------------------------------------------
449