xref: /libCEED/backends/ref/ceed-ref-restriction.c (revision a61c78d6a6d5ea69db49949746e6dc59b544c365)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <stdbool.h>
20 #include <string.h>
21 #include "ceed-ref.h"
22 
23 //------------------------------------------------------------------------------
24 // Core ElemRestriction Apply Code
25 //------------------------------------------------------------------------------
26 static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r,
27     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
28     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
29     CeedVector v, CeedRequest *request) {
30   int ierr;
31   CeedElemRestriction_Ref *impl;
32   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
33   const CeedScalar *uu;
34   CeedScalar *vv;
35   CeedInt num_elem, elem_size, v_offset;
36   ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
37   ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
38   v_offset = start*blk_size*elem_size*num_comp;
39 
40   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChkBackend(ierr);
41   if (t_mode == CEED_TRANSPOSE) {
42     // Sum into for transpose mode, e-vec to l-vec
43     ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr);
44   } else {
45     // Overwrite for notranspose mode, l-vec to e-vec
46     ierr = CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr);
47   }
48   // Restriction from L-vector to E-vector
49   // Perform: v = r * u
50   if (t_mode == CEED_NOTRANSPOSE) {
51     // No offsets provided, Identity Restriction
52     if (!impl->offsets) {
53       bool has_backend_strides;
54       ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides);
55       CeedChkBackend(ierr);
56       if (has_backend_strides) {
57         // CPU backend strides are {1, elem_size, elem_size*num_comp}
58         // This if branch is left separate to allow better inlining
59         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
60           CeedPragmaSIMD
61           for (CeedInt k = 0; k < num_comp; k++)
62             CeedPragmaSIMD
63             for (CeedInt n = 0; n < elem_size; n++)
64               CeedPragmaSIMD
65               for (CeedInt j = 0; j < blk_size; j++)
66                 vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]
67                   = uu[n + k*elem_size +
68                          CeedIntMin(e+j, num_elem-1)*elem_size*num_comp];
69       } else {
70         // User provided strides
71         CeedInt strides[3];
72         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
73         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
74           CeedPragmaSIMD
75           for (CeedInt k = 0; k < num_comp; k++)
76             CeedPragmaSIMD
77             for (CeedInt n = 0; n < elem_size; n++)
78               CeedPragmaSIMD
79               for (CeedInt j = 0; j < blk_size; j++)
80                 vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]
81                   = uu[n*strides[0] + k*strides[1] +
82                                     CeedIntMin(e+j, num_elem-1)*strides[2]];
83       }
84     } else {
85       // Offsets provided, standard or blocked restriction
86       // vv has shape [elem_size, num_comp, num_elem], row-major
87       // uu has shape [nnodes, num_comp]
88       for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
89         CeedPragmaSIMD
90         for (CeedInt k = 0; k < num_comp; k++)
91           CeedPragmaSIMD
92           for (CeedInt i = 0; i < elem_size*blk_size; i++)
93             vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset]
94               = uu[impl->offsets[i+elem_size*e] + k*comp_stride];
95     }
96   } else {
97     // Restriction from E-vector to L-vector
98     // Performing v += r^T * u
99     // No offsets provided, Identity Restriction
100     if (!impl->offsets) {
101       bool has_backend_strides;
102       ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides);
103       CeedChkBackend(ierr);
104       if (has_backend_strides) {
105         // CPU backend strides are {1, elem_size, elem_size*num_comp}
106         // This if brach is left separate to allow better inlining
107         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
108           CeedPragmaSIMD
109           for (CeedInt k = 0; k < num_comp; k++)
110             CeedPragmaSIMD
111             for (CeedInt n = 0; n < elem_size; n++)
112               CeedPragmaSIMD
113               for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++)
114                 vv[n + k*elem_size + (e+j)*elem_size*num_comp]
115                 += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset];
116       } else {
117         // User provided strides
118         CeedInt strides[3];
119         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
120         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
121           CeedPragmaSIMD
122           for (CeedInt k = 0; k < num_comp; k++)
123             CeedPragmaSIMD
124             for (CeedInt n = 0; n < elem_size; n++)
125               CeedPragmaSIMD
126               for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++)
127                 vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]]
128                 += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset];
129       }
130     } else {
131       // Offsets provided, standard or blocked restriction
132       // uu has shape [elem_size, num_comp, num_elem]
133       // vv has shape [nnodes, num_comp]
134       for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
135         for (CeedInt k = 0; k < num_comp; k++)
136           for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size)
137             // Iteration bound set to discard padding elements
138             for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++)
139               vv[impl->offsets[j+e*elem_size] + k*comp_stride]
140               += uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset];
141     }
142   }
143   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChkBackend(ierr);
144   ierr = CeedVectorRestoreArray(v, &vv); CeedChkBackend(ierr);
145   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
146     *request = NULL;
147   return CEED_ERROR_SUCCESS;
148 }
149 
150 //------------------------------------------------------------------------------
151 // ElemRestriction Apply - Common Sizes
152 //------------------------------------------------------------------------------
153 static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r,
154     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
155     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
156     CeedVector v, CeedRequest *request) {
157   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop,
158          t_mode, u, v, request);
159 }
160 
161 static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r,
162     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
163     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
164     CeedVector v, CeedRequest *request) {
165   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode,
166          u, v, request);
167 }
168 
169 static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r,
170     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
171     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
172     CeedVector v, CeedRequest *request) {
173   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop,
174          t_mode, u, v, request);
175 }
176 
177 static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r,
178     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
179     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
180     CeedVector v, CeedRequest *request) {
181   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode,
182          u, v, request);
183 }
184 
185 static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r,
186     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
187     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
188     CeedVector v, CeedRequest *request) {
189   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop,
190          t_mode, u, v, request);
191 }
192 
193 static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r,
194     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
195     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
196     CeedVector v, CeedRequest *request) {
197   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode,
198          u, v, request);
199 }
200 
201 static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r,
202     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
203     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
204     CeedVector v, CeedRequest *request) {
205   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop,
206          t_mode, u, v, request);
207 }
208 
209 static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r,
210     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
211     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
212     CeedVector v, CeedRequest *request) {
213   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode,
214          u, v, request);
215 }
216 
217 // LCOV_EXCL_START
218 static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r,
219     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
220     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
221     CeedVector v, CeedRequest *request) {
222   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop,
223          t_mode, u, v, request);
224 }
225 // LCOV_EXCL_STOP
226 
227 static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r,
228     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
229     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
230     CeedVector v, CeedRequest *request) {
231   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode,
232          u, v, request);
233 }
234 
235 // LCOV_EXCL_START
236 static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r,
237     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
238     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
239     CeedVector v, CeedRequest *request) {
240   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop,
241          t_mode, u, v, request);
242 }
243 // LCOV_EXCL_STOP
244 
245 static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r,
246     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
247     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
248     CeedVector v, CeedRequest *request) {
249   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode,
250          u, v, request);
251 }
252 
253 //------------------------------------------------------------------------------
254 // ElemRestriction Apply
255 //------------------------------------------------------------------------------
256 static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
257                                         CeedTransposeMode t_mode, CeedVector u,
258                                         CeedVector v, CeedRequest *request) {
259   int ierr;
260   CeedInt num_blk, blk_size, num_comp, comp_stride;
261   ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr);
262   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
263   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
264   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
265   CeedElemRestriction_Ref *impl;
266   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
267 
268   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v,
269                      request);
270 }
271 
272 //------------------------------------------------------------------------------
273 // ElemRestriction Apply Block
274 //------------------------------------------------------------------------------
275 static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r,
276     CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
277     CeedRequest *request) {
278   int ierr;
279   CeedInt blk_size, num_comp, comp_stride;
280   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
281   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
282   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
283   CeedElemRestriction_Ref *impl;
284   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
285 
286   return impl->Apply(r, num_comp, blk_size, comp_stride, block, block+1, t_mode,
287                      u, v, request);
288 }
289 
290 //------------------------------------------------------------------------------
291 // ElemRestriction Get Offsets
292 //------------------------------------------------------------------------------
293 static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr,
294     CeedMemType mem_type, const CeedInt **offsets) {
295   int ierr;
296   CeedElemRestriction_Ref *impl;
297   ierr = CeedElemRestrictionGetData(rstr, &impl); CeedChkBackend(ierr);
298   Ceed ceed;
299   ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChkBackend(ierr);
300 
301   if (mem_type != CEED_MEM_HOST)
302     // LCOV_EXCL_START
303     return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
304   // LCOV_EXCL_STOP
305 
306   *offsets = impl->offsets;
307   return CEED_ERROR_SUCCESS;
308 }
309 
310 //------------------------------------------------------------------------------
311 // ElemRestriction Destroy
312 //------------------------------------------------------------------------------
313 static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
314   int ierr;
315   CeedElemRestriction_Ref *impl;
316   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
317 
318   ierr = CeedFree(&impl->offsets_allocated); CeedChkBackend(ierr);
319   ierr = CeedFree(&impl); CeedChkBackend(ierr);
320   return CEED_ERROR_SUCCESS;
321 }
322 
323 //------------------------------------------------------------------------------
324 // ElemRestriction Create
325 //------------------------------------------------------------------------------
326 int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode,
327                                   const CeedInt *offsets,
328                                   CeedElemRestriction r) {
329   int ierr;
330   CeedElemRestriction_Ref *impl;
331   CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride;
332   ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
333   ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
334   ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr);
335   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
336   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
337   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
338   Ceed ceed;
339   ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
340 
341   if (mem_type != CEED_MEM_HOST)
342     // LCOV_EXCL_START
343     return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
344   // LCOV_EXCL_STOP
345   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
346 
347   // Offsets data
348   bool is_strided;
349   ierr = CeedElemRestrictionIsStrided(r, &is_strided); CeedChkBackend(ierr);
350   if (!is_strided) {
351     // Check indices for ref or memcheck backends
352     Ceed parent_ceed = ceed, curr_ceed = NULL;
353     while (parent_ceed != curr_ceed) {
354       curr_ceed = parent_ceed;
355       ierr = CeedGetParent(curr_ceed, &parent_ceed); CeedChkBackend(ierr);
356     }
357     const char *resource;
358     ierr = CeedGetResource(parent_ceed, &resource); CeedChkBackend(ierr);
359     if (!strcmp(resource, "/cpu/self/ref/serial") ||
360         !strcmp(resource, "/cpu/self/ref/blocked") ||
361         !strcmp(resource, "/cpu/self/memcheck/serial") ||
362         !strcmp(resource, "/cpu/self/memcheck/blocked")) {
363       CeedInt l_size;
364       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
365 
366       for (CeedInt i = 0; i < num_elem*elem_size; i++)
367         if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp - 1) * comp_stride)
368           // LCOV_EXCL_START
369           return CeedError(ceed, CEED_ERROR_BACKEND,
370                            "Restriction offset %d (%d) out of range "
371                            "[0, %d]", i, offsets[i], l_size);
372       // LCOV_EXCL_STOP
373     }
374 
375     // Copy data
376     switch (copy_mode) {
377     case CEED_COPY_VALUES:
378       ierr = CeedMalloc(num_elem*elem_size, &impl->offsets_allocated);
379       CeedChkBackend(ierr);
380       memcpy(impl->offsets_allocated, offsets,
381              num_elem * elem_size * sizeof(offsets[0]));
382       impl->offsets = impl->offsets_allocated;
383       break;
384     case CEED_OWN_POINTER:
385       impl->offsets_allocated = (CeedInt *)offsets;
386       impl->offsets = impl->offsets_allocated;
387       break;
388     case CEED_USE_POINTER:
389       impl->offsets = offsets;
390     }
391   }
392 
393   ierr = CeedElemRestrictionSetData(r, impl); CeedChkBackend(ierr);
394   CeedInt layout[3] = {1, elem_size, elem_size*num_comp};
395   ierr = CeedElemRestrictionSetELayout(r, layout); CeedChkBackend(ierr);
396   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply",
397                                 CeedElemRestrictionApply_Ref); CeedChkBackend(ierr);
398   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock",
399                                 CeedElemRestrictionApplyBlock_Ref);
400   CeedChkBackend(ierr);
401   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets",
402                                 CeedElemRestrictionGetOffsets_Ref);
403   CeedChkBackend(ierr);
404   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy",
405                                 CeedElemRestrictionDestroy_Ref); CeedChkBackend(ierr);
406 
407   // Set apply function based upon num_comp, blk_size, and comp_stride
408   CeedInt idx = -1;
409   if (blk_size < 10)
410     idx = 100*num_comp + 10*blk_size + (comp_stride == 1);
411   switch (idx) {
412   case 110:
413     impl->Apply = CeedElemRestrictionApply_Ref_110;
414     break;
415   case 111:
416     impl->Apply = CeedElemRestrictionApply_Ref_111;
417     break;
418   case 180:
419     impl->Apply = CeedElemRestrictionApply_Ref_180;
420     break;
421   case 181:
422     impl->Apply = CeedElemRestrictionApply_Ref_181;
423     break;
424   case 310:
425     impl->Apply = CeedElemRestrictionApply_Ref_310;
426     break;
427   case 311:
428     impl->Apply = CeedElemRestrictionApply_Ref_311;
429     break;
430   case 380:
431     impl->Apply = CeedElemRestrictionApply_Ref_380;
432     break;
433   case 381:
434     impl->Apply = CeedElemRestrictionApply_Ref_381;
435     break;
436   // LCOV_EXCL_START
437   case 510:
438     impl->Apply = CeedElemRestrictionApply_Ref_510;
439     break;
440   // LCOV_EXCL_STOP
441   case 511:
442     impl->Apply = CeedElemRestrictionApply_Ref_511;
443     break;
444   // LCOV_EXCL_START
445   case 580:
446     impl->Apply = CeedElemRestrictionApply_Ref_580;
447     break;
448   // LCOV_EXCL_STOP
449   case 581:
450     impl->Apply = CeedElemRestrictionApply_Ref_581;
451     break;
452   default:
453     impl->Apply = CeedElemRestrictionApply_Ref_Core;
454     break;
455   }
456 
457   return CEED_ERROR_SUCCESS;
458 }
459 //------------------------------------------------------------------------------
460