xref: /libCEED/backends/ref/ceed-ref-restriction.c (revision 3d8e882215d238700cdceb37404f76ca7fa24eaa)
1*3d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2*3d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
321617c04Sjeremylt //
4*3d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
521617c04Sjeremylt //
6*3d8e8822SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
721617c04Sjeremylt 
8ec3da8bcSJed Brown #include <ceed/ceed.h>
9ec3da8bcSJed Brown #include <ceed/backend.h>
103d576824SJeremy L Thompson #include <stdbool.h>
113d576824SJeremy L Thompson #include <string.h>
1221617c04Sjeremylt #include "ceed-ref.h"
1321617c04Sjeremylt 
14f10650afSjeremylt //------------------------------------------------------------------------------
15f10650afSjeremylt // Core ElemRestriction Apply Code
16f10650afSjeremylt //------------------------------------------------------------------------------
17be9261b7Sjeremylt static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r,
18d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
19d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
209c36149bSjeremylt     CeedVector v, CeedRequest *request) {
2121617c04Sjeremylt   int ierr;
224ce2993fSjeremylt   CeedElemRestriction_Ref *impl;
23e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
2421617c04Sjeremylt   const CeedScalar *uu;
2521617c04Sjeremylt   CeedScalar *vv;
26d1d35e2fSjeremylt   CeedInt num_elem, elem_size, v_offset;
27d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
28d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
29d1d35e2fSjeremylt   v_offset = start*blk_size*elem_size*num_comp;
3021617c04Sjeremylt 
31b435c5a6Srezgarshakeri   bool is_oriented;
32b435c5a6Srezgarshakeri   ierr = CeedElemRestrictionIsOriented(r, &is_oriented); CeedChkBackend(ierr);
33e15f9bd0SJeremy L Thompson   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChkBackend(ierr);
349c774eddSJeremy L Thompson   if (t_mode == CEED_TRANSPOSE) {
359c774eddSJeremy L Thompson     // Sum into for transpose mode, e-vec to l-vec
36e15f9bd0SJeremy L Thompson     ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr);
379c774eddSJeremy L Thompson   } else {
389c774eddSJeremy L Thompson     // Overwrite for notranspose mode, l-vec to e-vec
399c774eddSJeremy L Thompson     ierr = CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr);
409c774eddSJeremy L Thompson   }
417f90ec76Sjeremylt   // Restriction from L-vector to E-vector
4221617c04Sjeremylt   // Perform: v = r * u
43d1d35e2fSjeremylt   if (t_mode == CEED_NOTRANSPOSE) {
44d979a051Sjeremylt     // No offsets provided, Identity Restriction
45d979a051Sjeremylt     if (!impl->offsets) {
46d1d35e2fSjeremylt       bool has_backend_strides;
47d1d35e2fSjeremylt       ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides);
48e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
49d1d35e2fSjeremylt       if (has_backend_strides) {
50d1d35e2fSjeremylt         // CPU backend strides are {1, elem_size, elem_size*num_comp}
517f90ec76Sjeremylt         // This if branch is left separate to allow better inlining
52d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
53e1b98f6eSjeremylt           CeedPragmaSIMD
54d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
551d79ecccSjeremylt             CeedPragmaSIMD
56d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
577509a596Sjeremylt               CeedPragmaSIMD
58d1d35e2fSjeremylt               for (CeedInt j = 0; j < blk_size; j++)
59d1d35e2fSjeremylt                 vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]
60d1d35e2fSjeremylt                   = uu[n + k*elem_size +
61d1d35e2fSjeremylt                          CeedIntMin(e+j, num_elem-1)*elem_size*num_comp];
627f90ec76Sjeremylt       } else {
637f90ec76Sjeremylt         // User provided strides
647f90ec76Sjeremylt         CeedInt strides[3];
65e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
66d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
677f90ec76Sjeremylt           CeedPragmaSIMD
68d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
697f90ec76Sjeremylt             CeedPragmaSIMD
70d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
717f90ec76Sjeremylt               CeedPragmaSIMD
72d1d35e2fSjeremylt               for (CeedInt j = 0; j < blk_size; j++)
73d1d35e2fSjeremylt                 vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]
747509a596Sjeremylt                   = uu[n*strides[0] + k*strides[1] +
75d1d35e2fSjeremylt                                     CeedIntMin(e+j, num_elem-1)*strides[2]];
767509a596Sjeremylt       }
7721617c04Sjeremylt     } else {
78d979a051Sjeremylt       // Offsets provided, standard or blocked restriction
79d1d35e2fSjeremylt       // vv has shape [elem_size, num_comp, num_elem], row-major
80d1d35e2fSjeremylt       // uu has shape [nnodes, num_comp]
81d1d35e2fSjeremylt       for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
82e1b98f6eSjeremylt         CeedPragmaSIMD
83d1d35e2fSjeremylt         for (CeedInt k = 0; k < num_comp; k++)
84e1b98f6eSjeremylt           CeedPragmaSIMD
85d1d35e2fSjeremylt           for (CeedInt i = 0; i < elem_size*blk_size; i++)
86d1d35e2fSjeremylt             vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset]
87fc0567d9Srezgarshakeri               = uu[impl->offsets[i+elem_size*e] + k*comp_stride] *
88000294e3Srezgarshakeri                 (is_oriented && impl->orient[i+elem_size*e] ? -1. : 1.);
89b435c5a6Srezgarshakeri     }
9021617c04Sjeremylt   } else {
917f90ec76Sjeremylt     // Restriction from E-vector to L-vector
928d94b059Sjeremylt     // Performing v += r^T * u
93d979a051Sjeremylt     // No offsets provided, Identity Restriction
94d979a051Sjeremylt     if (!impl->offsets) {
95d1d35e2fSjeremylt       bool has_backend_strides;
96d1d35e2fSjeremylt       ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides);
97e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
98d1d35e2fSjeremylt       if (has_backend_strides) {
99d1d35e2fSjeremylt         // CPU backend strides are {1, elem_size, elem_size*num_comp}
1007f90ec76Sjeremylt         // This if brach is left separate to allow better inlining
101d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
102523b8ea0Sjeremylt           CeedPragmaSIMD
103d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
1047509a596Sjeremylt             CeedPragmaSIMD
105d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
1067f90ec76Sjeremylt               CeedPragmaSIMD
107d1d35e2fSjeremylt               for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++)
108d1d35e2fSjeremylt                 vv[n + k*elem_size + (e+j)*elem_size*num_comp]
109d1d35e2fSjeremylt                 += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset];
1107f90ec76Sjeremylt       } else {
1117f90ec76Sjeremylt         // User provided strides
1127f90ec76Sjeremylt         CeedInt strides[3];
113e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
114d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
1157f90ec76Sjeremylt           CeedPragmaSIMD
116d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
1177f90ec76Sjeremylt             CeedPragmaSIMD
118d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
1197f90ec76Sjeremylt               CeedPragmaSIMD
120d1d35e2fSjeremylt               for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++)
1217509a596Sjeremylt                 vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]]
122d1d35e2fSjeremylt                 += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset];
123523b8ea0Sjeremylt       }
12421617c04Sjeremylt     } else {
125d979a051Sjeremylt       // Offsets provided, standard or blocked restriction
126d1d35e2fSjeremylt       // uu has shape [elem_size, num_comp, num_elem]
127d1d35e2fSjeremylt       // vv has shape [nnodes, num_comp]
128d1d35e2fSjeremylt       for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
129d1d35e2fSjeremylt         for (CeedInt k = 0; k < num_comp; k++)
130d1d35e2fSjeremylt           for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size)
1318d94b059Sjeremylt             // Iteration bound set to discard padding elements
132d1d35e2fSjeremylt             for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++)
133d1d35e2fSjeremylt               vv[impl->offsets[j+e*elem_size] + k*comp_stride]
134fc0567d9Srezgarshakeri               += uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset] *
135000294e3Srezgarshakeri                  (is_oriented && impl->orient[j+e*elem_size] ? -1. : 1.);
13621617c04Sjeremylt     }
137b435c5a6Srezgarshakeri   }
138e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChkBackend(ierr);
139e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArray(v, &vv); CeedChkBackend(ierr);
14021617c04Sjeremylt   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
14121617c04Sjeremylt     *request = NULL;
142e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
14321617c04Sjeremylt }
14421617c04Sjeremylt 
145f10650afSjeremylt //------------------------------------------------------------------------------
146f10650afSjeremylt // ElemRestriction Apply - Common Sizes
147f10650afSjeremylt //------------------------------------------------------------------------------
148d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r,
149d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
150d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
151d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
152d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop,
153d1d35e2fSjeremylt          t_mode, u, v, request);
154d979a051Sjeremylt }
155d979a051Sjeremylt 
156d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r,
157d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
158d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
159d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
160d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode,
1619c36149bSjeremylt          u, v, request);
1624d2a38eeSjeremylt }
1634d2a38eeSjeremylt 
164d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r,
165d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
166d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
167d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
168d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop,
169d1d35e2fSjeremylt          t_mode, u, v, request);
1709c36149bSjeremylt }
1719c36149bSjeremylt 
172d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r,
173d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
174d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
175d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
176d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode,
1779c36149bSjeremylt          u, v, request);
1789c36149bSjeremylt }
1799c36149bSjeremylt 
180d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r,
181d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
182d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
183d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
184d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop,
185d1d35e2fSjeremylt          t_mode, u, v, request);
186d979a051Sjeremylt }
187d979a051Sjeremylt 
188d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r,
189d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
190d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
191d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
192d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode,
193d979a051Sjeremylt          u, v, request);
194d979a051Sjeremylt }
195d979a051Sjeremylt 
196d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r,
197d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
198d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
199d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
200d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop,
201d1d35e2fSjeremylt          t_mode, u, v, request);
202d979a051Sjeremylt }
203d979a051Sjeremylt 
204d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r,
205d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
206d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
207d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
208d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode,
209d979a051Sjeremylt          u, v, request);
210d979a051Sjeremylt }
211d979a051Sjeremylt 
212bf4d1581Sjeremylt // LCOV_EXCL_START
213d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r,
214d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
215d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
216d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
217d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop,
218d1d35e2fSjeremylt          t_mode, u, v, request);
219d979a051Sjeremylt }
220bf4d1581Sjeremylt // LCOV_EXCL_STOP
221d979a051Sjeremylt 
222d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r,
223d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
224d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
225d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
226d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode,
227d979a051Sjeremylt          u, v, request);
228d979a051Sjeremylt }
229d979a051Sjeremylt 
230bf4d1581Sjeremylt // LCOV_EXCL_START
231d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r,
232d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
233d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
234d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
235d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop,
236d1d35e2fSjeremylt          t_mode, u, v, request);
237d979a051Sjeremylt }
238bf4d1581Sjeremylt // LCOV_EXCL_STOP
239d979a051Sjeremylt 
240d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r,
241d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
242d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
243d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
244d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode,
2459c36149bSjeremylt          u, v, request);
2464d2a38eeSjeremylt }
2474d2a38eeSjeremylt 
248f10650afSjeremylt //------------------------------------------------------------------------------
249f10650afSjeremylt // ElemRestriction Apply
250f10650afSjeremylt //------------------------------------------------------------------------------
251be9261b7Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
252d1d35e2fSjeremylt                                         CeedTransposeMode t_mode, CeedVector u,
253be9261b7Sjeremylt                                         CeedVector v, CeedRequest *request) {
254be9261b7Sjeremylt   int ierr;
255d1d35e2fSjeremylt   CeedInt num_blk, blk_size, num_comp, comp_stride;
256d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr);
257d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
258d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
259d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
2607509a596Sjeremylt   CeedElemRestriction_Ref *impl;
261e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
2624d2a38eeSjeremylt 
263d1d35e2fSjeremylt   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v,
264d979a051Sjeremylt                      request);
2659c36149bSjeremylt }
266be9261b7Sjeremylt 
267f10650afSjeremylt //------------------------------------------------------------------------------
268f10650afSjeremylt // ElemRestriction Apply Block
269f10650afSjeremylt //------------------------------------------------------------------------------
270be9261b7Sjeremylt static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r,
271d1d35e2fSjeremylt     CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
272074cb416Sjeremylt     CeedRequest *request) {
2734d2a38eeSjeremylt   int ierr;
274d1d35e2fSjeremylt   CeedInt blk_size, num_comp, comp_stride;
275d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
276d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
277d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
2787509a596Sjeremylt   CeedElemRestriction_Ref *impl;
279e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
2804d2a38eeSjeremylt 
281d1d35e2fSjeremylt   return impl->Apply(r, num_comp, blk_size, comp_stride, block, block+1, t_mode,
282d1d35e2fSjeremylt                      u, v, request);
2839c36149bSjeremylt }
284be9261b7Sjeremylt 
285f10650afSjeremylt //------------------------------------------------------------------------------
286bd33150aSjeremylt // ElemRestriction Get Offsets
287bd33150aSjeremylt //------------------------------------------------------------------------------
288bd33150aSjeremylt static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr,
289d1d35e2fSjeremylt     CeedMemType mem_type, const CeedInt **offsets) {
290bd33150aSjeremylt   int ierr;
291bd33150aSjeremylt   CeedElemRestriction_Ref *impl;
292e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(rstr, &impl); CeedChkBackend(ierr);
293bd33150aSjeremylt   Ceed ceed;
294e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChkBackend(ierr);
295bd33150aSjeremylt 
296d1d35e2fSjeremylt   if (mem_type != CEED_MEM_HOST)
297bd33150aSjeremylt     // LCOV_EXCL_START
298e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
299bd33150aSjeremylt   // LCOV_EXCL_STOP
300bd33150aSjeremylt 
301bd33150aSjeremylt   *offsets = impl->offsets;
302e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
303bd33150aSjeremylt }
304bd33150aSjeremylt 
305bd33150aSjeremylt //------------------------------------------------------------------------------
306f10650afSjeremylt // ElemRestriction Destroy
307f10650afSjeremylt //------------------------------------------------------------------------------
30821617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
30921617c04Sjeremylt   int ierr;
310fe2413ffSjeremylt   CeedElemRestriction_Ref *impl;
311e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
31221617c04Sjeremylt 
313e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl->offsets_allocated); CeedChkBackend(ierr);
314e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
315e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
31621617c04Sjeremylt }
31721617c04Sjeremylt 
318f10650afSjeremylt //------------------------------------------------------------------------------
319f10650afSjeremylt // ElemRestriction Create
320f10650afSjeremylt //------------------------------------------------------------------------------
321d1d35e2fSjeremylt int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode,
322d979a051Sjeremylt                                   const CeedInt *offsets,
323d979a051Sjeremylt                                   CeedElemRestriction r) {
32421617c04Sjeremylt   int ierr;
32521617c04Sjeremylt   CeedElemRestriction_Ref *impl;
326d1d35e2fSjeremylt   CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride;
327d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
328d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
329d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr);
330d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
331d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
332d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
3334ce2993fSjeremylt   Ceed ceed;
334e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
33521617c04Sjeremylt 
336d1d35e2fSjeremylt   if (mem_type != CEED_MEM_HOST)
337c042f62fSJeremy L Thompson     // LCOV_EXCL_START
338e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
339c042f62fSJeremy L Thompson   // LCOV_EXCL_STOP
340e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
3413661185eSjeremylt 
34292fe105eSJeremy L Thompson   // Offsets data
343d1d35e2fSjeremylt   bool is_strided;
344d1d35e2fSjeremylt   ierr = CeedElemRestrictionIsStrided(r, &is_strided); CeedChkBackend(ierr);
345d1d35e2fSjeremylt   if (!is_strided) {
34692fe105eSJeremy L Thompson     // Check indices for ref or memcheck backends
347d1d35e2fSjeremylt     Ceed parent_ceed = ceed, curr_ceed = NULL;
348d1d35e2fSjeremylt     while (parent_ceed != curr_ceed) {
349d1d35e2fSjeremylt       curr_ceed = parent_ceed;
350d1d35e2fSjeremylt       ierr = CeedGetParent(curr_ceed, &parent_ceed); CeedChkBackend(ierr);
3513661185eSjeremylt     }
3523661185eSjeremylt     const char *resource;
353d1d35e2fSjeremylt     ierr = CeedGetResource(parent_ceed, &resource); CeedChkBackend(ierr);
354d1d35e2fSjeremylt     if (!strcmp(resource, "/cpu/self/ref/serial") ||
355d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/ref/blocked") ||
356d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/memcheck/serial") ||
357d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/memcheck/blocked")) {
358e79b91d9SJeremy L Thompson       CeedSize l_size;
359d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
3603661185eSjeremylt 
361d1d35e2fSjeremylt       for (CeedInt i = 0; i < num_elem*elem_size; i++)
362d1d35e2fSjeremylt         if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp - 1) * comp_stride)
3633661185eSjeremylt           // LCOV_EXCL_START
364e15f9bd0SJeremy L Thompson           return CeedError(ceed, CEED_ERROR_BACKEND,
365e15f9bd0SJeremy L Thompson                            "Restriction offset %d (%d) out of range "
366d1d35e2fSjeremylt                            "[0, %d]", i, offsets[i], l_size);
3673661185eSjeremylt       // LCOV_EXCL_STOP
3683661185eSjeremylt     }
3693661185eSjeremylt 
37092fe105eSJeremy L Thompson     // Copy data
371d1d35e2fSjeremylt     switch (copy_mode) {
37221617c04Sjeremylt     case CEED_COPY_VALUES:
373d1d35e2fSjeremylt       ierr = CeedMalloc(num_elem*elem_size, &impl->offsets_allocated);
374e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
375d979a051Sjeremylt       memcpy(impl->offsets_allocated, offsets,
376d1d35e2fSjeremylt              num_elem * elem_size * sizeof(offsets[0]));
377d979a051Sjeremylt       impl->offsets = impl->offsets_allocated;
37821617c04Sjeremylt       break;
37921617c04Sjeremylt     case CEED_OWN_POINTER:
380d979a051Sjeremylt       impl->offsets_allocated = (CeedInt *)offsets;
381d979a051Sjeremylt       impl->offsets = impl->offsets_allocated;
38221617c04Sjeremylt       break;
38321617c04Sjeremylt     case CEED_USE_POINTER:
384d979a051Sjeremylt       impl->offsets = offsets;
38521617c04Sjeremylt     }
38692fe105eSJeremy L Thompson   }
387fe2413ffSjeremylt 
388e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionSetData(r, impl); CeedChkBackend(ierr);
389d1d35e2fSjeremylt   CeedInt layout[3] = {1, elem_size, elem_size*num_comp};
390e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionSetELayout(r, layout); CeedChkBackend(ierr);
391fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply",
392e15f9bd0SJeremy L Thompson                                 CeedElemRestrictionApply_Ref); CeedChkBackend(ierr);
393be9261b7Sjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock",
394be9261b7Sjeremylt                                 CeedElemRestrictionApplyBlock_Ref);
395e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
396bd33150aSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets",
397bd33150aSjeremylt                                 CeedElemRestrictionGetOffsets_Ref);
398e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
399fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy",
400e15f9bd0SJeremy L Thompson                                 CeedElemRestrictionDestroy_Ref); CeedChkBackend(ierr);
401d979a051Sjeremylt 
402d1d35e2fSjeremylt   // Set apply function based upon num_comp, blk_size, and comp_stride
403d979a051Sjeremylt   CeedInt idx = -1;
404d1d35e2fSjeremylt   if (blk_size < 10)
405d1d35e2fSjeremylt     idx = 100*num_comp + 10*blk_size + (comp_stride == 1);
406d979a051Sjeremylt   switch (idx) {
407d979a051Sjeremylt   case 110:
408d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_110;
409d979a051Sjeremylt     break;
410d979a051Sjeremylt   case 111:
411d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_111;
412d979a051Sjeremylt     break;
413d979a051Sjeremylt   case 180:
414d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_180;
415d979a051Sjeremylt     break;
416d979a051Sjeremylt   case 181:
417d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_181;
418d979a051Sjeremylt     break;
419d979a051Sjeremylt   case 310:
420d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_310;
421d979a051Sjeremylt     break;
422d979a051Sjeremylt   case 311:
423d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_311;
424d979a051Sjeremylt     break;
425d979a051Sjeremylt   case 380:
426d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_380;
427d979a051Sjeremylt     break;
428d979a051Sjeremylt   case 381:
429d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_381;
430d979a051Sjeremylt     break;
431bf4d1581Sjeremylt   // LCOV_EXCL_START
432d979a051Sjeremylt   case 510:
433d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_510;
434d979a051Sjeremylt     break;
435bf4d1581Sjeremylt   // LCOV_EXCL_STOP
436d979a051Sjeremylt   case 511:
437d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_511;
438d979a051Sjeremylt     break;
439bf4d1581Sjeremylt   // LCOV_EXCL_START
440d979a051Sjeremylt   case 580:
441d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_580;
442d979a051Sjeremylt     break;
443bf4d1581Sjeremylt   // LCOV_EXCL_STOP
444d979a051Sjeremylt   case 581:
445d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_581;
446d979a051Sjeremylt     break;
447d979a051Sjeremylt   default:
448d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_Core;
449d979a051Sjeremylt     break;
450d979a051Sjeremylt   }
451d979a051Sjeremylt 
452e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
45321617c04Sjeremylt }
454fc0567d9Srezgarshakeri 
455fc0567d9Srezgarshakeri //------------------------------------------------------------------------------
456fc0567d9Srezgarshakeri // ElemRestriction Create Oriented
457fc0567d9Srezgarshakeri //------------------------------------------------------------------------------
458fc0567d9Srezgarshakeri int CeedElemRestrictionCreateOriented_Ref(CeedMemType mem_type,
459fc0567d9Srezgarshakeri     CeedCopyMode copy_mode,
460fc0567d9Srezgarshakeri     const CeedInt *offsets, const bool *orient,
461fc0567d9Srezgarshakeri     CeedElemRestriction r) {
462fc0567d9Srezgarshakeri   int ierr;
463fc0567d9Srezgarshakeri   CeedElemRestriction_Ref *impl;
464fc0567d9Srezgarshakeri   CeedInt num_elem, elem_size;
465fc0567d9Srezgarshakeri   // Set up for normal restriction with explicit offsets. This sets up dispatch to
466fc0567d9Srezgarshakeri   // CeedElemRestrictionApply_Ref_* and manages the impl->offsets array copy/allocation.
467fc0567d9Srezgarshakeri   ierr = CeedElemRestrictionCreate_Ref(mem_type, copy_mode, offsets, r);
468fc0567d9Srezgarshakeri   CeedChkBackend(ierr);
469fc0567d9Srezgarshakeri 
470fc0567d9Srezgarshakeri   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
471fc0567d9Srezgarshakeri   ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
472fc0567d9Srezgarshakeri   ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
473fc0567d9Srezgarshakeri   switch (copy_mode) {
474fc0567d9Srezgarshakeri   case CEED_COPY_VALUES:
475fc0567d9Srezgarshakeri     ierr = CeedMalloc(num_elem * elem_size, &impl->orient_allocated);
476fc0567d9Srezgarshakeri     CeedChkBackend(ierr);
477fc0567d9Srezgarshakeri     memcpy(impl->orient_allocated, orient,
478fc0567d9Srezgarshakeri            num_elem * elem_size * sizeof(orient[0]));
479fc0567d9Srezgarshakeri     impl->orient = impl->orient_allocated;
480fc0567d9Srezgarshakeri     break;
481fc0567d9Srezgarshakeri   case CEED_OWN_POINTER:
482fc0567d9Srezgarshakeri     impl->orient_allocated = (bool *)orient;
483fc0567d9Srezgarshakeri     impl->orient = impl->orient_allocated;
484fc0567d9Srezgarshakeri     break;
485fc0567d9Srezgarshakeri   case CEED_USE_POINTER:
486fc0567d9Srezgarshakeri     impl->orient = orient;
487fc0567d9Srezgarshakeri   }
488fc0567d9Srezgarshakeri   return CEED_ERROR_SUCCESS;
489fc0567d9Srezgarshakeri }
490f10650afSjeremylt //------------------------------------------------------------------------------
491