xref: /libCEED/rust/libceed-sys/c-src/backends/ref/ceed-ref-restriction.c (revision 9c774eddf8c0b4f5416196d32c5355c9591a7190)
121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
421617c04Sjeremylt //
521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
721617c04Sjeremylt // element discretizations for exascale applications. For more information and
821617c04Sjeremylt // source code availability see http://github.com/ceed.
921617c04Sjeremylt //
1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early
1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
1621617c04Sjeremylt 
17ec3da8bcSJed Brown #include <ceed/ceed.h>
18ec3da8bcSJed Brown #include <ceed/backend.h>
193d576824SJeremy L Thompson #include <stdbool.h>
203d576824SJeremy L Thompson #include <string.h>
2121617c04Sjeremylt #include "ceed-ref.h"
2221617c04Sjeremylt 
23f10650afSjeremylt //------------------------------------------------------------------------------
24f10650afSjeremylt // Core ElemRestriction Apply Code
25f10650afSjeremylt //------------------------------------------------------------------------------
26be9261b7Sjeremylt static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r,
27d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
28d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
299c36149bSjeremylt     CeedVector v, CeedRequest *request) {
3021617c04Sjeremylt   int ierr;
314ce2993fSjeremylt   CeedElemRestriction_Ref *impl;
32e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
3321617c04Sjeremylt   const CeedScalar *uu;
3421617c04Sjeremylt   CeedScalar *vv;
35d1d35e2fSjeremylt   CeedInt num_elem, elem_size, v_offset;
36d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
37d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
38d1d35e2fSjeremylt   v_offset = start*blk_size*elem_size*num_comp;
3921617c04Sjeremylt 
40e15f9bd0SJeremy L Thompson   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChkBackend(ierr);
41*9c774eddSJeremy L Thompson   if (t_mode == CEED_TRANSPOSE) {
42*9c774eddSJeremy L Thompson     // Sum into for transpose mode, e-vec to l-vec
43e15f9bd0SJeremy L Thompson     ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr);
44*9c774eddSJeremy L Thompson   } else {
45*9c774eddSJeremy L Thompson     // Overwrite for notranspose mode, l-vec to e-vec
46*9c774eddSJeremy L Thompson     ierr = CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr);
47*9c774eddSJeremy L Thompson   }
487f90ec76Sjeremylt   // Restriction from L-vector to E-vector
4921617c04Sjeremylt   // Perform: v = r * u
50d1d35e2fSjeremylt   if (t_mode == CEED_NOTRANSPOSE) {
51d979a051Sjeremylt     // No offsets provided, Identity Restriction
52d979a051Sjeremylt     if (!impl->offsets) {
53d1d35e2fSjeremylt       bool has_backend_strides;
54d1d35e2fSjeremylt       ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides);
55e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
56d1d35e2fSjeremylt       if (has_backend_strides) {
57d1d35e2fSjeremylt         // CPU backend strides are {1, elem_size, elem_size*num_comp}
587f90ec76Sjeremylt         // This if branch is left separate to allow better inlining
59d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
60e1b98f6eSjeremylt           CeedPragmaSIMD
61d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
621d79ecccSjeremylt             CeedPragmaSIMD
63d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
647509a596Sjeremylt               CeedPragmaSIMD
65d1d35e2fSjeremylt               for (CeedInt j = 0; j < blk_size; j++)
66d1d35e2fSjeremylt                 vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]
67d1d35e2fSjeremylt                   = uu[n + k*elem_size +
68d1d35e2fSjeremylt                          CeedIntMin(e+j, num_elem-1)*elem_size*num_comp];
697f90ec76Sjeremylt       } else {
707f90ec76Sjeremylt         // User provided strides
717f90ec76Sjeremylt         CeedInt strides[3];
72e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
73d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
747f90ec76Sjeremylt           CeedPragmaSIMD
75d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
767f90ec76Sjeremylt             CeedPragmaSIMD
77d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
787f90ec76Sjeremylt               CeedPragmaSIMD
79d1d35e2fSjeremylt               for (CeedInt j = 0; j < blk_size; j++)
80d1d35e2fSjeremylt                 vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]
817509a596Sjeremylt                   = uu[n*strides[0] + k*strides[1] +
82d1d35e2fSjeremylt                                     CeedIntMin(e+j, num_elem-1)*strides[2]];
837509a596Sjeremylt       }
8421617c04Sjeremylt     } else {
85d979a051Sjeremylt       // Offsets provided, standard or blocked restriction
86d1d35e2fSjeremylt       // vv has shape [elem_size, num_comp, num_elem], row-major
87d1d35e2fSjeremylt       // uu has shape [nnodes, num_comp]
88d1d35e2fSjeremylt       for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
89e1b98f6eSjeremylt         CeedPragmaSIMD
90d1d35e2fSjeremylt         for (CeedInt k = 0; k < num_comp; k++)
91e1b98f6eSjeremylt           CeedPragmaSIMD
92d1d35e2fSjeremylt           for (CeedInt i = 0; i < elem_size*blk_size; i++)
93d1d35e2fSjeremylt             vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset]
94d1d35e2fSjeremylt               = uu[impl->offsets[i+elem_size*e] + k*comp_stride];
9521617c04Sjeremylt     }
9621617c04Sjeremylt   } else {
977f90ec76Sjeremylt     // Restriction from E-vector to L-vector
988d94b059Sjeremylt     // Performing v += r^T * u
99d979a051Sjeremylt     // No offsets provided, Identity Restriction
100d979a051Sjeremylt     if (!impl->offsets) {
101d1d35e2fSjeremylt       bool has_backend_strides;
102d1d35e2fSjeremylt       ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides);
103e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
104d1d35e2fSjeremylt       if (has_backend_strides) {
105d1d35e2fSjeremylt         // CPU backend strides are {1, elem_size, elem_size*num_comp}
1067f90ec76Sjeremylt         // This if brach is left separate to allow better inlining
107d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
108523b8ea0Sjeremylt           CeedPragmaSIMD
109d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
1107509a596Sjeremylt             CeedPragmaSIMD
111d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
1127f90ec76Sjeremylt               CeedPragmaSIMD
113d1d35e2fSjeremylt               for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++)
114d1d35e2fSjeremylt                 vv[n + k*elem_size + (e+j)*elem_size*num_comp]
115d1d35e2fSjeremylt                 += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset];
1167f90ec76Sjeremylt       } else {
1177f90ec76Sjeremylt         // User provided strides
1187f90ec76Sjeremylt         CeedInt strides[3];
119e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
120d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
1217f90ec76Sjeremylt           CeedPragmaSIMD
122d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
1237f90ec76Sjeremylt             CeedPragmaSIMD
124d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
1257f90ec76Sjeremylt               CeedPragmaSIMD
126d1d35e2fSjeremylt               for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++)
1277509a596Sjeremylt                 vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]]
128d1d35e2fSjeremylt                 += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset];
129523b8ea0Sjeremylt       }
13021617c04Sjeremylt     } else {
131d979a051Sjeremylt       // Offsets provided, standard or blocked restriction
132d1d35e2fSjeremylt       // uu has shape [elem_size, num_comp, num_elem]
133d1d35e2fSjeremylt       // vv has shape [nnodes, num_comp]
134d1d35e2fSjeremylt       for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
135d1d35e2fSjeremylt         for (CeedInt k = 0; k < num_comp; k++)
136d1d35e2fSjeremylt           for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size)
1378d94b059Sjeremylt             // Iteration bound set to discard padding elements
138d1d35e2fSjeremylt             for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++)
139d1d35e2fSjeremylt               vv[impl->offsets[j+e*elem_size] + k*comp_stride]
140d1d35e2fSjeremylt               += uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset];
14121617c04Sjeremylt     }
14221617c04Sjeremylt   }
143e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChkBackend(ierr);
144e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArray(v, &vv); CeedChkBackend(ierr);
14521617c04Sjeremylt   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
14621617c04Sjeremylt     *request = NULL;
147e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
14821617c04Sjeremylt }
14921617c04Sjeremylt 
150f10650afSjeremylt //------------------------------------------------------------------------------
151f10650afSjeremylt // ElemRestriction Apply - Common Sizes
152f10650afSjeremylt //------------------------------------------------------------------------------
153d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r,
154d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
155d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
156d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
157d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop,
158d1d35e2fSjeremylt          t_mode, u, v, request);
159d979a051Sjeremylt }
160d979a051Sjeremylt 
161d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r,
162d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
163d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
164d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
165d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode,
1669c36149bSjeremylt          u, v, request);
1674d2a38eeSjeremylt }
1684d2a38eeSjeremylt 
169d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r,
170d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
171d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
172d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
173d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop,
174d1d35e2fSjeremylt          t_mode, u, v, request);
1759c36149bSjeremylt }
1769c36149bSjeremylt 
177d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r,
178d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
179d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
180d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
181d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode,
1829c36149bSjeremylt          u, v, request);
1839c36149bSjeremylt }
1849c36149bSjeremylt 
185d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r,
186d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
187d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
188d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
189d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop,
190d1d35e2fSjeremylt          t_mode, u, v, request);
191d979a051Sjeremylt }
192d979a051Sjeremylt 
193d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r,
194d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
195d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
196d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
197d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode,
198d979a051Sjeremylt          u, v, request);
199d979a051Sjeremylt }
200d979a051Sjeremylt 
201d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r,
202d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
203d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
204d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
205d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop,
206d1d35e2fSjeremylt          t_mode, u, v, request);
207d979a051Sjeremylt }
208d979a051Sjeremylt 
209d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r,
210d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
211d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
212d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
213d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode,
214d979a051Sjeremylt          u, v, request);
215d979a051Sjeremylt }
216d979a051Sjeremylt 
217bf4d1581Sjeremylt // LCOV_EXCL_START
218d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r,
219d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
220d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
221d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
222d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop,
223d1d35e2fSjeremylt          t_mode, u, v, request);
224d979a051Sjeremylt }
225bf4d1581Sjeremylt // LCOV_EXCL_STOP
226d979a051Sjeremylt 
227d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r,
228d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
229d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
230d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
231d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode,
232d979a051Sjeremylt          u, v, request);
233d979a051Sjeremylt }
234d979a051Sjeremylt 
235bf4d1581Sjeremylt // LCOV_EXCL_START
236d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r,
237d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
238d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
239d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
240d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop,
241d1d35e2fSjeremylt          t_mode, u, v, request);
242d979a051Sjeremylt }
243bf4d1581Sjeremylt // LCOV_EXCL_STOP
244d979a051Sjeremylt 
245d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r,
246d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
247d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
248d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
249d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode,
2509c36149bSjeremylt          u, v, request);
2514d2a38eeSjeremylt }
2524d2a38eeSjeremylt 
253f10650afSjeremylt //------------------------------------------------------------------------------
254f10650afSjeremylt // ElemRestriction Apply
255f10650afSjeremylt //------------------------------------------------------------------------------
256be9261b7Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
257d1d35e2fSjeremylt                                         CeedTransposeMode t_mode, CeedVector u,
258be9261b7Sjeremylt                                         CeedVector v, CeedRequest *request) {
259be9261b7Sjeremylt   int ierr;
260d1d35e2fSjeremylt   CeedInt num_blk, blk_size, num_comp, comp_stride;
261d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr);
262d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
263d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
264d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
2657509a596Sjeremylt   CeedElemRestriction_Ref *impl;
266e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
2674d2a38eeSjeremylt 
268d1d35e2fSjeremylt   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v,
269d979a051Sjeremylt                      request);
2709c36149bSjeremylt }
271be9261b7Sjeremylt 
272f10650afSjeremylt //------------------------------------------------------------------------------
273f10650afSjeremylt // ElemRestriction Apply Block
274f10650afSjeremylt //------------------------------------------------------------------------------
275be9261b7Sjeremylt static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r,
276d1d35e2fSjeremylt     CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
277074cb416Sjeremylt     CeedRequest *request) {
2784d2a38eeSjeremylt   int ierr;
279d1d35e2fSjeremylt   CeedInt blk_size, num_comp, comp_stride;
280d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
281d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
282d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
2837509a596Sjeremylt   CeedElemRestriction_Ref *impl;
284e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
2854d2a38eeSjeremylt 
286d1d35e2fSjeremylt   return impl->Apply(r, num_comp, blk_size, comp_stride, block, block+1, t_mode,
287d1d35e2fSjeremylt                      u, v, request);
2889c36149bSjeremylt }
289be9261b7Sjeremylt 
290f10650afSjeremylt //------------------------------------------------------------------------------
291bd33150aSjeremylt // ElemRestriction Get Offsets
292bd33150aSjeremylt //------------------------------------------------------------------------------
293bd33150aSjeremylt static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr,
294d1d35e2fSjeremylt     CeedMemType mem_type, const CeedInt **offsets) {
295bd33150aSjeremylt   int ierr;
296bd33150aSjeremylt   CeedElemRestriction_Ref *impl;
297e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(rstr, &impl); CeedChkBackend(ierr);
298bd33150aSjeremylt   Ceed ceed;
299e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChkBackend(ierr);
300bd33150aSjeremylt 
301d1d35e2fSjeremylt   if (mem_type != CEED_MEM_HOST)
302bd33150aSjeremylt     // LCOV_EXCL_START
303e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
304bd33150aSjeremylt   // LCOV_EXCL_STOP
305bd33150aSjeremylt 
306bd33150aSjeremylt   *offsets = impl->offsets;
307e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
308bd33150aSjeremylt }
309bd33150aSjeremylt 
310bd33150aSjeremylt //------------------------------------------------------------------------------
311f10650afSjeremylt // ElemRestriction Destroy
312f10650afSjeremylt //------------------------------------------------------------------------------
31321617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
31421617c04Sjeremylt   int ierr;
315fe2413ffSjeremylt   CeedElemRestriction_Ref *impl;
316e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
31721617c04Sjeremylt 
318e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl->offsets_allocated); CeedChkBackend(ierr);
319e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
320e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
32121617c04Sjeremylt }
32221617c04Sjeremylt 
323f10650afSjeremylt //------------------------------------------------------------------------------
324f10650afSjeremylt // ElemRestriction Create
325f10650afSjeremylt //------------------------------------------------------------------------------
326d1d35e2fSjeremylt int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode,
327d979a051Sjeremylt                                   const CeedInt *offsets,
328d979a051Sjeremylt                                   CeedElemRestriction r) {
32921617c04Sjeremylt   int ierr;
33021617c04Sjeremylt   CeedElemRestriction_Ref *impl;
331d1d35e2fSjeremylt   CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride;
332d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
333d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
334d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr);
335d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
336d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
337d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
3384ce2993fSjeremylt   Ceed ceed;
339e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
34021617c04Sjeremylt 
341d1d35e2fSjeremylt   if (mem_type != CEED_MEM_HOST)
342c042f62fSJeremy L Thompson     // LCOV_EXCL_START
343e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
344c042f62fSJeremy L Thompson   // LCOV_EXCL_STOP
345e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
3463661185eSjeremylt 
34792fe105eSJeremy L Thompson   // Offsets data
348d1d35e2fSjeremylt   bool is_strided;
349d1d35e2fSjeremylt   ierr = CeedElemRestrictionIsStrided(r, &is_strided); CeedChkBackend(ierr);
350d1d35e2fSjeremylt   if (!is_strided) {
35192fe105eSJeremy L Thompson     // Check indices for ref or memcheck backends
352d1d35e2fSjeremylt     Ceed parent_ceed = ceed, curr_ceed = NULL;
353d1d35e2fSjeremylt     while (parent_ceed != curr_ceed) {
354d1d35e2fSjeremylt       curr_ceed = parent_ceed;
355d1d35e2fSjeremylt       ierr = CeedGetParent(curr_ceed, &parent_ceed); CeedChkBackend(ierr);
3563661185eSjeremylt     }
3573661185eSjeremylt     const char *resource;
358d1d35e2fSjeremylt     ierr = CeedGetResource(parent_ceed, &resource); CeedChkBackend(ierr);
359d1d35e2fSjeremylt     if (!strcmp(resource, "/cpu/self/ref/serial") ||
360d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/ref/blocked") ||
361d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/memcheck/serial") ||
362d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/memcheck/blocked")) {
363d1d35e2fSjeremylt       CeedInt l_size;
364d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
3653661185eSjeremylt 
366d1d35e2fSjeremylt       for (CeedInt i = 0; i < num_elem*elem_size; i++)
367d1d35e2fSjeremylt         if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp - 1) * comp_stride)
3683661185eSjeremylt           // LCOV_EXCL_START
369e15f9bd0SJeremy L Thompson           return CeedError(ceed, CEED_ERROR_BACKEND,
370e15f9bd0SJeremy L Thompson                            "Restriction offset %d (%d) out of range "
371d1d35e2fSjeremylt                            "[0, %d]", i, offsets[i], l_size);
3723661185eSjeremylt       // LCOV_EXCL_STOP
3733661185eSjeremylt     }
3743661185eSjeremylt 
37592fe105eSJeremy L Thompson     // Copy data
376d1d35e2fSjeremylt     switch (copy_mode) {
37721617c04Sjeremylt     case CEED_COPY_VALUES:
378d1d35e2fSjeremylt       ierr = CeedMalloc(num_elem*elem_size, &impl->offsets_allocated);
379e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
380d979a051Sjeremylt       memcpy(impl->offsets_allocated, offsets,
381d1d35e2fSjeremylt              num_elem * elem_size * sizeof(offsets[0]));
382d979a051Sjeremylt       impl->offsets = impl->offsets_allocated;
38321617c04Sjeremylt       break;
38421617c04Sjeremylt     case CEED_OWN_POINTER:
385d979a051Sjeremylt       impl->offsets_allocated = (CeedInt *)offsets;
386d979a051Sjeremylt       impl->offsets = impl->offsets_allocated;
38721617c04Sjeremylt       break;
38821617c04Sjeremylt     case CEED_USE_POINTER:
389d979a051Sjeremylt       impl->offsets = offsets;
39021617c04Sjeremylt     }
39192fe105eSJeremy L Thompson   }
392fe2413ffSjeremylt 
393e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionSetData(r, impl); CeedChkBackend(ierr);
394d1d35e2fSjeremylt   CeedInt layout[3] = {1, elem_size, elem_size*num_comp};
395e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionSetELayout(r, layout); CeedChkBackend(ierr);
396fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply",
397e15f9bd0SJeremy L Thompson                                 CeedElemRestrictionApply_Ref); CeedChkBackend(ierr);
398be9261b7Sjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock",
399be9261b7Sjeremylt                                 CeedElemRestrictionApplyBlock_Ref);
400e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
401bd33150aSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets",
402bd33150aSjeremylt                                 CeedElemRestrictionGetOffsets_Ref);
403e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
404fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy",
405e15f9bd0SJeremy L Thompson                                 CeedElemRestrictionDestroy_Ref); CeedChkBackend(ierr);
406d979a051Sjeremylt 
407d1d35e2fSjeremylt   // Set apply function based upon num_comp, blk_size, and comp_stride
408d979a051Sjeremylt   CeedInt idx = -1;
409d1d35e2fSjeremylt   if (blk_size < 10)
410d1d35e2fSjeremylt     idx = 100*num_comp + 10*blk_size + (comp_stride == 1);
411d979a051Sjeremylt   switch (idx) {
412d979a051Sjeremylt   case 110:
413d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_110;
414d979a051Sjeremylt     break;
415d979a051Sjeremylt   case 111:
416d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_111;
417d979a051Sjeremylt     break;
418d979a051Sjeremylt   case 180:
419d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_180;
420d979a051Sjeremylt     break;
421d979a051Sjeremylt   case 181:
422d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_181;
423d979a051Sjeremylt     break;
424d979a051Sjeremylt   case 310:
425d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_310;
426d979a051Sjeremylt     break;
427d979a051Sjeremylt   case 311:
428d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_311;
429d979a051Sjeremylt     break;
430d979a051Sjeremylt   case 380:
431d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_380;
432d979a051Sjeremylt     break;
433d979a051Sjeremylt   case 381:
434d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_381;
435d979a051Sjeremylt     break;
436bf4d1581Sjeremylt   // LCOV_EXCL_START
437d979a051Sjeremylt   case 510:
438d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_510;
439d979a051Sjeremylt     break;
440bf4d1581Sjeremylt   // LCOV_EXCL_STOP
441d979a051Sjeremylt   case 511:
442d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_511;
443d979a051Sjeremylt     break;
444bf4d1581Sjeremylt   // LCOV_EXCL_START
445d979a051Sjeremylt   case 580:
446d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_580;
447d979a051Sjeremylt     break;
448bf4d1581Sjeremylt   // LCOV_EXCL_STOP
449d979a051Sjeremylt   case 581:
450d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_581;
451d979a051Sjeremylt     break;
452d979a051Sjeremylt   default:
453d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_Core;
454d979a051Sjeremylt     break;
455d979a051Sjeremylt   }
456d979a051Sjeremylt 
457e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
45821617c04Sjeremylt }
459f10650afSjeremylt //------------------------------------------------------------------------------
460