xref: /libCEED/rust/libceed-sys/c-src/backends/ref/ceed-ref-restriction.c (revision d1d35e2f02dc969aee8debf3fd943dd784aa847a)
121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
421617c04Sjeremylt //
521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
721617c04Sjeremylt // element discretizations for exascale applications. For more information and
821617c04Sjeremylt // source code availability see http://github.com/ceed.
921617c04Sjeremylt //
1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early
1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
1621617c04Sjeremylt 
17ec3da8bcSJed Brown #include <ceed/ceed.h>
18ec3da8bcSJed Brown #include <ceed/backend.h>
193d576824SJeremy L Thompson #include <stdbool.h>
203d576824SJeremy L Thompson #include <string.h>
2121617c04Sjeremylt #include "ceed-ref.h"
2221617c04Sjeremylt 
23f10650afSjeremylt //------------------------------------------------------------------------------
24f10650afSjeremylt // Core ElemRestriction Apply Code
25f10650afSjeremylt //------------------------------------------------------------------------------
26be9261b7Sjeremylt static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r,
27*d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
28*d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
299c36149bSjeremylt     CeedVector v, CeedRequest *request) {
3021617c04Sjeremylt   int ierr;
314ce2993fSjeremylt   CeedElemRestriction_Ref *impl;
32e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
3321617c04Sjeremylt   const CeedScalar *uu;
3421617c04Sjeremylt   CeedScalar *vv;
35*d1d35e2fSjeremylt   CeedInt num_elem, elem_size, v_offset;
36*d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
37*d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
38*d1d35e2fSjeremylt   v_offset = start*blk_size*elem_size*num_comp;
3921617c04Sjeremylt 
40e15f9bd0SJeremy L Thompson   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChkBackend(ierr);
41e15f9bd0SJeremy L Thompson   ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr);
427f90ec76Sjeremylt   // Restriction from L-vector to E-vector
4321617c04Sjeremylt   // Perform: v = r * u
44*d1d35e2fSjeremylt   if (t_mode == CEED_NOTRANSPOSE) {
45d979a051Sjeremylt     // No offsets provided, Identity Restriction
46d979a051Sjeremylt     if (!impl->offsets) {
47*d1d35e2fSjeremylt       bool has_backend_strides;
48*d1d35e2fSjeremylt       ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides);
49e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
50*d1d35e2fSjeremylt       if (has_backend_strides) {
51*d1d35e2fSjeremylt         // CPU backend strides are {1, elem_size, elem_size*num_comp}
527f90ec76Sjeremylt         // This if branch is left separate to allow better inlining
53*d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
54e1b98f6eSjeremylt           CeedPragmaSIMD
55*d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
561d79ecccSjeremylt             CeedPragmaSIMD
57*d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
587509a596Sjeremylt               CeedPragmaSIMD
59*d1d35e2fSjeremylt               for (CeedInt j = 0; j < blk_size; j++)
60*d1d35e2fSjeremylt                 vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]
61*d1d35e2fSjeremylt                   = uu[n + k*elem_size +
62*d1d35e2fSjeremylt                          CeedIntMin(e+j, num_elem-1)*elem_size*num_comp];
637f90ec76Sjeremylt       } else {
647f90ec76Sjeremylt         // User provided strides
657f90ec76Sjeremylt         CeedInt strides[3];
66e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
67*d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
687f90ec76Sjeremylt           CeedPragmaSIMD
69*d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
707f90ec76Sjeremylt             CeedPragmaSIMD
71*d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
727f90ec76Sjeremylt               CeedPragmaSIMD
73*d1d35e2fSjeremylt               for (CeedInt j = 0; j < blk_size; j++)
74*d1d35e2fSjeremylt                 vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]
757509a596Sjeremylt                   = uu[n*strides[0] + k*strides[1] +
76*d1d35e2fSjeremylt                                     CeedIntMin(e+j, num_elem-1)*strides[2]];
777509a596Sjeremylt       }
7821617c04Sjeremylt     } else {
79d979a051Sjeremylt       // Offsets provided, standard or blocked restriction
80*d1d35e2fSjeremylt       // vv has shape [elem_size, num_comp, num_elem], row-major
81*d1d35e2fSjeremylt       // uu has shape [nnodes, num_comp]
82*d1d35e2fSjeremylt       for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
83e1b98f6eSjeremylt         CeedPragmaSIMD
84*d1d35e2fSjeremylt         for (CeedInt k = 0; k < num_comp; k++)
85e1b98f6eSjeremylt           CeedPragmaSIMD
86*d1d35e2fSjeremylt           for (CeedInt i = 0; i < elem_size*blk_size; i++)
87*d1d35e2fSjeremylt             vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset]
88*d1d35e2fSjeremylt               = uu[impl->offsets[i+elem_size*e] + k*comp_stride];
8921617c04Sjeremylt     }
9021617c04Sjeremylt   } else {
917f90ec76Sjeremylt     // Restriction from E-vector to L-vector
928d94b059Sjeremylt     // Performing v += r^T * u
93d979a051Sjeremylt     // No offsets provided, Identity Restriction
94d979a051Sjeremylt     if (!impl->offsets) {
95*d1d35e2fSjeremylt       bool has_backend_strides;
96*d1d35e2fSjeremylt       ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides);
97e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
98*d1d35e2fSjeremylt       if (has_backend_strides) {
99*d1d35e2fSjeremylt         // CPU backend strides are {1, elem_size, elem_size*num_comp}
1007f90ec76Sjeremylt         // This if brach is left separate to allow better inlining
101*d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
102523b8ea0Sjeremylt           CeedPragmaSIMD
103*d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
1047509a596Sjeremylt             CeedPragmaSIMD
105*d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
1067f90ec76Sjeremylt               CeedPragmaSIMD
107*d1d35e2fSjeremylt               for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++)
108*d1d35e2fSjeremylt                 vv[n + k*elem_size + (e+j)*elem_size*num_comp]
109*d1d35e2fSjeremylt                 += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset];
1107f90ec76Sjeremylt       } else {
1117f90ec76Sjeremylt         // User provided strides
1127f90ec76Sjeremylt         CeedInt strides[3];
113e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
114*d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
1157f90ec76Sjeremylt           CeedPragmaSIMD
116*d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
1177f90ec76Sjeremylt             CeedPragmaSIMD
118*d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
1197f90ec76Sjeremylt               CeedPragmaSIMD
120*d1d35e2fSjeremylt               for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++)
1217509a596Sjeremylt                 vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]]
122*d1d35e2fSjeremylt                 += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset];
123523b8ea0Sjeremylt       }
12421617c04Sjeremylt     } else {
125d979a051Sjeremylt       // Offsets provided, standard or blocked restriction
126*d1d35e2fSjeremylt       // uu has shape [elem_size, num_comp, num_elem]
127*d1d35e2fSjeremylt       // vv has shape [nnodes, num_comp]
128*d1d35e2fSjeremylt       for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
129*d1d35e2fSjeremylt         for (CeedInt k = 0; k < num_comp; k++)
130*d1d35e2fSjeremylt           for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size)
1318d94b059Sjeremylt             // Iteration bound set to discard padding elements
132*d1d35e2fSjeremylt             for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++)
133*d1d35e2fSjeremylt               vv[impl->offsets[j+e*elem_size] + k*comp_stride]
134*d1d35e2fSjeremylt               += uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset];
13521617c04Sjeremylt     }
13621617c04Sjeremylt   }
137e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChkBackend(ierr);
138e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArray(v, &vv); CeedChkBackend(ierr);
13921617c04Sjeremylt   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
14021617c04Sjeremylt     *request = NULL;
141e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
14221617c04Sjeremylt }
14321617c04Sjeremylt 
144f10650afSjeremylt //------------------------------------------------------------------------------
145f10650afSjeremylt // ElemRestriction Apply - Common Sizes
146f10650afSjeremylt //------------------------------------------------------------------------------
147d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r,
148*d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
149*d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
150d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
151*d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop,
152*d1d35e2fSjeremylt          t_mode, u, v, request);
153d979a051Sjeremylt }
154d979a051Sjeremylt 
155d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r,
156*d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
157*d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
158d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
159*d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode,
1609c36149bSjeremylt          u, v, request);
1614d2a38eeSjeremylt }
1624d2a38eeSjeremylt 
163d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r,
164*d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
165*d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
166d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
167*d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop,
168*d1d35e2fSjeremylt          t_mode, u, v, request);
1699c36149bSjeremylt }
1709c36149bSjeremylt 
171d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r,
172*d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
173*d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
174d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
175*d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode,
1769c36149bSjeremylt          u, v, request);
1779c36149bSjeremylt }
1789c36149bSjeremylt 
179d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r,
180*d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
181*d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
182d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
183*d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop,
184*d1d35e2fSjeremylt          t_mode, u, v, request);
185d979a051Sjeremylt }
186d979a051Sjeremylt 
187d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r,
188*d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
189*d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
190d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
191*d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode,
192d979a051Sjeremylt          u, v, request);
193d979a051Sjeremylt }
194d979a051Sjeremylt 
195d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r,
196*d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
197*d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
198d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
199*d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop,
200*d1d35e2fSjeremylt          t_mode, u, v, request);
201d979a051Sjeremylt }
202d979a051Sjeremylt 
203d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r,
204*d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
205*d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
206d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
207*d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode,
208d979a051Sjeremylt          u, v, request);
209d979a051Sjeremylt }
210d979a051Sjeremylt 
211bf4d1581Sjeremylt // LCOV_EXCL_START
212d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r,
213*d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
214*d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
215d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
216*d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop,
217*d1d35e2fSjeremylt          t_mode, u, v, request);
218d979a051Sjeremylt }
219bf4d1581Sjeremylt // LCOV_EXCL_STOP
220d979a051Sjeremylt 
221d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r,
222*d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
223*d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
224d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
225*d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode,
226d979a051Sjeremylt          u, v, request);
227d979a051Sjeremylt }
228d979a051Sjeremylt 
229bf4d1581Sjeremylt // LCOV_EXCL_START
230d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r,
231*d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
232*d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
233d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
234*d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop,
235*d1d35e2fSjeremylt          t_mode, u, v, request);
236d979a051Sjeremylt }
237bf4d1581Sjeremylt // LCOV_EXCL_STOP
238d979a051Sjeremylt 
239d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r,
240*d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
241*d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
242d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
243*d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode,
2449c36149bSjeremylt          u, v, request);
2454d2a38eeSjeremylt }
2464d2a38eeSjeremylt 
247f10650afSjeremylt //------------------------------------------------------------------------------
248f10650afSjeremylt // ElemRestriction Apply
249f10650afSjeremylt //------------------------------------------------------------------------------
250be9261b7Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
251*d1d35e2fSjeremylt                                         CeedTransposeMode t_mode, CeedVector u,
252be9261b7Sjeremylt                                         CeedVector v, CeedRequest *request) {
253be9261b7Sjeremylt   int ierr;
254*d1d35e2fSjeremylt   CeedInt num_blk, blk_size, num_comp, comp_stride;
255*d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr);
256*d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
257*d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
258*d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
2597509a596Sjeremylt   CeedElemRestriction_Ref *impl;
260e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
2614d2a38eeSjeremylt 
262*d1d35e2fSjeremylt   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v,
263d979a051Sjeremylt                      request);
2649c36149bSjeremylt }
265be9261b7Sjeremylt 
266f10650afSjeremylt //------------------------------------------------------------------------------
267f10650afSjeremylt // ElemRestriction Apply Block
268f10650afSjeremylt //------------------------------------------------------------------------------
269be9261b7Sjeremylt static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r,
270*d1d35e2fSjeremylt     CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
271074cb416Sjeremylt     CeedRequest *request) {
2724d2a38eeSjeremylt   int ierr;
273*d1d35e2fSjeremylt   CeedInt blk_size, num_comp, comp_stride;
274*d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
275*d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
276*d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
2777509a596Sjeremylt   CeedElemRestriction_Ref *impl;
278e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
2794d2a38eeSjeremylt 
280*d1d35e2fSjeremylt   return impl->Apply(r, num_comp, blk_size, comp_stride, block, block+1, t_mode,
281*d1d35e2fSjeremylt                      u, v, request);
2829c36149bSjeremylt }
283be9261b7Sjeremylt 
284f10650afSjeremylt //------------------------------------------------------------------------------
285bd33150aSjeremylt // ElemRestriction Get Offsets
286bd33150aSjeremylt //------------------------------------------------------------------------------
287bd33150aSjeremylt static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr,
288*d1d35e2fSjeremylt     CeedMemType mem_type, const CeedInt **offsets) {
289bd33150aSjeremylt   int ierr;
290bd33150aSjeremylt   CeedElemRestriction_Ref *impl;
291e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(rstr, &impl); CeedChkBackend(ierr);
292bd33150aSjeremylt   Ceed ceed;
293e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChkBackend(ierr);
294bd33150aSjeremylt 
295*d1d35e2fSjeremylt   if (mem_type != CEED_MEM_HOST)
296bd33150aSjeremylt     // LCOV_EXCL_START
297e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
298bd33150aSjeremylt   // LCOV_EXCL_STOP
299bd33150aSjeremylt 
300bd33150aSjeremylt   *offsets = impl->offsets;
301e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
302bd33150aSjeremylt }
303bd33150aSjeremylt 
304bd33150aSjeremylt //------------------------------------------------------------------------------
305f10650afSjeremylt // ElemRestriction Destroy
306f10650afSjeremylt //------------------------------------------------------------------------------
30721617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
30821617c04Sjeremylt   int ierr;
309fe2413ffSjeremylt   CeedElemRestriction_Ref *impl;
310e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
31121617c04Sjeremylt 
312e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl->offsets_allocated); CeedChkBackend(ierr);
313e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
314e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
31521617c04Sjeremylt }
31621617c04Sjeremylt 
317f10650afSjeremylt //------------------------------------------------------------------------------
318f10650afSjeremylt // ElemRestriction Create
319f10650afSjeremylt //------------------------------------------------------------------------------
320*d1d35e2fSjeremylt int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode,
321d979a051Sjeremylt                                   const CeedInt *offsets,
322d979a051Sjeremylt                                   CeedElemRestriction r) {
32321617c04Sjeremylt   int ierr;
32421617c04Sjeremylt   CeedElemRestriction_Ref *impl;
325*d1d35e2fSjeremylt   CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride;
326*d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
327*d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
328*d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr);
329*d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
330*d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
331*d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
3324ce2993fSjeremylt   Ceed ceed;
333e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
33421617c04Sjeremylt 
335*d1d35e2fSjeremylt   if (mem_type != CEED_MEM_HOST)
336c042f62fSJeremy L Thompson     // LCOV_EXCL_START
337e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
338c042f62fSJeremy L Thompson   // LCOV_EXCL_STOP
339e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
3403661185eSjeremylt 
34192fe105eSJeremy L Thompson   // Offsets data
342*d1d35e2fSjeremylt   bool is_strided;
343*d1d35e2fSjeremylt   ierr = CeedElemRestrictionIsStrided(r, &is_strided); CeedChkBackend(ierr);
344*d1d35e2fSjeremylt   if (!is_strided) {
34592fe105eSJeremy L Thompson     // Check indices for ref or memcheck backends
346*d1d35e2fSjeremylt     Ceed parent_ceed = ceed, curr_ceed = NULL;
347*d1d35e2fSjeremylt     while (parent_ceed != curr_ceed) {
348*d1d35e2fSjeremylt       curr_ceed = parent_ceed;
349*d1d35e2fSjeremylt       ierr = CeedGetParent(curr_ceed, &parent_ceed); CeedChkBackend(ierr);
3503661185eSjeremylt     }
3513661185eSjeremylt     const char *resource;
352*d1d35e2fSjeremylt     ierr = CeedGetResource(parent_ceed, &resource); CeedChkBackend(ierr);
353*d1d35e2fSjeremylt     if (!strcmp(resource, "/cpu/self/ref/serial") ||
354*d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/ref/blocked") ||
355*d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/memcheck/serial") ||
356*d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/memcheck/blocked")) {
357*d1d35e2fSjeremylt       CeedInt l_size;
358*d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
3593661185eSjeremylt 
360*d1d35e2fSjeremylt       for (CeedInt i = 0; i < num_elem*elem_size; i++)
361*d1d35e2fSjeremylt         if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp - 1) * comp_stride)
3623661185eSjeremylt           // LCOV_EXCL_START
363e15f9bd0SJeremy L Thompson           return CeedError(ceed, CEED_ERROR_BACKEND,
364e15f9bd0SJeremy L Thompson                            "Restriction offset %d (%d) out of range "
365*d1d35e2fSjeremylt                            "[0, %d]", i, offsets[i], l_size);
3663661185eSjeremylt       // LCOV_EXCL_STOP
3673661185eSjeremylt     }
3683661185eSjeremylt 
36992fe105eSJeremy L Thompson     // Copy data
370*d1d35e2fSjeremylt     switch (copy_mode) {
37121617c04Sjeremylt     case CEED_COPY_VALUES:
372*d1d35e2fSjeremylt       ierr = CeedMalloc(num_elem*elem_size, &impl->offsets_allocated);
373e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
374d979a051Sjeremylt       memcpy(impl->offsets_allocated, offsets,
375*d1d35e2fSjeremylt              num_elem * elem_size * sizeof(offsets[0]));
376d979a051Sjeremylt       impl->offsets = impl->offsets_allocated;
37721617c04Sjeremylt       break;
37821617c04Sjeremylt     case CEED_OWN_POINTER:
379d979a051Sjeremylt       impl->offsets_allocated = (CeedInt *)offsets;
380d979a051Sjeremylt       impl->offsets = impl->offsets_allocated;
38121617c04Sjeremylt       break;
38221617c04Sjeremylt     case CEED_USE_POINTER:
383d979a051Sjeremylt       impl->offsets = offsets;
38421617c04Sjeremylt     }
38592fe105eSJeremy L Thompson   }
386fe2413ffSjeremylt 
387e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionSetData(r, impl); CeedChkBackend(ierr);
388*d1d35e2fSjeremylt   CeedInt layout[3] = {1, elem_size, elem_size*num_comp};
389e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionSetELayout(r, layout); CeedChkBackend(ierr);
390fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply",
391e15f9bd0SJeremy L Thompson                                 CeedElemRestrictionApply_Ref); CeedChkBackend(ierr);
392be9261b7Sjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock",
393be9261b7Sjeremylt                                 CeedElemRestrictionApplyBlock_Ref);
394e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
395bd33150aSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets",
396bd33150aSjeremylt                                 CeedElemRestrictionGetOffsets_Ref);
397e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
398fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy",
399e15f9bd0SJeremy L Thompson                                 CeedElemRestrictionDestroy_Ref); CeedChkBackend(ierr);
400d979a051Sjeremylt 
401*d1d35e2fSjeremylt   // Set apply function based upon num_comp, blk_size, and comp_stride
402d979a051Sjeremylt   CeedInt idx = -1;
403*d1d35e2fSjeremylt   if (blk_size < 10)
404*d1d35e2fSjeremylt     idx = 100*num_comp + 10*blk_size + (comp_stride == 1);
405d979a051Sjeremylt   switch (idx) {
406d979a051Sjeremylt   case 110:
407d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_110;
408d979a051Sjeremylt     break;
409d979a051Sjeremylt   case 111:
410d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_111;
411d979a051Sjeremylt     break;
412d979a051Sjeremylt   case 180:
413d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_180;
414d979a051Sjeremylt     break;
415d979a051Sjeremylt   case 181:
416d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_181;
417d979a051Sjeremylt     break;
418d979a051Sjeremylt   case 310:
419d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_310;
420d979a051Sjeremylt     break;
421d979a051Sjeremylt   case 311:
422d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_311;
423d979a051Sjeremylt     break;
424d979a051Sjeremylt   case 380:
425d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_380;
426d979a051Sjeremylt     break;
427d979a051Sjeremylt   case 381:
428d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_381;
429d979a051Sjeremylt     break;
430bf4d1581Sjeremylt   // LCOV_EXCL_START
431d979a051Sjeremylt   case 510:
432d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_510;
433d979a051Sjeremylt     break;
434bf4d1581Sjeremylt   // LCOV_EXCL_STOP
435d979a051Sjeremylt   case 511:
436d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_511;
437d979a051Sjeremylt     break;
438bf4d1581Sjeremylt   // LCOV_EXCL_START
439d979a051Sjeremylt   case 580:
440d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_580;
441d979a051Sjeremylt     break;
442bf4d1581Sjeremylt   // LCOV_EXCL_STOP
443d979a051Sjeremylt   case 581:
444d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_581;
445d979a051Sjeremylt     break;
446d979a051Sjeremylt   default:
447d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_Core;
448d979a051Sjeremylt     break;
449d979a051Sjeremylt   }
450d979a051Sjeremylt 
451e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
45221617c04Sjeremylt }
453f10650afSjeremylt //------------------------------------------------------------------------------
454