xref: /libCEED/rust/libceed-sys/c-src/backends/ref/ceed-ref-restriction.c (revision b435c5a60b1833cef613d6281360673b9d9fef16)
121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
421617c04Sjeremylt //
521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
721617c04Sjeremylt // element discretizations for exascale applications. For more information and
821617c04Sjeremylt // source code availability see http://github.com/ceed.
921617c04Sjeremylt //
1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early
1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
1621617c04Sjeremylt 
17ec3da8bcSJed Brown #include <ceed/ceed.h>
18ec3da8bcSJed Brown #include <ceed/backend.h>
193d576824SJeremy L Thompson #include <stdbool.h>
203d576824SJeremy L Thompson #include <string.h>
2121617c04Sjeremylt #include "ceed-ref.h"
2221617c04Sjeremylt 
23f10650afSjeremylt //------------------------------------------------------------------------------
24f10650afSjeremylt // Core ElemRestriction Apply Code
25f10650afSjeremylt //------------------------------------------------------------------------------
26be9261b7Sjeremylt static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r,
27d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
28d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
299c36149bSjeremylt     CeedVector v, CeedRequest *request) {
3021617c04Sjeremylt   int ierr;
314ce2993fSjeremylt   CeedElemRestriction_Ref *impl;
32e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
3321617c04Sjeremylt   const CeedScalar *uu;
3421617c04Sjeremylt   CeedScalar *vv;
35d1d35e2fSjeremylt   CeedInt num_elem, elem_size, v_offset;
36d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
37d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
38d1d35e2fSjeremylt   v_offset = start*blk_size*elem_size*num_comp;
3921617c04Sjeremylt 
40*b435c5a6Srezgarshakeri   bool is_oriented;
41*b435c5a6Srezgarshakeri   ierr = CeedElemRestrictionIsOriented(r, &is_oriented); CeedChkBackend(ierr);
42e15f9bd0SJeremy L Thompson   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChkBackend(ierr);
439c774eddSJeremy L Thompson   if (t_mode == CEED_TRANSPOSE) {
449c774eddSJeremy L Thompson     // Sum into for transpose mode, e-vec to l-vec
45e15f9bd0SJeremy L Thompson     ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr);
469c774eddSJeremy L Thompson   } else {
479c774eddSJeremy L Thompson     // Overwrite for notranspose mode, l-vec to e-vec
489c774eddSJeremy L Thompson     ierr = CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr);
499c774eddSJeremy L Thompson   }
507f90ec76Sjeremylt   // Restriction from L-vector to E-vector
5121617c04Sjeremylt   // Perform: v = r * u
52d1d35e2fSjeremylt   if (t_mode == CEED_NOTRANSPOSE) {
53d979a051Sjeremylt     // No offsets provided, Identity Restriction
54d979a051Sjeremylt     if (!impl->offsets) {
55d1d35e2fSjeremylt       bool has_backend_strides;
56d1d35e2fSjeremylt       ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides);
57e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
58d1d35e2fSjeremylt       if (has_backend_strides) {
59d1d35e2fSjeremylt         // CPU backend strides are {1, elem_size, elem_size*num_comp}
607f90ec76Sjeremylt         // This if branch is left separate to allow better inlining
61d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
62e1b98f6eSjeremylt           CeedPragmaSIMD
63d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
641d79ecccSjeremylt             CeedPragmaSIMD
65d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
667509a596Sjeremylt               CeedPragmaSIMD
67d1d35e2fSjeremylt               for (CeedInt j = 0; j < blk_size; j++)
68d1d35e2fSjeremylt                 vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]
69d1d35e2fSjeremylt                   = uu[n + k*elem_size +
70d1d35e2fSjeremylt                          CeedIntMin(e+j, num_elem-1)*elem_size*num_comp];
717f90ec76Sjeremylt       } else {
727f90ec76Sjeremylt         // User provided strides
737f90ec76Sjeremylt         CeedInt strides[3];
74e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
75d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
767f90ec76Sjeremylt           CeedPragmaSIMD
77d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
787f90ec76Sjeremylt             CeedPragmaSIMD
79d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
807f90ec76Sjeremylt               CeedPragmaSIMD
81d1d35e2fSjeremylt               for (CeedInt j = 0; j < blk_size; j++)
82d1d35e2fSjeremylt                 vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]
837509a596Sjeremylt                   = uu[n*strides[0] + k*strides[1] +
84d1d35e2fSjeremylt                                     CeedIntMin(e+j, num_elem-1)*strides[2]];
857509a596Sjeremylt       }
8621617c04Sjeremylt     } else {
87d979a051Sjeremylt       // Offsets provided, standard or blocked restriction
88d1d35e2fSjeremylt       // vv has shape [elem_size, num_comp, num_elem], row-major
89d1d35e2fSjeremylt       // uu has shape [nnodes, num_comp]
90*b435c5a6Srezgarshakeri       if (!is_oriented) {
91*b435c5a6Srezgarshakeri         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
92*b435c5a6Srezgarshakeri           CeedPragmaSIMD
93*b435c5a6Srezgarshakeri           for (CeedInt k = 0; k < num_comp; k++)
94*b435c5a6Srezgarshakeri             CeedPragmaSIMD
95*b435c5a6Srezgarshakeri             for (CeedInt i = 0; i < elem_size*blk_size; i++)
96*b435c5a6Srezgarshakeri               vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset]
97*b435c5a6Srezgarshakeri                 = uu[impl->offsets[i+elem_size*e] + k*comp_stride];
98*b435c5a6Srezgarshakeri       } else {
99d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
100e1b98f6eSjeremylt           CeedPragmaSIMD
101d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
102e1b98f6eSjeremylt             CeedPragmaSIMD
103d1d35e2fSjeremylt             for (CeedInt i = 0; i < elem_size*blk_size; i++)
104d1d35e2fSjeremylt               vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset]
105fc0567d9Srezgarshakeri                 = uu[impl->offsets[i+elem_size*e] + k*comp_stride] *
106fc0567d9Srezgarshakeri                   (impl->orient && impl->orient[i+elem_size*e] ? -1. : 1.);
10721617c04Sjeremylt       }
108*b435c5a6Srezgarshakeri     }
10921617c04Sjeremylt   } else {
1107f90ec76Sjeremylt     // Restriction from E-vector to L-vector
1118d94b059Sjeremylt     // Performing v += r^T * u
112d979a051Sjeremylt     // No offsets provided, Identity Restriction
113d979a051Sjeremylt     if (!impl->offsets) {
114d1d35e2fSjeremylt       bool has_backend_strides;
115d1d35e2fSjeremylt       ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides);
116e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
117d1d35e2fSjeremylt       if (has_backend_strides) {
118d1d35e2fSjeremylt         // CPU backend strides are {1, elem_size, elem_size*num_comp}
1197f90ec76Sjeremylt         // This if brach is left separate to allow better inlining
120d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
121523b8ea0Sjeremylt           CeedPragmaSIMD
122d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
1237509a596Sjeremylt             CeedPragmaSIMD
124d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
1257f90ec76Sjeremylt               CeedPragmaSIMD
126d1d35e2fSjeremylt               for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++)
127d1d35e2fSjeremylt                 vv[n + k*elem_size + (e+j)*elem_size*num_comp]
128d1d35e2fSjeremylt                 += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset];
1297f90ec76Sjeremylt       } else {
1307f90ec76Sjeremylt         // User provided strides
1317f90ec76Sjeremylt         CeedInt strides[3];
132e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
133d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
1347f90ec76Sjeremylt           CeedPragmaSIMD
135d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
1367f90ec76Sjeremylt             CeedPragmaSIMD
137d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
1387f90ec76Sjeremylt               CeedPragmaSIMD
139d1d35e2fSjeremylt               for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++)
1407509a596Sjeremylt                 vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]]
141d1d35e2fSjeremylt                 += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset];
142523b8ea0Sjeremylt       }
14321617c04Sjeremylt     } else {
144d979a051Sjeremylt       // Offsets provided, standard or blocked restriction
145d1d35e2fSjeremylt       // uu has shape [elem_size, num_comp, num_elem]
146d1d35e2fSjeremylt       // vv has shape [nnodes, num_comp]
147*b435c5a6Srezgarshakeri       if (!is_oriented) {
148*b435c5a6Srezgarshakeri         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
149*b435c5a6Srezgarshakeri           for (CeedInt k = 0; k < num_comp; k++)
150*b435c5a6Srezgarshakeri             for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size)
151*b435c5a6Srezgarshakeri               // Iteration bound set to discard padding elements
152*b435c5a6Srezgarshakeri               for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++)
153*b435c5a6Srezgarshakeri                 vv[impl->offsets[j+e*elem_size] + k*comp_stride]
154*b435c5a6Srezgarshakeri                 += uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset];
155*b435c5a6Srezgarshakeri       } else {
156d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
157d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
158d1d35e2fSjeremylt             for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size)
1598d94b059Sjeremylt               // Iteration bound set to discard padding elements
160d1d35e2fSjeremylt               for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++)
161d1d35e2fSjeremylt                 vv[impl->offsets[j+e*elem_size] + k*comp_stride]
162fc0567d9Srezgarshakeri                 += uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset] *
163fc0567d9Srezgarshakeri                    (impl->orient && impl->orient[j+e*elem_size] ? -1. : 1.);
16421617c04Sjeremylt       }
16521617c04Sjeremylt     }
166*b435c5a6Srezgarshakeri   }
167e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChkBackend(ierr);
168e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArray(v, &vv); CeedChkBackend(ierr);
16921617c04Sjeremylt   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
17021617c04Sjeremylt     *request = NULL;
171e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
17221617c04Sjeremylt }
17321617c04Sjeremylt 
174f10650afSjeremylt //------------------------------------------------------------------------------
175f10650afSjeremylt // ElemRestriction Apply - Common Sizes
176f10650afSjeremylt //------------------------------------------------------------------------------
177d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r,
178d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
179d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
180d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
181d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop,
182d1d35e2fSjeremylt          t_mode, u, v, request);
183d979a051Sjeremylt }
184d979a051Sjeremylt 
185d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r,
186d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
187d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
188d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
189d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode,
1909c36149bSjeremylt          u, v, request);
1914d2a38eeSjeremylt }
1924d2a38eeSjeremylt 
193d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r,
194d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
195d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
196d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
197d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop,
198d1d35e2fSjeremylt          t_mode, u, v, request);
1999c36149bSjeremylt }
2009c36149bSjeremylt 
201d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r,
202d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
203d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
204d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
205d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode,
2069c36149bSjeremylt          u, v, request);
2079c36149bSjeremylt }
2089c36149bSjeremylt 
209d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r,
210d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
211d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
212d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
213d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop,
214d1d35e2fSjeremylt          t_mode, u, v, request);
215d979a051Sjeremylt }
216d979a051Sjeremylt 
217d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r,
218d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
219d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
220d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
221d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode,
222d979a051Sjeremylt          u, v, request);
223d979a051Sjeremylt }
224d979a051Sjeremylt 
225d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r,
226d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
227d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
228d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
229d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop,
230d1d35e2fSjeremylt          t_mode, u, v, request);
231d979a051Sjeremylt }
232d979a051Sjeremylt 
233d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r,
234d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
235d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
236d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
237d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode,
238d979a051Sjeremylt          u, v, request);
239d979a051Sjeremylt }
240d979a051Sjeremylt 
241bf4d1581Sjeremylt // LCOV_EXCL_START
242d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r,
243d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
244d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
245d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
246d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop,
247d1d35e2fSjeremylt          t_mode, u, v, request);
248d979a051Sjeremylt }
249bf4d1581Sjeremylt // LCOV_EXCL_STOP
250d979a051Sjeremylt 
251d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r,
252d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
253d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
254d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
255d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode,
256d979a051Sjeremylt          u, v, request);
257d979a051Sjeremylt }
258d979a051Sjeremylt 
259bf4d1581Sjeremylt // LCOV_EXCL_START
260d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r,
261d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
262d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
263d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
264d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop,
265d1d35e2fSjeremylt          t_mode, u, v, request);
266d979a051Sjeremylt }
267bf4d1581Sjeremylt // LCOV_EXCL_STOP
268d979a051Sjeremylt 
269d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r,
270d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
271d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
272d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
273d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode,
2749c36149bSjeremylt          u, v, request);
2754d2a38eeSjeremylt }
2764d2a38eeSjeremylt 
277f10650afSjeremylt //------------------------------------------------------------------------------
278f10650afSjeremylt // ElemRestriction Apply
279f10650afSjeremylt //------------------------------------------------------------------------------
280be9261b7Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
281d1d35e2fSjeremylt                                         CeedTransposeMode t_mode, CeedVector u,
282be9261b7Sjeremylt                                         CeedVector v, CeedRequest *request) {
283be9261b7Sjeremylt   int ierr;
284d1d35e2fSjeremylt   CeedInt num_blk, blk_size, num_comp, comp_stride;
285d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr);
286d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
287d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
288d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
2897509a596Sjeremylt   CeedElemRestriction_Ref *impl;
290e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
2914d2a38eeSjeremylt 
292d1d35e2fSjeremylt   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v,
293d979a051Sjeremylt                      request);
2949c36149bSjeremylt }
295be9261b7Sjeremylt 
296f10650afSjeremylt //------------------------------------------------------------------------------
297f10650afSjeremylt // ElemRestriction Apply Block
298f10650afSjeremylt //------------------------------------------------------------------------------
299be9261b7Sjeremylt static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r,
300d1d35e2fSjeremylt     CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
301074cb416Sjeremylt     CeedRequest *request) {
3024d2a38eeSjeremylt   int ierr;
303d1d35e2fSjeremylt   CeedInt blk_size, num_comp, comp_stride;
304d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
305d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
306d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
3077509a596Sjeremylt   CeedElemRestriction_Ref *impl;
308e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
3094d2a38eeSjeremylt 
310d1d35e2fSjeremylt   return impl->Apply(r, num_comp, blk_size, comp_stride, block, block+1, t_mode,
311d1d35e2fSjeremylt                      u, v, request);
3129c36149bSjeremylt }
313be9261b7Sjeremylt 
314f10650afSjeremylt //------------------------------------------------------------------------------
315bd33150aSjeremylt // ElemRestriction Get Offsets
316bd33150aSjeremylt //------------------------------------------------------------------------------
317bd33150aSjeremylt static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr,
318d1d35e2fSjeremylt     CeedMemType mem_type, const CeedInt **offsets) {
319bd33150aSjeremylt   int ierr;
320bd33150aSjeremylt   CeedElemRestriction_Ref *impl;
321e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(rstr, &impl); CeedChkBackend(ierr);
322bd33150aSjeremylt   Ceed ceed;
323e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChkBackend(ierr);
324bd33150aSjeremylt 
325d1d35e2fSjeremylt   if (mem_type != CEED_MEM_HOST)
326bd33150aSjeremylt     // LCOV_EXCL_START
327e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
328bd33150aSjeremylt   // LCOV_EXCL_STOP
329bd33150aSjeremylt 
330bd33150aSjeremylt   *offsets = impl->offsets;
331e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
332bd33150aSjeremylt }
333bd33150aSjeremylt 
334bd33150aSjeremylt //------------------------------------------------------------------------------
335f10650afSjeremylt // ElemRestriction Destroy
336f10650afSjeremylt //------------------------------------------------------------------------------
33721617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
33821617c04Sjeremylt   int ierr;
339fe2413ffSjeremylt   CeedElemRestriction_Ref *impl;
340e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
34121617c04Sjeremylt 
342e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl->offsets_allocated); CeedChkBackend(ierr);
343e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
344e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
34521617c04Sjeremylt }
34621617c04Sjeremylt 
347f10650afSjeremylt //------------------------------------------------------------------------------
348f10650afSjeremylt // ElemRestriction Create
349f10650afSjeremylt //------------------------------------------------------------------------------
350d1d35e2fSjeremylt int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode,
351d979a051Sjeremylt                                   const CeedInt *offsets,
352d979a051Sjeremylt                                   CeedElemRestriction r) {
35321617c04Sjeremylt   int ierr;
35421617c04Sjeremylt   CeedElemRestriction_Ref *impl;
355d1d35e2fSjeremylt   CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride;
356d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
357d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
358d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr);
359d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
360d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
361d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
3624ce2993fSjeremylt   Ceed ceed;
363e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
36421617c04Sjeremylt 
365d1d35e2fSjeremylt   if (mem_type != CEED_MEM_HOST)
366c042f62fSJeremy L Thompson     // LCOV_EXCL_START
367e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
368c042f62fSJeremy L Thompson   // LCOV_EXCL_STOP
369e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
3703661185eSjeremylt 
37192fe105eSJeremy L Thompson   // Offsets data
372d1d35e2fSjeremylt   bool is_strided;
373d1d35e2fSjeremylt   ierr = CeedElemRestrictionIsStrided(r, &is_strided); CeedChkBackend(ierr);
374d1d35e2fSjeremylt   if (!is_strided) {
37592fe105eSJeremy L Thompson     // Check indices for ref or memcheck backends
376d1d35e2fSjeremylt     Ceed parent_ceed = ceed, curr_ceed = NULL;
377d1d35e2fSjeremylt     while (parent_ceed != curr_ceed) {
378d1d35e2fSjeremylt       curr_ceed = parent_ceed;
379d1d35e2fSjeremylt       ierr = CeedGetParent(curr_ceed, &parent_ceed); CeedChkBackend(ierr);
3803661185eSjeremylt     }
3813661185eSjeremylt     const char *resource;
382d1d35e2fSjeremylt     ierr = CeedGetResource(parent_ceed, &resource); CeedChkBackend(ierr);
383d1d35e2fSjeremylt     if (!strcmp(resource, "/cpu/self/ref/serial") ||
384d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/ref/blocked") ||
385d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/memcheck/serial") ||
386d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/memcheck/blocked")) {
387d1d35e2fSjeremylt       CeedInt l_size;
388d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
3893661185eSjeremylt 
390d1d35e2fSjeremylt       for (CeedInt i = 0; i < num_elem*elem_size; i++)
391d1d35e2fSjeremylt         if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp - 1) * comp_stride)
3923661185eSjeremylt           // LCOV_EXCL_START
393e15f9bd0SJeremy L Thompson           return CeedError(ceed, CEED_ERROR_BACKEND,
394e15f9bd0SJeremy L Thompson                            "Restriction offset %d (%d) out of range "
395d1d35e2fSjeremylt                            "[0, %d]", i, offsets[i], l_size);
3963661185eSjeremylt       // LCOV_EXCL_STOP
3973661185eSjeremylt     }
3983661185eSjeremylt 
39992fe105eSJeremy L Thompson     // Copy data
400d1d35e2fSjeremylt     switch (copy_mode) {
40121617c04Sjeremylt     case CEED_COPY_VALUES:
402d1d35e2fSjeremylt       ierr = CeedMalloc(num_elem*elem_size, &impl->offsets_allocated);
403e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
404d979a051Sjeremylt       memcpy(impl->offsets_allocated, offsets,
405d1d35e2fSjeremylt              num_elem * elem_size * sizeof(offsets[0]));
406d979a051Sjeremylt       impl->offsets = impl->offsets_allocated;
40721617c04Sjeremylt       break;
40821617c04Sjeremylt     case CEED_OWN_POINTER:
409d979a051Sjeremylt       impl->offsets_allocated = (CeedInt *)offsets;
410d979a051Sjeremylt       impl->offsets = impl->offsets_allocated;
41121617c04Sjeremylt       break;
41221617c04Sjeremylt     case CEED_USE_POINTER:
413d979a051Sjeremylt       impl->offsets = offsets;
41421617c04Sjeremylt     }
41592fe105eSJeremy L Thompson   }
416fe2413ffSjeremylt 
417e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionSetData(r, impl); CeedChkBackend(ierr);
418d1d35e2fSjeremylt   CeedInt layout[3] = {1, elem_size, elem_size*num_comp};
419e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionSetELayout(r, layout); CeedChkBackend(ierr);
420fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply",
421e15f9bd0SJeremy L Thompson                                 CeedElemRestrictionApply_Ref); CeedChkBackend(ierr);
422be9261b7Sjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock",
423be9261b7Sjeremylt                                 CeedElemRestrictionApplyBlock_Ref);
424e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
425bd33150aSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets",
426bd33150aSjeremylt                                 CeedElemRestrictionGetOffsets_Ref);
427e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
428fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy",
429e15f9bd0SJeremy L Thompson                                 CeedElemRestrictionDestroy_Ref); CeedChkBackend(ierr);
430d979a051Sjeremylt 
431d1d35e2fSjeremylt   // Set apply function based upon num_comp, blk_size, and comp_stride
432d979a051Sjeremylt   CeedInt idx = -1;
433d1d35e2fSjeremylt   if (blk_size < 10)
434d1d35e2fSjeremylt     idx = 100*num_comp + 10*blk_size + (comp_stride == 1);
435d979a051Sjeremylt   switch (idx) {
436d979a051Sjeremylt   case 110:
437d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_110;
438d979a051Sjeremylt     break;
439d979a051Sjeremylt   case 111:
440d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_111;
441d979a051Sjeremylt     break;
442d979a051Sjeremylt   case 180:
443d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_180;
444d979a051Sjeremylt     break;
445d979a051Sjeremylt   case 181:
446d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_181;
447d979a051Sjeremylt     break;
448d979a051Sjeremylt   case 310:
449d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_310;
450d979a051Sjeremylt     break;
451d979a051Sjeremylt   case 311:
452d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_311;
453d979a051Sjeremylt     break;
454d979a051Sjeremylt   case 380:
455d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_380;
456d979a051Sjeremylt     break;
457d979a051Sjeremylt   case 381:
458d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_381;
459d979a051Sjeremylt     break;
460bf4d1581Sjeremylt   // LCOV_EXCL_START
461d979a051Sjeremylt   case 510:
462d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_510;
463d979a051Sjeremylt     break;
464bf4d1581Sjeremylt   // LCOV_EXCL_STOP
465d979a051Sjeremylt   case 511:
466d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_511;
467d979a051Sjeremylt     break;
468bf4d1581Sjeremylt   // LCOV_EXCL_START
469d979a051Sjeremylt   case 580:
470d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_580;
471d979a051Sjeremylt     break;
472bf4d1581Sjeremylt   // LCOV_EXCL_STOP
473d979a051Sjeremylt   case 581:
474d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_581;
475d979a051Sjeremylt     break;
476d979a051Sjeremylt   default:
477d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_Core;
478d979a051Sjeremylt     break;
479d979a051Sjeremylt   }
480d979a051Sjeremylt 
481e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
48221617c04Sjeremylt }
483fc0567d9Srezgarshakeri 
484fc0567d9Srezgarshakeri //------------------------------------------------------------------------------
485fc0567d9Srezgarshakeri // ElemRestriction Create Oriented
486fc0567d9Srezgarshakeri //------------------------------------------------------------------------------
487fc0567d9Srezgarshakeri int CeedElemRestrictionCreateOriented_Ref(CeedMemType mem_type,
488fc0567d9Srezgarshakeri     CeedCopyMode copy_mode,
489fc0567d9Srezgarshakeri     const CeedInt *offsets, const bool *orient,
490fc0567d9Srezgarshakeri     CeedElemRestriction r) {
491fc0567d9Srezgarshakeri   int ierr;
492fc0567d9Srezgarshakeri   CeedElemRestriction_Ref *impl;
493fc0567d9Srezgarshakeri   CeedInt num_elem, elem_size;
494fc0567d9Srezgarshakeri   // Set up for normal restriction with explicit offsets. This sets up dispatch to
495fc0567d9Srezgarshakeri   // CeedElemRestrictionApply_Ref_* and manages the impl->offsets array copy/allocation.
496fc0567d9Srezgarshakeri   ierr = CeedElemRestrictionCreate_Ref(mem_type, copy_mode, offsets, r);
497fc0567d9Srezgarshakeri   CeedChkBackend(ierr);
498fc0567d9Srezgarshakeri 
499fc0567d9Srezgarshakeri   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
500fc0567d9Srezgarshakeri   ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
501fc0567d9Srezgarshakeri   ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
502fc0567d9Srezgarshakeri   switch (copy_mode) {
503fc0567d9Srezgarshakeri   case CEED_COPY_VALUES:
504fc0567d9Srezgarshakeri     ierr = CeedMalloc(num_elem * elem_size, &impl->orient_allocated);
505fc0567d9Srezgarshakeri     CeedChkBackend(ierr);
506fc0567d9Srezgarshakeri     memcpy(impl->orient_allocated, orient,
507fc0567d9Srezgarshakeri            num_elem * elem_size * sizeof(orient[0]));
508fc0567d9Srezgarshakeri     impl->orient = impl->orient_allocated;
509fc0567d9Srezgarshakeri     break;
510fc0567d9Srezgarshakeri   case CEED_OWN_POINTER:
511fc0567d9Srezgarshakeri     impl->orient_allocated = (bool *)orient;
512fc0567d9Srezgarshakeri     impl->orient = impl->orient_allocated;
513fc0567d9Srezgarshakeri     break;
514fc0567d9Srezgarshakeri   case CEED_USE_POINTER:
515fc0567d9Srezgarshakeri     impl->orient = orient;
516fc0567d9Srezgarshakeri   }
517fc0567d9Srezgarshakeri   return CEED_ERROR_SUCCESS;
518fc0567d9Srezgarshakeri }
519f10650afSjeremylt //------------------------------------------------------------------------------
520