xref: /libCEED/backends/ref/ceed-ref-restriction.c (revision fc0567d9dd43c4a8f0744c44d73ac6beb4777ae6)
121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
421617c04Sjeremylt //
521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
721617c04Sjeremylt // element discretizations for exascale applications. For more information and
821617c04Sjeremylt // source code availability see http://github.com/ceed.
921617c04Sjeremylt //
1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early
1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
1621617c04Sjeremylt 
17ec3da8bcSJed Brown #include <ceed/ceed.h>
18ec3da8bcSJed Brown #include <ceed/backend.h>
193d576824SJeremy L Thompson #include <stdbool.h>
203d576824SJeremy L Thompson #include <string.h>
2121617c04Sjeremylt #include "ceed-ref.h"
2221617c04Sjeremylt 
23f10650afSjeremylt //------------------------------------------------------------------------------
24f10650afSjeremylt // Core ElemRestriction Apply Code
25f10650afSjeremylt //------------------------------------------------------------------------------
26be9261b7Sjeremylt static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r,
27d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
28d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
299c36149bSjeremylt     CeedVector v, CeedRequest *request) {
3021617c04Sjeremylt   int ierr;
314ce2993fSjeremylt   CeedElemRestriction_Ref *impl;
32e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
3321617c04Sjeremylt   const CeedScalar *uu;
3421617c04Sjeremylt   CeedScalar *vv;
35d1d35e2fSjeremylt   CeedInt num_elem, elem_size, v_offset;
36d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
37d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
38d1d35e2fSjeremylt   v_offset = start*blk_size*elem_size*num_comp;
3921617c04Sjeremylt 
40e15f9bd0SJeremy L Thompson   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChkBackend(ierr);
419c774eddSJeremy L Thompson   if (t_mode == CEED_TRANSPOSE) {
429c774eddSJeremy L Thompson     // Sum into for transpose mode, e-vec to l-vec
43e15f9bd0SJeremy L Thompson     ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr);
449c774eddSJeremy L Thompson   } else {
459c774eddSJeremy L Thompson     // Overwrite for notranspose mode, l-vec to e-vec
469c774eddSJeremy L Thompson     ierr = CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr);
479c774eddSJeremy L Thompson   }
487f90ec76Sjeremylt   // Restriction from L-vector to E-vector
4921617c04Sjeremylt   // Perform: v = r * u
50d1d35e2fSjeremylt   if (t_mode == CEED_NOTRANSPOSE) {
51d979a051Sjeremylt     // No offsets provided, Identity Restriction
52d979a051Sjeremylt     if (!impl->offsets) {
53d1d35e2fSjeremylt       bool has_backend_strides;
54d1d35e2fSjeremylt       ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides);
55e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
56d1d35e2fSjeremylt       if (has_backend_strides) {
57d1d35e2fSjeremylt         // CPU backend strides are {1, elem_size, elem_size*num_comp}
587f90ec76Sjeremylt         // This if branch is left separate to allow better inlining
59d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
60e1b98f6eSjeremylt           CeedPragmaSIMD
61d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
621d79ecccSjeremylt             CeedPragmaSIMD
63d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
647509a596Sjeremylt               CeedPragmaSIMD
65d1d35e2fSjeremylt               for (CeedInt j = 0; j < blk_size; j++)
66d1d35e2fSjeremylt                 vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]
67d1d35e2fSjeremylt                   = uu[n + k*elem_size +
68d1d35e2fSjeremylt                          CeedIntMin(e+j, num_elem-1)*elem_size*num_comp];
697f90ec76Sjeremylt       } else {
707f90ec76Sjeremylt         // User provided strides
717f90ec76Sjeremylt         CeedInt strides[3];
72e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
73d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
747f90ec76Sjeremylt           CeedPragmaSIMD
75d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
767f90ec76Sjeremylt             CeedPragmaSIMD
77d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
787f90ec76Sjeremylt               CeedPragmaSIMD
79d1d35e2fSjeremylt               for (CeedInt j = 0; j < blk_size; j++)
80d1d35e2fSjeremylt                 vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]
817509a596Sjeremylt                   = uu[n*strides[0] + k*strides[1] +
82d1d35e2fSjeremylt                                     CeedIntMin(e+j, num_elem-1)*strides[2]];
837509a596Sjeremylt       }
8421617c04Sjeremylt     } else {
85d979a051Sjeremylt       // Offsets provided, standard or blocked restriction
86d1d35e2fSjeremylt       // vv has shape [elem_size, num_comp, num_elem], row-major
87d1d35e2fSjeremylt       // uu has shape [nnodes, num_comp]
88d1d35e2fSjeremylt       for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
89e1b98f6eSjeremylt         CeedPragmaSIMD
90d1d35e2fSjeremylt         for (CeedInt k = 0; k < num_comp; k++)
91e1b98f6eSjeremylt           CeedPragmaSIMD
92d1d35e2fSjeremylt           for (CeedInt i = 0; i < elem_size*blk_size; i++)
93d1d35e2fSjeremylt             vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset]
94*fc0567d9Srezgarshakeri               = uu[impl->offsets[i+elem_size*e] + k*comp_stride] *
95*fc0567d9Srezgarshakeri                 (impl->orient && impl->orient[i+elem_size*e] ? -1. : 1.);
9621617c04Sjeremylt     }
9721617c04Sjeremylt   } else {
987f90ec76Sjeremylt     // Restriction from E-vector to L-vector
998d94b059Sjeremylt     // Performing v += r^T * u
100d979a051Sjeremylt     // No offsets provided, Identity Restriction
101d979a051Sjeremylt     if (!impl->offsets) {
102d1d35e2fSjeremylt       bool has_backend_strides;
103d1d35e2fSjeremylt       ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides);
104e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
105d1d35e2fSjeremylt       if (has_backend_strides) {
106d1d35e2fSjeremylt         // CPU backend strides are {1, elem_size, elem_size*num_comp}
1077f90ec76Sjeremylt         // This if brach is left separate to allow better inlining
108d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
109523b8ea0Sjeremylt           CeedPragmaSIMD
110d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
1117509a596Sjeremylt             CeedPragmaSIMD
112d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
1137f90ec76Sjeremylt               CeedPragmaSIMD
114d1d35e2fSjeremylt               for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++)
115d1d35e2fSjeremylt                 vv[n + k*elem_size + (e+j)*elem_size*num_comp]
116d1d35e2fSjeremylt                 += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset];
1177f90ec76Sjeremylt       } else {
1187f90ec76Sjeremylt         // User provided strides
1197f90ec76Sjeremylt         CeedInt strides[3];
120e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
121d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
1227f90ec76Sjeremylt           CeedPragmaSIMD
123d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
1247f90ec76Sjeremylt             CeedPragmaSIMD
125d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
1267f90ec76Sjeremylt               CeedPragmaSIMD
127d1d35e2fSjeremylt               for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++)
1287509a596Sjeremylt                 vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]]
129d1d35e2fSjeremylt                 += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset];
130523b8ea0Sjeremylt       }
13121617c04Sjeremylt     } else {
132d979a051Sjeremylt       // Offsets provided, standard or blocked restriction
133d1d35e2fSjeremylt       // uu has shape [elem_size, num_comp, num_elem]
134d1d35e2fSjeremylt       // vv has shape [nnodes, num_comp]
135d1d35e2fSjeremylt       for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
136d1d35e2fSjeremylt         for (CeedInt k = 0; k < num_comp; k++)
137d1d35e2fSjeremylt           for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size)
1388d94b059Sjeremylt             // Iteration bound set to discard padding elements
139d1d35e2fSjeremylt             for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++)
140d1d35e2fSjeremylt               vv[impl->offsets[j+e*elem_size] + k*comp_stride]
141*fc0567d9Srezgarshakeri               += uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset] *
142*fc0567d9Srezgarshakeri                  (impl->orient && impl->orient[j+e*elem_size] ? -1. : 1.);
14321617c04Sjeremylt     }
14421617c04Sjeremylt   }
145e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChkBackend(ierr);
146e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArray(v, &vv); CeedChkBackend(ierr);
14721617c04Sjeremylt   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
14821617c04Sjeremylt     *request = NULL;
149e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
15021617c04Sjeremylt }
15121617c04Sjeremylt 
152f10650afSjeremylt //------------------------------------------------------------------------------
153f10650afSjeremylt // ElemRestriction Apply - Common Sizes
154f10650afSjeremylt //------------------------------------------------------------------------------
155d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r,
156d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
157d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
158d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
159d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop,
160d1d35e2fSjeremylt          t_mode, u, v, request);
161d979a051Sjeremylt }
162d979a051Sjeremylt 
163d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r,
164d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
165d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
166d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
167d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode,
1689c36149bSjeremylt          u, v, request);
1694d2a38eeSjeremylt }
1704d2a38eeSjeremylt 
171d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r,
172d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
173d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
174d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
175d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop,
176d1d35e2fSjeremylt          t_mode, u, v, request);
1779c36149bSjeremylt }
1789c36149bSjeremylt 
179d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r,
180d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
181d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
182d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
183d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode,
1849c36149bSjeremylt          u, v, request);
1859c36149bSjeremylt }
1869c36149bSjeremylt 
187d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r,
188d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
189d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
190d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
191d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop,
192d1d35e2fSjeremylt          t_mode, u, v, request);
193d979a051Sjeremylt }
194d979a051Sjeremylt 
195d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r,
196d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
197d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
198d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
199d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode,
200d979a051Sjeremylt          u, v, request);
201d979a051Sjeremylt }
202d979a051Sjeremylt 
203d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r,
204d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
205d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
206d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
207d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop,
208d1d35e2fSjeremylt          t_mode, u, v, request);
209d979a051Sjeremylt }
210d979a051Sjeremylt 
211d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r,
212d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
213d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
214d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
215d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode,
216d979a051Sjeremylt          u, v, request);
217d979a051Sjeremylt }
218d979a051Sjeremylt 
219bf4d1581Sjeremylt // LCOV_EXCL_START
220d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r,
221d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
222d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
223d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
224d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop,
225d1d35e2fSjeremylt          t_mode, u, v, request);
226d979a051Sjeremylt }
227bf4d1581Sjeremylt // LCOV_EXCL_STOP
228d979a051Sjeremylt 
229d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r,
230d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
231d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
232d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
233d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode,
234d979a051Sjeremylt          u, v, request);
235d979a051Sjeremylt }
236d979a051Sjeremylt 
237bf4d1581Sjeremylt // LCOV_EXCL_START
238d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r,
239d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
240d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
241d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
242d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop,
243d1d35e2fSjeremylt          t_mode, u, v, request);
244d979a051Sjeremylt }
245bf4d1581Sjeremylt // LCOV_EXCL_STOP
246d979a051Sjeremylt 
247d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r,
248d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
249d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
250d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
251d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode,
2529c36149bSjeremylt          u, v, request);
2534d2a38eeSjeremylt }
2544d2a38eeSjeremylt 
255f10650afSjeremylt //------------------------------------------------------------------------------
256f10650afSjeremylt // ElemRestriction Apply
257f10650afSjeremylt //------------------------------------------------------------------------------
258be9261b7Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
259d1d35e2fSjeremylt                                         CeedTransposeMode t_mode, CeedVector u,
260be9261b7Sjeremylt                                         CeedVector v, CeedRequest *request) {
261be9261b7Sjeremylt   int ierr;
262d1d35e2fSjeremylt   CeedInt num_blk, blk_size, num_comp, comp_stride;
263d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr);
264d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
265d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
266d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
2677509a596Sjeremylt   CeedElemRestriction_Ref *impl;
268e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
2694d2a38eeSjeremylt 
270d1d35e2fSjeremylt   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v,
271d979a051Sjeremylt                      request);
2729c36149bSjeremylt }
273be9261b7Sjeremylt 
274f10650afSjeremylt //------------------------------------------------------------------------------
275f10650afSjeremylt // ElemRestriction Apply Block
276f10650afSjeremylt //------------------------------------------------------------------------------
277be9261b7Sjeremylt static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r,
278d1d35e2fSjeremylt     CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
279074cb416Sjeremylt     CeedRequest *request) {
2804d2a38eeSjeremylt   int ierr;
281d1d35e2fSjeremylt   CeedInt blk_size, num_comp, comp_stride;
282d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
283d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
284d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
2857509a596Sjeremylt   CeedElemRestriction_Ref *impl;
286e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
2874d2a38eeSjeremylt 
288d1d35e2fSjeremylt   return impl->Apply(r, num_comp, blk_size, comp_stride, block, block+1, t_mode,
289d1d35e2fSjeremylt                      u, v, request);
2909c36149bSjeremylt }
291be9261b7Sjeremylt 
292f10650afSjeremylt //------------------------------------------------------------------------------
293bd33150aSjeremylt // ElemRestriction Get Offsets
294bd33150aSjeremylt //------------------------------------------------------------------------------
295bd33150aSjeremylt static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr,
296d1d35e2fSjeremylt     CeedMemType mem_type, const CeedInt **offsets) {
297bd33150aSjeremylt   int ierr;
298bd33150aSjeremylt   CeedElemRestriction_Ref *impl;
299e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(rstr, &impl); CeedChkBackend(ierr);
300bd33150aSjeremylt   Ceed ceed;
301e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChkBackend(ierr);
302bd33150aSjeremylt 
303d1d35e2fSjeremylt   if (mem_type != CEED_MEM_HOST)
304bd33150aSjeremylt     // LCOV_EXCL_START
305e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
306bd33150aSjeremylt   // LCOV_EXCL_STOP
307bd33150aSjeremylt 
308bd33150aSjeremylt   *offsets = impl->offsets;
309e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
310bd33150aSjeremylt }
311bd33150aSjeremylt 
312bd33150aSjeremylt //------------------------------------------------------------------------------
313f10650afSjeremylt // ElemRestriction Destroy
314f10650afSjeremylt //------------------------------------------------------------------------------
31521617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
31621617c04Sjeremylt   int ierr;
317fe2413ffSjeremylt   CeedElemRestriction_Ref *impl;
318e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
31921617c04Sjeremylt 
320e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl->offsets_allocated); CeedChkBackend(ierr);
321e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
322e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
32321617c04Sjeremylt }
32421617c04Sjeremylt 
325f10650afSjeremylt //------------------------------------------------------------------------------
326f10650afSjeremylt // ElemRestriction Create
327f10650afSjeremylt //------------------------------------------------------------------------------
328d1d35e2fSjeremylt int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode,
329d979a051Sjeremylt                                   const CeedInt *offsets,
330d979a051Sjeremylt                                   CeedElemRestriction r) {
33121617c04Sjeremylt   int ierr;
33221617c04Sjeremylt   CeedElemRestriction_Ref *impl;
333d1d35e2fSjeremylt   CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride;
334d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
335d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
336d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr);
337d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
338d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
339d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
3404ce2993fSjeremylt   Ceed ceed;
341e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
34221617c04Sjeremylt 
343d1d35e2fSjeremylt   if (mem_type != CEED_MEM_HOST)
344c042f62fSJeremy L Thompson     // LCOV_EXCL_START
345e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
346c042f62fSJeremy L Thompson   // LCOV_EXCL_STOP
347e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
3483661185eSjeremylt 
34992fe105eSJeremy L Thompson   // Offsets data
350d1d35e2fSjeremylt   bool is_strided;
351d1d35e2fSjeremylt   ierr = CeedElemRestrictionIsStrided(r, &is_strided); CeedChkBackend(ierr);
352d1d35e2fSjeremylt   if (!is_strided) {
35392fe105eSJeremy L Thompson     // Check indices for ref or memcheck backends
354d1d35e2fSjeremylt     Ceed parent_ceed = ceed, curr_ceed = NULL;
355d1d35e2fSjeremylt     while (parent_ceed != curr_ceed) {
356d1d35e2fSjeremylt       curr_ceed = parent_ceed;
357d1d35e2fSjeremylt       ierr = CeedGetParent(curr_ceed, &parent_ceed); CeedChkBackend(ierr);
3583661185eSjeremylt     }
3593661185eSjeremylt     const char *resource;
360d1d35e2fSjeremylt     ierr = CeedGetResource(parent_ceed, &resource); CeedChkBackend(ierr);
361d1d35e2fSjeremylt     if (!strcmp(resource, "/cpu/self/ref/serial") ||
362d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/ref/blocked") ||
363d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/memcheck/serial") ||
364d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/memcheck/blocked")) {
365d1d35e2fSjeremylt       CeedInt l_size;
366d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
3673661185eSjeremylt 
368d1d35e2fSjeremylt       for (CeedInt i = 0; i < num_elem*elem_size; i++)
369d1d35e2fSjeremylt         if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp - 1) * comp_stride)
3703661185eSjeremylt           // LCOV_EXCL_START
371e15f9bd0SJeremy L Thompson           return CeedError(ceed, CEED_ERROR_BACKEND,
372e15f9bd0SJeremy L Thompson                            "Restriction offset %d (%d) out of range "
373d1d35e2fSjeremylt                            "[0, %d]", i, offsets[i], l_size);
3743661185eSjeremylt       // LCOV_EXCL_STOP
3753661185eSjeremylt     }
3763661185eSjeremylt 
37792fe105eSJeremy L Thompson     // Copy data
378d1d35e2fSjeremylt     switch (copy_mode) {
37921617c04Sjeremylt     case CEED_COPY_VALUES:
380d1d35e2fSjeremylt       ierr = CeedMalloc(num_elem*elem_size, &impl->offsets_allocated);
381e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
382d979a051Sjeremylt       memcpy(impl->offsets_allocated, offsets,
383d1d35e2fSjeremylt              num_elem * elem_size * sizeof(offsets[0]));
384d979a051Sjeremylt       impl->offsets = impl->offsets_allocated;
38521617c04Sjeremylt       break;
38621617c04Sjeremylt     case CEED_OWN_POINTER:
387d979a051Sjeremylt       impl->offsets_allocated = (CeedInt *)offsets;
388d979a051Sjeremylt       impl->offsets = impl->offsets_allocated;
38921617c04Sjeremylt       break;
39021617c04Sjeremylt     case CEED_USE_POINTER:
391d979a051Sjeremylt       impl->offsets = offsets;
39221617c04Sjeremylt     }
39392fe105eSJeremy L Thompson   }
394fe2413ffSjeremylt 
395e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionSetData(r, impl); CeedChkBackend(ierr);
396d1d35e2fSjeremylt   CeedInt layout[3] = {1, elem_size, elem_size*num_comp};
397e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionSetELayout(r, layout); CeedChkBackend(ierr);
398fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply",
399e15f9bd0SJeremy L Thompson                                 CeedElemRestrictionApply_Ref); CeedChkBackend(ierr);
400be9261b7Sjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock",
401be9261b7Sjeremylt                                 CeedElemRestrictionApplyBlock_Ref);
402e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
403bd33150aSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets",
404bd33150aSjeremylt                                 CeedElemRestrictionGetOffsets_Ref);
405e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
406fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy",
407e15f9bd0SJeremy L Thompson                                 CeedElemRestrictionDestroy_Ref); CeedChkBackend(ierr);
408d979a051Sjeremylt 
409d1d35e2fSjeremylt   // Set apply function based upon num_comp, blk_size, and comp_stride
410d979a051Sjeremylt   CeedInt idx = -1;
411d1d35e2fSjeremylt   if (blk_size < 10)
412d1d35e2fSjeremylt     idx = 100*num_comp + 10*blk_size + (comp_stride == 1);
413d979a051Sjeremylt   switch (idx) {
414d979a051Sjeremylt   case 110:
415d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_110;
416d979a051Sjeremylt     break;
417d979a051Sjeremylt   case 111:
418d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_111;
419d979a051Sjeremylt     break;
420d979a051Sjeremylt   case 180:
421d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_180;
422d979a051Sjeremylt     break;
423d979a051Sjeremylt   case 181:
424d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_181;
425d979a051Sjeremylt     break;
426d979a051Sjeremylt   case 310:
427d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_310;
428d979a051Sjeremylt     break;
429d979a051Sjeremylt   case 311:
430d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_311;
431d979a051Sjeremylt     break;
432d979a051Sjeremylt   case 380:
433d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_380;
434d979a051Sjeremylt     break;
435d979a051Sjeremylt   case 381:
436d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_381;
437d979a051Sjeremylt     break;
438bf4d1581Sjeremylt   // LCOV_EXCL_START
439d979a051Sjeremylt   case 510:
440d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_510;
441d979a051Sjeremylt     break;
442bf4d1581Sjeremylt   // LCOV_EXCL_STOP
443d979a051Sjeremylt   case 511:
444d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_511;
445d979a051Sjeremylt     break;
446bf4d1581Sjeremylt   // LCOV_EXCL_START
447d979a051Sjeremylt   case 580:
448d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_580;
449d979a051Sjeremylt     break;
450bf4d1581Sjeremylt   // LCOV_EXCL_STOP
451d979a051Sjeremylt   case 581:
452d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_581;
453d979a051Sjeremylt     break;
454d979a051Sjeremylt   default:
455d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_Core;
456d979a051Sjeremylt     break;
457d979a051Sjeremylt   }
458d979a051Sjeremylt 
459e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
46021617c04Sjeremylt }
461*fc0567d9Srezgarshakeri 
462*fc0567d9Srezgarshakeri //------------------------------------------------------------------------------
463*fc0567d9Srezgarshakeri // ElemRestriction Create Oriented
464*fc0567d9Srezgarshakeri //------------------------------------------------------------------------------
465*fc0567d9Srezgarshakeri int CeedElemRestrictionCreateOriented_Ref(CeedMemType mem_type,
466*fc0567d9Srezgarshakeri     CeedCopyMode copy_mode,
467*fc0567d9Srezgarshakeri     const CeedInt *offsets, const bool *orient,
468*fc0567d9Srezgarshakeri     CeedElemRestriction r) {
469*fc0567d9Srezgarshakeri   int ierr;
470*fc0567d9Srezgarshakeri   CeedElemRestriction_Ref *impl;
471*fc0567d9Srezgarshakeri   CeedInt num_elem, elem_size;
472*fc0567d9Srezgarshakeri   // Set up for normal restriction with explicit offsets. This sets up dispatch to
473*fc0567d9Srezgarshakeri   // CeedElemRestrictionApply_Ref_* and manages the impl->offsets array copy/allocation.
474*fc0567d9Srezgarshakeri   ierr = CeedElemRestrictionCreate_Ref(mem_type, copy_mode, offsets, r);
475*fc0567d9Srezgarshakeri   CeedChkBackend(ierr);
476*fc0567d9Srezgarshakeri 
477*fc0567d9Srezgarshakeri   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
478*fc0567d9Srezgarshakeri   ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
479*fc0567d9Srezgarshakeri   ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
480*fc0567d9Srezgarshakeri   switch (copy_mode) {
481*fc0567d9Srezgarshakeri   case CEED_COPY_VALUES:
482*fc0567d9Srezgarshakeri     ierr = CeedMalloc(num_elem * elem_size, &impl->orient_allocated);
483*fc0567d9Srezgarshakeri     CeedChkBackend(ierr);
484*fc0567d9Srezgarshakeri     memcpy(impl->orient_allocated, orient,
485*fc0567d9Srezgarshakeri            num_elem * elem_size * sizeof(orient[0]));
486*fc0567d9Srezgarshakeri     impl->orient = impl->orient_allocated;
487*fc0567d9Srezgarshakeri     break;
488*fc0567d9Srezgarshakeri   case CEED_OWN_POINTER:
489*fc0567d9Srezgarshakeri     impl->orient_allocated = (bool *)orient;
490*fc0567d9Srezgarshakeri     impl->orient = impl->orient_allocated;
491*fc0567d9Srezgarshakeri     break;
492*fc0567d9Srezgarshakeri   case CEED_USE_POINTER:
493*fc0567d9Srezgarshakeri     impl->orient = orient;
494*fc0567d9Srezgarshakeri   }
495*fc0567d9Srezgarshakeri   return CEED_ERROR_SUCCESS;
496*fc0567d9Srezgarshakeri }
497f10650afSjeremylt //------------------------------------------------------------------------------
498