xref: /libCEED/backends/ref/ceed-ref-restriction.c (revision e79b91d9f61753a734e6e21c778d772fcdbcc265)
121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
421617c04Sjeremylt //
521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
721617c04Sjeremylt // element discretizations for exascale applications. For more information and
821617c04Sjeremylt // source code availability see http://github.com/ceed.
921617c04Sjeremylt //
1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early
1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
1621617c04Sjeremylt 
17ec3da8bcSJed Brown #include <ceed/ceed.h>
18ec3da8bcSJed Brown #include <ceed/backend.h>
193d576824SJeremy L Thompson #include <stdbool.h>
203d576824SJeremy L Thompson #include <string.h>
2121617c04Sjeremylt #include "ceed-ref.h"
2221617c04Sjeremylt 
23f10650afSjeremylt //------------------------------------------------------------------------------
24f10650afSjeremylt // Core ElemRestriction Apply Code
25f10650afSjeremylt //------------------------------------------------------------------------------
26be9261b7Sjeremylt static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r,
27d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
28d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
299c36149bSjeremylt     CeedVector v, CeedRequest *request) {
3021617c04Sjeremylt   int ierr;
314ce2993fSjeremylt   CeedElemRestriction_Ref *impl;
32e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
3321617c04Sjeremylt   const CeedScalar *uu;
3421617c04Sjeremylt   CeedScalar *vv;
35d1d35e2fSjeremylt   CeedInt num_elem, elem_size, v_offset;
36d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
37d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
38d1d35e2fSjeremylt   v_offset = start*blk_size*elem_size*num_comp;
3921617c04Sjeremylt 
40b435c5a6Srezgarshakeri   bool is_oriented;
41b435c5a6Srezgarshakeri   ierr = CeedElemRestrictionIsOriented(r, &is_oriented); CeedChkBackend(ierr);
42e15f9bd0SJeremy L Thompson   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChkBackend(ierr);
439c774eddSJeremy L Thompson   if (t_mode == CEED_TRANSPOSE) {
449c774eddSJeremy L Thompson     // Sum into for transpose mode, e-vec to l-vec
45e15f9bd0SJeremy L Thompson     ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr);
469c774eddSJeremy L Thompson   } else {
479c774eddSJeremy L Thompson     // Overwrite for notranspose mode, l-vec to e-vec
489c774eddSJeremy L Thompson     ierr = CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr);
499c774eddSJeremy L Thompson   }
507f90ec76Sjeremylt   // Restriction from L-vector to E-vector
5121617c04Sjeremylt   // Perform: v = r * u
52d1d35e2fSjeremylt   if (t_mode == CEED_NOTRANSPOSE) {
53d979a051Sjeremylt     // No offsets provided, Identity Restriction
54d979a051Sjeremylt     if (!impl->offsets) {
55d1d35e2fSjeremylt       bool has_backend_strides;
56d1d35e2fSjeremylt       ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides);
57e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
58d1d35e2fSjeremylt       if (has_backend_strides) {
59d1d35e2fSjeremylt         // CPU backend strides are {1, elem_size, elem_size*num_comp}
607f90ec76Sjeremylt         // This if branch is left separate to allow better inlining
61d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
62e1b98f6eSjeremylt           CeedPragmaSIMD
63d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
641d79ecccSjeremylt             CeedPragmaSIMD
65d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
667509a596Sjeremylt               CeedPragmaSIMD
67d1d35e2fSjeremylt               for (CeedInt j = 0; j < blk_size; j++)
68d1d35e2fSjeremylt                 vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]
69d1d35e2fSjeremylt                   = uu[n + k*elem_size +
70d1d35e2fSjeremylt                          CeedIntMin(e+j, num_elem-1)*elem_size*num_comp];
717f90ec76Sjeremylt       } else {
727f90ec76Sjeremylt         // User provided strides
737f90ec76Sjeremylt         CeedInt strides[3];
74e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
75d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
767f90ec76Sjeremylt           CeedPragmaSIMD
77d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
787f90ec76Sjeremylt             CeedPragmaSIMD
79d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
807f90ec76Sjeremylt               CeedPragmaSIMD
81d1d35e2fSjeremylt               for (CeedInt j = 0; j < blk_size; j++)
82d1d35e2fSjeremylt                 vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]
837509a596Sjeremylt                   = uu[n*strides[0] + k*strides[1] +
84d1d35e2fSjeremylt                                     CeedIntMin(e+j, num_elem-1)*strides[2]];
857509a596Sjeremylt       }
8621617c04Sjeremylt     } else {
87d979a051Sjeremylt       // Offsets provided, standard or blocked restriction
88d1d35e2fSjeremylt       // vv has shape [elem_size, num_comp, num_elem], row-major
89d1d35e2fSjeremylt       // uu has shape [nnodes, num_comp]
90d1d35e2fSjeremylt       for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
91e1b98f6eSjeremylt         CeedPragmaSIMD
92d1d35e2fSjeremylt         for (CeedInt k = 0; k < num_comp; k++)
93e1b98f6eSjeremylt           CeedPragmaSIMD
94d1d35e2fSjeremylt           for (CeedInt i = 0; i < elem_size*blk_size; i++)
95d1d35e2fSjeremylt             vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset]
96fc0567d9Srezgarshakeri               = uu[impl->offsets[i+elem_size*e] + k*comp_stride] *
97000294e3Srezgarshakeri                 (is_oriented && impl->orient[i+elem_size*e] ? -1. : 1.);
98b435c5a6Srezgarshakeri     }
9921617c04Sjeremylt   } else {
1007f90ec76Sjeremylt     // Restriction from E-vector to L-vector
1018d94b059Sjeremylt     // Performing v += r^T * u
102d979a051Sjeremylt     // No offsets provided, Identity Restriction
103d979a051Sjeremylt     if (!impl->offsets) {
104d1d35e2fSjeremylt       bool has_backend_strides;
105d1d35e2fSjeremylt       ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides);
106e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
107d1d35e2fSjeremylt       if (has_backend_strides) {
108d1d35e2fSjeremylt         // CPU backend strides are {1, elem_size, elem_size*num_comp}
1097f90ec76Sjeremylt         // This if brach is left separate to allow better inlining
110d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
111523b8ea0Sjeremylt           CeedPragmaSIMD
112d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
1137509a596Sjeremylt             CeedPragmaSIMD
114d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
1157f90ec76Sjeremylt               CeedPragmaSIMD
116d1d35e2fSjeremylt               for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++)
117d1d35e2fSjeremylt                 vv[n + k*elem_size + (e+j)*elem_size*num_comp]
118d1d35e2fSjeremylt                 += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset];
1197f90ec76Sjeremylt       } else {
1207f90ec76Sjeremylt         // User provided strides
1217f90ec76Sjeremylt         CeedInt strides[3];
122e15f9bd0SJeremy L Thompson         ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr);
123d1d35e2fSjeremylt         for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
1247f90ec76Sjeremylt           CeedPragmaSIMD
125d1d35e2fSjeremylt           for (CeedInt k = 0; k < num_comp; k++)
1267f90ec76Sjeremylt             CeedPragmaSIMD
127d1d35e2fSjeremylt             for (CeedInt n = 0; n < elem_size; n++)
1287f90ec76Sjeremylt               CeedPragmaSIMD
129d1d35e2fSjeremylt               for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++)
1307509a596Sjeremylt                 vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]]
131d1d35e2fSjeremylt                 += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset];
132523b8ea0Sjeremylt       }
13321617c04Sjeremylt     } else {
134d979a051Sjeremylt       // Offsets provided, standard or blocked restriction
135d1d35e2fSjeremylt       // uu has shape [elem_size, num_comp, num_elem]
136d1d35e2fSjeremylt       // vv has shape [nnodes, num_comp]
137d1d35e2fSjeremylt       for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size)
138d1d35e2fSjeremylt         for (CeedInt k = 0; k < num_comp; k++)
139d1d35e2fSjeremylt           for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size)
1408d94b059Sjeremylt             // Iteration bound set to discard padding elements
141d1d35e2fSjeremylt             for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++)
142d1d35e2fSjeremylt               vv[impl->offsets[j+e*elem_size] + k*comp_stride]
143fc0567d9Srezgarshakeri               += uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset] *
144000294e3Srezgarshakeri                  (is_oriented && impl->orient[j+e*elem_size] ? -1. : 1.);
14521617c04Sjeremylt     }
146b435c5a6Srezgarshakeri   }
147e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChkBackend(ierr);
148e15f9bd0SJeremy L Thompson   ierr = CeedVectorRestoreArray(v, &vv); CeedChkBackend(ierr);
14921617c04Sjeremylt   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
15021617c04Sjeremylt     *request = NULL;
151e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
15221617c04Sjeremylt }
15321617c04Sjeremylt 
154f10650afSjeremylt //------------------------------------------------------------------------------
155f10650afSjeremylt // ElemRestriction Apply - Common Sizes
156f10650afSjeremylt //------------------------------------------------------------------------------
157d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r,
158d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
159d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
160d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
161d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop,
162d1d35e2fSjeremylt          t_mode, u, v, request);
163d979a051Sjeremylt }
164d979a051Sjeremylt 
165d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r,
166d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
167d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
168d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
169d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode,
1709c36149bSjeremylt          u, v, request);
1714d2a38eeSjeremylt }
1724d2a38eeSjeremylt 
173d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r,
174d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
175d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
176d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
177d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop,
178d1d35e2fSjeremylt          t_mode, u, v, request);
1799c36149bSjeremylt }
1809c36149bSjeremylt 
181d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r,
182d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
183d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
184d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
185d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode,
1869c36149bSjeremylt          u, v, request);
1879c36149bSjeremylt }
1889c36149bSjeremylt 
189d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r,
190d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
191d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
192d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
193d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop,
194d1d35e2fSjeremylt          t_mode, u, v, request);
195d979a051Sjeremylt }
196d979a051Sjeremylt 
197d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r,
198d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
199d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
200d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
201d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode,
202d979a051Sjeremylt          u, v, request);
203d979a051Sjeremylt }
204d979a051Sjeremylt 
205d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r,
206d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
207d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
208d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
209d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop,
210d1d35e2fSjeremylt          t_mode, u, v, request);
211d979a051Sjeremylt }
212d979a051Sjeremylt 
213d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r,
214d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
215d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
216d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
217d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode,
218d979a051Sjeremylt          u, v, request);
219d979a051Sjeremylt }
220d979a051Sjeremylt 
221bf4d1581Sjeremylt // LCOV_EXCL_START
222d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r,
223d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
224d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
225d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
226d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop,
227d1d35e2fSjeremylt          t_mode, u, v, request);
228d979a051Sjeremylt }
229bf4d1581Sjeremylt // LCOV_EXCL_STOP
230d979a051Sjeremylt 
231d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r,
232d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
233d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
234d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
235d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode,
236d979a051Sjeremylt          u, v, request);
237d979a051Sjeremylt }
238d979a051Sjeremylt 
239bf4d1581Sjeremylt // LCOV_EXCL_START
240d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r,
241d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
242d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
243d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
244d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop,
245d1d35e2fSjeremylt          t_mode, u, v, request);
246d979a051Sjeremylt }
247bf4d1581Sjeremylt // LCOV_EXCL_STOP
248d979a051Sjeremylt 
249d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r,
250d1d35e2fSjeremylt     const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
251d1d35e2fSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u,
252d979a051Sjeremylt     CeedVector v, CeedRequest *request) {
253d1d35e2fSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode,
2549c36149bSjeremylt          u, v, request);
2554d2a38eeSjeremylt }
2564d2a38eeSjeremylt 
257f10650afSjeremylt //------------------------------------------------------------------------------
258f10650afSjeremylt // ElemRestriction Apply
259f10650afSjeremylt //------------------------------------------------------------------------------
260be9261b7Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
261d1d35e2fSjeremylt                                         CeedTransposeMode t_mode, CeedVector u,
262be9261b7Sjeremylt                                         CeedVector v, CeedRequest *request) {
263be9261b7Sjeremylt   int ierr;
264d1d35e2fSjeremylt   CeedInt num_blk, blk_size, num_comp, comp_stride;
265d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr);
266d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
267d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
268d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
2697509a596Sjeremylt   CeedElemRestriction_Ref *impl;
270e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
2714d2a38eeSjeremylt 
272d1d35e2fSjeremylt   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v,
273d979a051Sjeremylt                      request);
2749c36149bSjeremylt }
275be9261b7Sjeremylt 
276f10650afSjeremylt //------------------------------------------------------------------------------
277f10650afSjeremylt // ElemRestriction Apply Block
278f10650afSjeremylt //------------------------------------------------------------------------------
279be9261b7Sjeremylt static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r,
280d1d35e2fSjeremylt     CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
281074cb416Sjeremylt     CeedRequest *request) {
2824d2a38eeSjeremylt   int ierr;
283d1d35e2fSjeremylt   CeedInt blk_size, num_comp, comp_stride;
284d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
285d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
286d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
2877509a596Sjeremylt   CeedElemRestriction_Ref *impl;
288e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
2894d2a38eeSjeremylt 
290d1d35e2fSjeremylt   return impl->Apply(r, num_comp, blk_size, comp_stride, block, block+1, t_mode,
291d1d35e2fSjeremylt                      u, v, request);
2929c36149bSjeremylt }
293be9261b7Sjeremylt 
294f10650afSjeremylt //------------------------------------------------------------------------------
295bd33150aSjeremylt // ElemRestriction Get Offsets
296bd33150aSjeremylt //------------------------------------------------------------------------------
297bd33150aSjeremylt static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr,
298d1d35e2fSjeremylt     CeedMemType mem_type, const CeedInt **offsets) {
299bd33150aSjeremylt   int ierr;
300bd33150aSjeremylt   CeedElemRestriction_Ref *impl;
301e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(rstr, &impl); CeedChkBackend(ierr);
302bd33150aSjeremylt   Ceed ceed;
303e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChkBackend(ierr);
304bd33150aSjeremylt 
305d1d35e2fSjeremylt   if (mem_type != CEED_MEM_HOST)
306bd33150aSjeremylt     // LCOV_EXCL_START
307e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
308bd33150aSjeremylt   // LCOV_EXCL_STOP
309bd33150aSjeremylt 
310bd33150aSjeremylt   *offsets = impl->offsets;
311e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
312bd33150aSjeremylt }
313bd33150aSjeremylt 
314bd33150aSjeremylt //------------------------------------------------------------------------------
315f10650afSjeremylt // ElemRestriction Destroy
316f10650afSjeremylt //------------------------------------------------------------------------------
31721617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
31821617c04Sjeremylt   int ierr;
319fe2413ffSjeremylt   CeedElemRestriction_Ref *impl;
320e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
32121617c04Sjeremylt 
322e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl->offsets_allocated); CeedChkBackend(ierr);
323e15f9bd0SJeremy L Thompson   ierr = CeedFree(&impl); CeedChkBackend(ierr);
324e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
32521617c04Sjeremylt }
32621617c04Sjeremylt 
327f10650afSjeremylt //------------------------------------------------------------------------------
328f10650afSjeremylt // ElemRestriction Create
329f10650afSjeremylt //------------------------------------------------------------------------------
330d1d35e2fSjeremylt int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode,
331d979a051Sjeremylt                                   const CeedInt *offsets,
332d979a051Sjeremylt                                   CeedElemRestriction r) {
33321617c04Sjeremylt   int ierr;
33421617c04Sjeremylt   CeedElemRestriction_Ref *impl;
335d1d35e2fSjeremylt   CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride;
336d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
337d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
338d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr);
339d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr);
340d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr);
341d1d35e2fSjeremylt   ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr);
3424ce2993fSjeremylt   Ceed ceed;
343e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr);
34421617c04Sjeremylt 
345d1d35e2fSjeremylt   if (mem_type != CEED_MEM_HOST)
346c042f62fSJeremy L Thompson     // LCOV_EXCL_START
347e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
348c042f62fSJeremy L Thompson   // LCOV_EXCL_STOP
349e15f9bd0SJeremy L Thompson   ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr);
3503661185eSjeremylt 
35192fe105eSJeremy L Thompson   // Offsets data
352d1d35e2fSjeremylt   bool is_strided;
353d1d35e2fSjeremylt   ierr = CeedElemRestrictionIsStrided(r, &is_strided); CeedChkBackend(ierr);
354d1d35e2fSjeremylt   if (!is_strided) {
35592fe105eSJeremy L Thompson     // Check indices for ref or memcheck backends
356d1d35e2fSjeremylt     Ceed parent_ceed = ceed, curr_ceed = NULL;
357d1d35e2fSjeremylt     while (parent_ceed != curr_ceed) {
358d1d35e2fSjeremylt       curr_ceed = parent_ceed;
359d1d35e2fSjeremylt       ierr = CeedGetParent(curr_ceed, &parent_ceed); CeedChkBackend(ierr);
3603661185eSjeremylt     }
3613661185eSjeremylt     const char *resource;
362d1d35e2fSjeremylt     ierr = CeedGetResource(parent_ceed, &resource); CeedChkBackend(ierr);
363d1d35e2fSjeremylt     if (!strcmp(resource, "/cpu/self/ref/serial") ||
364d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/ref/blocked") ||
365d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/memcheck/serial") ||
366d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/memcheck/blocked")) {
367*e79b91d9SJeremy L Thompson       CeedSize l_size;
368d1d35e2fSjeremylt       ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr);
3693661185eSjeremylt 
370d1d35e2fSjeremylt       for (CeedInt i = 0; i < num_elem*elem_size; i++)
371d1d35e2fSjeremylt         if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp - 1) * comp_stride)
3723661185eSjeremylt           // LCOV_EXCL_START
373e15f9bd0SJeremy L Thompson           return CeedError(ceed, CEED_ERROR_BACKEND,
374e15f9bd0SJeremy L Thompson                            "Restriction offset %d (%d) out of range "
375d1d35e2fSjeremylt                            "[0, %d]", i, offsets[i], l_size);
3763661185eSjeremylt       // LCOV_EXCL_STOP
3773661185eSjeremylt     }
3783661185eSjeremylt 
37992fe105eSJeremy L Thompson     // Copy data
380d1d35e2fSjeremylt     switch (copy_mode) {
38121617c04Sjeremylt     case CEED_COPY_VALUES:
382d1d35e2fSjeremylt       ierr = CeedMalloc(num_elem*elem_size, &impl->offsets_allocated);
383e15f9bd0SJeremy L Thompson       CeedChkBackend(ierr);
384d979a051Sjeremylt       memcpy(impl->offsets_allocated, offsets,
385d1d35e2fSjeremylt              num_elem * elem_size * sizeof(offsets[0]));
386d979a051Sjeremylt       impl->offsets = impl->offsets_allocated;
38721617c04Sjeremylt       break;
38821617c04Sjeremylt     case CEED_OWN_POINTER:
389d979a051Sjeremylt       impl->offsets_allocated = (CeedInt *)offsets;
390d979a051Sjeremylt       impl->offsets = impl->offsets_allocated;
39121617c04Sjeremylt       break;
39221617c04Sjeremylt     case CEED_USE_POINTER:
393d979a051Sjeremylt       impl->offsets = offsets;
39421617c04Sjeremylt     }
39592fe105eSJeremy L Thompson   }
396fe2413ffSjeremylt 
397e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionSetData(r, impl); CeedChkBackend(ierr);
398d1d35e2fSjeremylt   CeedInt layout[3] = {1, elem_size, elem_size*num_comp};
399e15f9bd0SJeremy L Thompson   ierr = CeedElemRestrictionSetELayout(r, layout); CeedChkBackend(ierr);
400fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply",
401e15f9bd0SJeremy L Thompson                                 CeedElemRestrictionApply_Ref); CeedChkBackend(ierr);
402be9261b7Sjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock",
403be9261b7Sjeremylt                                 CeedElemRestrictionApplyBlock_Ref);
404e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
405bd33150aSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets",
406bd33150aSjeremylt                                 CeedElemRestrictionGetOffsets_Ref);
407e15f9bd0SJeremy L Thompson   CeedChkBackend(ierr);
408fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy",
409e15f9bd0SJeremy L Thompson                                 CeedElemRestrictionDestroy_Ref); CeedChkBackend(ierr);
410d979a051Sjeremylt 
411d1d35e2fSjeremylt   // Set apply function based upon num_comp, blk_size, and comp_stride
412d979a051Sjeremylt   CeedInt idx = -1;
413d1d35e2fSjeremylt   if (blk_size < 10)
414d1d35e2fSjeremylt     idx = 100*num_comp + 10*blk_size + (comp_stride == 1);
415d979a051Sjeremylt   switch (idx) {
416d979a051Sjeremylt   case 110:
417d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_110;
418d979a051Sjeremylt     break;
419d979a051Sjeremylt   case 111:
420d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_111;
421d979a051Sjeremylt     break;
422d979a051Sjeremylt   case 180:
423d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_180;
424d979a051Sjeremylt     break;
425d979a051Sjeremylt   case 181:
426d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_181;
427d979a051Sjeremylt     break;
428d979a051Sjeremylt   case 310:
429d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_310;
430d979a051Sjeremylt     break;
431d979a051Sjeremylt   case 311:
432d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_311;
433d979a051Sjeremylt     break;
434d979a051Sjeremylt   case 380:
435d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_380;
436d979a051Sjeremylt     break;
437d979a051Sjeremylt   case 381:
438d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_381;
439d979a051Sjeremylt     break;
440bf4d1581Sjeremylt   // LCOV_EXCL_START
441d979a051Sjeremylt   case 510:
442d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_510;
443d979a051Sjeremylt     break;
444bf4d1581Sjeremylt   // LCOV_EXCL_STOP
445d979a051Sjeremylt   case 511:
446d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_511;
447d979a051Sjeremylt     break;
448bf4d1581Sjeremylt   // LCOV_EXCL_START
449d979a051Sjeremylt   case 580:
450d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_580;
451d979a051Sjeremylt     break;
452bf4d1581Sjeremylt   // LCOV_EXCL_STOP
453d979a051Sjeremylt   case 581:
454d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_581;
455d979a051Sjeremylt     break;
456d979a051Sjeremylt   default:
457d979a051Sjeremylt     impl->Apply = CeedElemRestrictionApply_Ref_Core;
458d979a051Sjeremylt     break;
459d979a051Sjeremylt   }
460d979a051Sjeremylt 
461e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
46221617c04Sjeremylt }
463fc0567d9Srezgarshakeri 
464fc0567d9Srezgarshakeri //------------------------------------------------------------------------------
465fc0567d9Srezgarshakeri // ElemRestriction Create Oriented
466fc0567d9Srezgarshakeri //------------------------------------------------------------------------------
467fc0567d9Srezgarshakeri int CeedElemRestrictionCreateOriented_Ref(CeedMemType mem_type,
468fc0567d9Srezgarshakeri     CeedCopyMode copy_mode,
469fc0567d9Srezgarshakeri     const CeedInt *offsets, const bool *orient,
470fc0567d9Srezgarshakeri     CeedElemRestriction r) {
471fc0567d9Srezgarshakeri   int ierr;
472fc0567d9Srezgarshakeri   CeedElemRestriction_Ref *impl;
473fc0567d9Srezgarshakeri   CeedInt num_elem, elem_size;
474fc0567d9Srezgarshakeri   // Set up for normal restriction with explicit offsets. This sets up dispatch to
475fc0567d9Srezgarshakeri   // CeedElemRestrictionApply_Ref_* and manages the impl->offsets array copy/allocation.
476fc0567d9Srezgarshakeri   ierr = CeedElemRestrictionCreate_Ref(mem_type, copy_mode, offsets, r);
477fc0567d9Srezgarshakeri   CeedChkBackend(ierr);
478fc0567d9Srezgarshakeri 
479fc0567d9Srezgarshakeri   ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr);
480fc0567d9Srezgarshakeri   ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr);
481fc0567d9Srezgarshakeri   ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr);
482fc0567d9Srezgarshakeri   switch (copy_mode) {
483fc0567d9Srezgarshakeri   case CEED_COPY_VALUES:
484fc0567d9Srezgarshakeri     ierr = CeedMalloc(num_elem * elem_size, &impl->orient_allocated);
485fc0567d9Srezgarshakeri     CeedChkBackend(ierr);
486fc0567d9Srezgarshakeri     memcpy(impl->orient_allocated, orient,
487fc0567d9Srezgarshakeri            num_elem * elem_size * sizeof(orient[0]));
488fc0567d9Srezgarshakeri     impl->orient = impl->orient_allocated;
489fc0567d9Srezgarshakeri     break;
490fc0567d9Srezgarshakeri   case CEED_OWN_POINTER:
491fc0567d9Srezgarshakeri     impl->orient_allocated = (bool *)orient;
492fc0567d9Srezgarshakeri     impl->orient = impl->orient_allocated;
493fc0567d9Srezgarshakeri     break;
494fc0567d9Srezgarshakeri   case CEED_USE_POINTER:
495fc0567d9Srezgarshakeri     impl->orient = orient;
496fc0567d9Srezgarshakeri   }
497fc0567d9Srezgarshakeri   return CEED_ERROR_SUCCESS;
498fc0567d9Srezgarshakeri }
499f10650afSjeremylt //------------------------------------------------------------------------------
500