xref: /libCEED/rust/libceed-sys/c-src/backends/ref/ceed-ref-restriction.c (revision 2b730f8b5a9c809740a0b3b302db43a719c636b1)
13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
321617c04Sjeremylt //
43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
521617c04Sjeremylt //
63d8e8822SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
721617c04Sjeremylt 
8ec3da8bcSJed Brown #include <ceed/backend.h>
9*2b730f8bSJeremy L Thompson #include <ceed/ceed.h>
103d576824SJeremy L Thompson #include <stdbool.h>
113d576824SJeremy L Thompson #include <string.h>
12*2b730f8bSJeremy L Thompson 
1321617c04Sjeremylt #include "ceed-ref.h"
1421617c04Sjeremylt 
15f10650afSjeremylt //------------------------------------------------------------------------------
16f10650afSjeremylt // Core ElemRestriction Apply Code
17f10650afSjeremylt //------------------------------------------------------------------------------
18*2b730f8bSJeremy L Thompson static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
19*2b730f8bSJeremy L Thompson                                                     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
20*2b730f8bSJeremy L Thompson                                                     CeedRequest *request) {
214ce2993fSjeremylt   CeedElemRestriction_Ref *impl;
22*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
2321617c04Sjeremylt   const CeedScalar *uu;
2421617c04Sjeremylt   CeedScalar       *vv;
25d1d35e2fSjeremylt   CeedInt           num_elem, elem_size, v_offset;
26*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
27*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
28d1d35e2fSjeremylt   v_offset = start * blk_size * elem_size * num_comp;
2921617c04Sjeremylt 
30b435c5a6Srezgarshakeri   bool is_oriented;
31*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionIsOriented(r, &is_oriented));
32*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
339c774eddSJeremy L Thompson   if (t_mode == CEED_TRANSPOSE) {
349c774eddSJeremy L Thompson     // Sum into for transpose mode, e-vec to l-vec
35*2b730f8bSJeremy L Thompson     CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv));
369c774eddSJeremy L Thompson   } else {
379c774eddSJeremy L Thompson     // Overwrite for notranspose mode, l-vec to e-vec
38*2b730f8bSJeremy L Thompson     CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv));
399c774eddSJeremy L Thompson   }
407f90ec76Sjeremylt   // Restriction from L-vector to E-vector
4121617c04Sjeremylt   // Perform: v = r * u
42d1d35e2fSjeremylt   if (t_mode == CEED_NOTRANSPOSE) {
43d979a051Sjeremylt     // No offsets provided, Identity Restriction
44d979a051Sjeremylt     if (!impl->offsets) {
45d1d35e2fSjeremylt       bool has_backend_strides;
46*2b730f8bSJeremy L Thompson       CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides));
47d1d35e2fSjeremylt       if (has_backend_strides) {
48d1d35e2fSjeremylt         // CPU backend strides are {1, elem_size, elem_size*num_comp}
497f90ec76Sjeremylt         // This if branch is left separate to allow better inlining
50*2b730f8bSJeremy L Thompson         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
51*2b730f8bSJeremy L Thompson           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
52*2b730f8bSJeremy L Thompson             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
53*2b730f8bSJeremy L Thompson               CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
54*2b730f8bSJeremy L Thompson                 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
55*2b730f8bSJeremy L Thompson                     uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp];
56*2b730f8bSJeremy L Thompson               }
57*2b730f8bSJeremy L Thompson             }
58*2b730f8bSJeremy L Thompson           }
59*2b730f8bSJeremy L Thompson         }
607f90ec76Sjeremylt       } else {
617f90ec76Sjeremylt         // User provided strides
627f90ec76Sjeremylt         CeedInt strides[3];
63*2b730f8bSJeremy L Thompson         CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides));
64*2b730f8bSJeremy L Thompson         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
65*2b730f8bSJeremy L Thompson           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
66*2b730f8bSJeremy L Thompson             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
67*2b730f8bSJeremy L Thompson               CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
68*2b730f8bSJeremy L Thompson                 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
69*2b730f8bSJeremy L Thompson                     uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]];
70*2b730f8bSJeremy L Thompson               }
71*2b730f8bSJeremy L Thompson             }
72*2b730f8bSJeremy L Thompson           }
73*2b730f8bSJeremy L Thompson         }
747509a596Sjeremylt       }
7521617c04Sjeremylt     } else {
76d979a051Sjeremylt       // Offsets provided, standard or blocked restriction
77d1d35e2fSjeremylt       // vv has shape [elem_size, num_comp, num_elem], row-major
78d1d35e2fSjeremylt       // uu has shape [nnodes, num_comp]
79*2b730f8bSJeremy L Thompson       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
80*2b730f8bSJeremy L Thompson         CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
81*2b730f8bSJeremy L Thompson           CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) {
82*2b730f8bSJeremy L Thompson             vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] =
83*2b730f8bSJeremy L Thompson                 uu[impl->offsets[i + elem_size * e] + k * comp_stride] * (is_oriented && impl->orient[i + elem_size * e] ? -1. : 1.);
84*2b730f8bSJeremy L Thompson           }
85*2b730f8bSJeremy L Thompson         }
86*2b730f8bSJeremy L Thompson       }
87b435c5a6Srezgarshakeri     }
8821617c04Sjeremylt   } else {
897f90ec76Sjeremylt     // Restriction from E-vector to L-vector
908d94b059Sjeremylt     // Performing v += r^T * u
91d979a051Sjeremylt     // No offsets provided, Identity Restriction
92d979a051Sjeremylt     if (!impl->offsets) {
93d1d35e2fSjeremylt       bool has_backend_strides;
94*2b730f8bSJeremy L Thompson       CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides));
95d1d35e2fSjeremylt       if (has_backend_strides) {
96d1d35e2fSjeremylt         // CPU backend strides are {1, elem_size, elem_size*num_comp}
977f90ec76Sjeremylt         // This if brach is left separate to allow better inlining
98*2b730f8bSJeremy L Thompson         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
99*2b730f8bSJeremy L Thompson           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
100*2b730f8bSJeremy L Thompson             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
101*2b730f8bSJeremy L Thompson               CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) {
102*2b730f8bSJeremy L Thompson                 vv[n + k * elem_size + (e + j) * elem_size * num_comp] +=
103*2b730f8bSJeremy L Thompson                     uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset];
104*2b730f8bSJeremy L Thompson               }
105*2b730f8bSJeremy L Thompson             }
106*2b730f8bSJeremy L Thompson           }
107*2b730f8bSJeremy L Thompson         }
1087f90ec76Sjeremylt       } else {
1097f90ec76Sjeremylt         // User provided strides
1107f90ec76Sjeremylt         CeedInt strides[3];
111*2b730f8bSJeremy L Thompson         CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides));
112*2b730f8bSJeremy L Thompson         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
113*2b730f8bSJeremy L Thompson           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
114*2b730f8bSJeremy L Thompson             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
115*2b730f8bSJeremy L Thompson               CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) {
116*2b730f8bSJeremy L Thompson                 vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] +=
117*2b730f8bSJeremy L Thompson                     uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset];
118*2b730f8bSJeremy L Thompson               }
119*2b730f8bSJeremy L Thompson             }
120*2b730f8bSJeremy L Thompson           }
121*2b730f8bSJeremy L Thompson         }
122523b8ea0Sjeremylt       }
12321617c04Sjeremylt     } else {
124d979a051Sjeremylt       // Offsets provided, standard or blocked restriction
125d1d35e2fSjeremylt       // uu has shape [elem_size, num_comp, num_elem]
126d1d35e2fSjeremylt       // vv has shape [nnodes, num_comp]
127*2b730f8bSJeremy L Thompson       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
128*2b730f8bSJeremy L Thompson         for (CeedInt k = 0; k < num_comp; k++) {
129*2b730f8bSJeremy L Thompson           for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) {
1308d94b059Sjeremylt             // Iteration bound set to discard padding elements
131*2b730f8bSJeremy L Thompson             for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) {
132*2b730f8bSJeremy L Thompson               vv[impl->offsets[j + e * elem_size] + k * comp_stride] +=
133*2b730f8bSJeremy L Thompson                   uu[elem_size * (k * blk_size + num_comp * e) + j - v_offset] * (is_oriented && impl->orient[j + e * elem_size] ? -1. : 1.);
13421617c04Sjeremylt             }
135b435c5a6Srezgarshakeri           }
136*2b730f8bSJeremy L Thompson         }
137*2b730f8bSJeremy L Thompson       }
138*2b730f8bSJeremy L Thompson     }
139*2b730f8bSJeremy L Thompson   }
140*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
141*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
142*2b730f8bSJeremy L Thompson   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
143e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
14421617c04Sjeremylt }
14521617c04Sjeremylt 
146f10650afSjeremylt //------------------------------------------------------------------------------
147f10650afSjeremylt // ElemRestriction Apply - Common Sizes
148f10650afSjeremylt //------------------------------------------------------------------------------
149*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
150*2b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
151*2b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, t_mode, u, v, request);
152d979a051Sjeremylt }
153d979a051Sjeremylt 
154*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
155*2b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
156*2b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode, u, v, request);
1574d2a38eeSjeremylt }
1584d2a38eeSjeremylt 
159*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
160*2b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
161*2b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, t_mode, u, v, request);
1629c36149bSjeremylt }
1639c36149bSjeremylt 
164*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
165*2b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
166*2b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode, u, v, request);
1679c36149bSjeremylt }
1689c36149bSjeremylt 
169*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
170*2b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
171*2b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, t_mode, u, v, request);
172d979a051Sjeremylt }
173d979a051Sjeremylt 
174*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
175*2b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
176*2b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode, u, v, request);
177d979a051Sjeremylt }
178d979a051Sjeremylt 
179*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
180*2b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
181*2b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, t_mode, u, v, request);
182d979a051Sjeremylt }
183d979a051Sjeremylt 
184*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
185*2b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
186*2b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode, u, v, request);
187d979a051Sjeremylt }
188d979a051Sjeremylt 
189bf4d1581Sjeremylt // LCOV_EXCL_START
190*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
191*2b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
192*2b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, t_mode, u, v, request);
193d979a051Sjeremylt }
194bf4d1581Sjeremylt // LCOV_EXCL_STOP
195d979a051Sjeremylt 
196*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
197*2b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
198*2b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode, u, v, request);
199d979a051Sjeremylt }
200d979a051Sjeremylt 
201bf4d1581Sjeremylt // LCOV_EXCL_START
202*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
203*2b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
204*2b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, t_mode, u, v, request);
205d979a051Sjeremylt }
206bf4d1581Sjeremylt // LCOV_EXCL_STOP
207d979a051Sjeremylt 
208*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
209*2b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
210*2b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode, u, v, request);
2114d2a38eeSjeremylt }
2124d2a38eeSjeremylt 
213f10650afSjeremylt //------------------------------------------------------------------------------
214f10650afSjeremylt // ElemRestriction Apply
215f10650afSjeremylt //------------------------------------------------------------------------------
216*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
217d1d35e2fSjeremylt   CeedInt num_blk, blk_size, num_comp, comp_stride;
218*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
219*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
220*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
221*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
2227509a596Sjeremylt   CeedElemRestriction_Ref *impl;
223*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
2244d2a38eeSjeremylt 
225*2b730f8bSJeremy L Thompson   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v, request);
2269c36149bSjeremylt }
227be9261b7Sjeremylt 
228f10650afSjeremylt //------------------------------------------------------------------------------
229f10650afSjeremylt // ElemRestriction Apply Block
230f10650afSjeremylt //------------------------------------------------------------------------------
231*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
232074cb416Sjeremylt                                              CeedRequest *request) {
233d1d35e2fSjeremylt   CeedInt blk_size, num_comp, comp_stride;
234*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
235*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
236*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
2377509a596Sjeremylt   CeedElemRestriction_Ref *impl;
238*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
2394d2a38eeSjeremylt 
240*2b730f8bSJeremy L Thompson   return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, t_mode, u, v, request);
2419c36149bSjeremylt }
242be9261b7Sjeremylt 
243f10650afSjeremylt //------------------------------------------------------------------------------
244bd33150aSjeremylt // ElemRestriction Get Offsets
245bd33150aSjeremylt //------------------------------------------------------------------------------
246*2b730f8bSJeremy L Thompson static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) {
247bd33150aSjeremylt   CeedElemRestriction_Ref *impl;
248*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
249bd33150aSjeremylt   Ceed ceed;
250*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
251bd33150aSjeremylt 
252*2b730f8bSJeremy L Thompson   if (mem_type != CEED_MEM_HOST) {
253bd33150aSjeremylt     // LCOV_EXCL_START
254e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
255bd33150aSjeremylt     // LCOV_EXCL_STOP
256*2b730f8bSJeremy L Thompson   }
257bd33150aSjeremylt 
258bd33150aSjeremylt   *offsets = impl->offsets;
259e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
260bd33150aSjeremylt }
261bd33150aSjeremylt 
262bd33150aSjeremylt //------------------------------------------------------------------------------
263f10650afSjeremylt // ElemRestriction Destroy
264f10650afSjeremylt //------------------------------------------------------------------------------
26521617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
266fe2413ffSjeremylt   CeedElemRestriction_Ref *impl;
267*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
26821617c04Sjeremylt 
269*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl->offsets_allocated));
270*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl->orient_allocated));
271*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl));
272e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
27321617c04Sjeremylt }
27421617c04Sjeremylt 
275f10650afSjeremylt //------------------------------------------------------------------------------
276f10650afSjeremylt // ElemRestriction Create
277f10650afSjeremylt //------------------------------------------------------------------------------
278*2b730f8bSJeremy L Thompson int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, CeedElemRestriction r) {
27921617c04Sjeremylt   CeedElemRestriction_Ref *impl;
280d1d35e2fSjeremylt   CeedInt                  num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride;
281*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
282*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
283*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
284*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
285*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
286*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
2874ce2993fSjeremylt   Ceed ceed;
288*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
28921617c04Sjeremylt 
290*2b730f8bSJeremy L Thompson   if (mem_type != CEED_MEM_HOST) {
291c042f62fSJeremy L Thompson     // LCOV_EXCL_START
292e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
293c042f62fSJeremy L Thompson     // LCOV_EXCL_STOP
294*2b730f8bSJeremy L Thompson   }
295*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedCalloc(1, &impl));
2963661185eSjeremylt 
29792fe105eSJeremy L Thompson   // Offsets data
298d1d35e2fSjeremylt   bool is_strided;
299*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionIsStrided(r, &is_strided));
300d1d35e2fSjeremylt   if (!is_strided) {
30192fe105eSJeremy L Thompson     // Check indices for ref or memcheck backends
302d1d35e2fSjeremylt     Ceed parent_ceed = ceed, curr_ceed = NULL;
303d1d35e2fSjeremylt     while (parent_ceed != curr_ceed) {
304d1d35e2fSjeremylt       curr_ceed = parent_ceed;
305*2b730f8bSJeremy L Thompson       CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed));
3063661185eSjeremylt     }
3073661185eSjeremylt     const char *resource;
308*2b730f8bSJeremy L Thompson     CeedCallBackend(CeedGetResource(parent_ceed, &resource));
309*2b730f8bSJeremy L Thompson     if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") ||
310d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/memcheck/blocked")) {
311e79b91d9SJeremy L Thompson       CeedSize l_size;
312*2b730f8bSJeremy L Thompson       CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size));
3133661185eSjeremylt 
314*2b730f8bSJeremy L Thompson       for (CeedInt i = 0; i < num_elem * elem_size; i++) {
315*2b730f8bSJeremy L Thompson         if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp - 1) * comp_stride) {
3163661185eSjeremylt           // LCOV_EXCL_START
317*2b730f8bSJeremy L Thompson           return CeedError(ceed, CEED_ERROR_BACKEND, "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i,
318*2b730f8bSJeremy L Thompson                            offsets[i], l_size);
3193661185eSjeremylt           // LCOV_EXCL_STOP
3203661185eSjeremylt         }
321*2b730f8bSJeremy L Thompson       }
322*2b730f8bSJeremy L Thompson     }
3233661185eSjeremylt 
32492fe105eSJeremy L Thompson     // Copy data
325d1d35e2fSjeremylt     switch (copy_mode) {
32621617c04Sjeremylt       case CEED_COPY_VALUES:
327*2b730f8bSJeremy L Thompson         CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated));
328*2b730f8bSJeremy L Thompson         memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0]));
329d979a051Sjeremylt         impl->offsets = impl->offsets_allocated;
33021617c04Sjeremylt         break;
33121617c04Sjeremylt       case CEED_OWN_POINTER:
332d979a051Sjeremylt         impl->offsets_allocated = (CeedInt *)offsets;
333d979a051Sjeremylt         impl->offsets           = impl->offsets_allocated;
33421617c04Sjeremylt         break;
33521617c04Sjeremylt       case CEED_USE_POINTER:
336d979a051Sjeremylt         impl->offsets = offsets;
33721617c04Sjeremylt     }
33892fe105eSJeremy L Thompson   }
339fe2413ffSjeremylt 
340*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionSetData(r, impl));
341d1d35e2fSjeremylt   CeedInt layout[3] = {1, elem_size, elem_size * num_comp};
342*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionSetELayout(r, layout));
343*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref));
344*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref));
345*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref));
346*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref));
347d979a051Sjeremylt 
348d1d35e2fSjeremylt   // Set apply function based upon num_comp, blk_size, and comp_stride
349d979a051Sjeremylt   CeedInt idx = -1;
350*2b730f8bSJeremy L Thompson   if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1);
351d979a051Sjeremylt   switch (idx) {
352d979a051Sjeremylt     case 110:
353d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_110;
354d979a051Sjeremylt       break;
355d979a051Sjeremylt     case 111:
356d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_111;
357d979a051Sjeremylt       break;
358d979a051Sjeremylt     case 180:
359d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_180;
360d979a051Sjeremylt       break;
361d979a051Sjeremylt     case 181:
362d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_181;
363d979a051Sjeremylt       break;
364d979a051Sjeremylt     case 310:
365d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_310;
366d979a051Sjeremylt       break;
367d979a051Sjeremylt     case 311:
368d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_311;
369d979a051Sjeremylt       break;
370d979a051Sjeremylt     case 380:
371d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_380;
372d979a051Sjeremylt       break;
373d979a051Sjeremylt     case 381:
374d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_381;
375d979a051Sjeremylt       break;
376bf4d1581Sjeremylt     // LCOV_EXCL_START
377d979a051Sjeremylt     case 510:
378d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_510;
379d979a051Sjeremylt       break;
380bf4d1581Sjeremylt     // LCOV_EXCL_STOP
381d979a051Sjeremylt     case 511:
382d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_511;
383d979a051Sjeremylt       break;
384bf4d1581Sjeremylt     // LCOV_EXCL_START
385d979a051Sjeremylt     case 580:
386d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_580;
387d979a051Sjeremylt       break;
388bf4d1581Sjeremylt     // LCOV_EXCL_STOP
389d979a051Sjeremylt     case 581:
390d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_581;
391d979a051Sjeremylt       break;
392d979a051Sjeremylt     default:
393d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_Core;
394d979a051Sjeremylt       break;
395d979a051Sjeremylt   }
396d979a051Sjeremylt 
397e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
39821617c04Sjeremylt }
399fc0567d9Srezgarshakeri 
400fc0567d9Srezgarshakeri //------------------------------------------------------------------------------
401fc0567d9Srezgarshakeri // ElemRestriction Create Oriented
402fc0567d9Srezgarshakeri //------------------------------------------------------------------------------
403*2b730f8bSJeremy L Thompson int CeedElemRestrictionCreateOriented_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orient,
404fc0567d9Srezgarshakeri                                           CeedElemRestriction r) {
405fc0567d9Srezgarshakeri   CeedElemRestriction_Ref *impl;
406fc0567d9Srezgarshakeri   CeedInt                  num_elem, elem_size;
407fc0567d9Srezgarshakeri   // Set up for normal restriction with explicit offsets. This sets up dispatch to
408fc0567d9Srezgarshakeri   // CeedElemRestrictionApply_Ref_* and manages the impl->offsets array copy/allocation.
409*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionCreate_Ref(mem_type, copy_mode, offsets, r));
410fc0567d9Srezgarshakeri 
411*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
412*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
413*2b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
414fc0567d9Srezgarshakeri   switch (copy_mode) {
415fc0567d9Srezgarshakeri     case CEED_COPY_VALUES:
416*2b730f8bSJeremy L Thompson       CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orient_allocated));
417*2b730f8bSJeremy L Thompson       memcpy(impl->orient_allocated, orient, num_elem * elem_size * sizeof(orient[0]));
418fc0567d9Srezgarshakeri       impl->orient = impl->orient_allocated;
419fc0567d9Srezgarshakeri       break;
420fc0567d9Srezgarshakeri     case CEED_OWN_POINTER:
421fc0567d9Srezgarshakeri       impl->orient_allocated = (bool *)orient;
422fc0567d9Srezgarshakeri       impl->orient           = impl->orient_allocated;
423fc0567d9Srezgarshakeri       break;
424fc0567d9Srezgarshakeri     case CEED_USE_POINTER:
425fc0567d9Srezgarshakeri       impl->orient = orient;
426fc0567d9Srezgarshakeri   }
427fc0567d9Srezgarshakeri   return CEED_ERROR_SUCCESS;
428fc0567d9Srezgarshakeri }
429f10650afSjeremylt //------------------------------------------------------------------------------
430