xref: /libCEED/rust/libceed-sys/c-src/backends/ref/ceed-ref-restriction.c (revision 3bdd4e5a84903d9ef10177c79e65525d59684ccd)
13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
321617c04Sjeremylt //
43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
521617c04Sjeremylt //
63d8e8822SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
721617c04Sjeremylt 
849aac155SJeremy L Thompson #include <ceed.h>
9ec3da8bcSJed Brown #include <ceed/backend.h>
103d576824SJeremy L Thompson #include <stdbool.h>
113d576824SJeremy L Thompson #include <string.h>
122b730f8bSJeremy L Thompson 
1321617c04Sjeremylt #include "ceed-ref.h"
1421617c04Sjeremylt 
15f10650afSjeremylt //------------------------------------------------------------------------------
16f10650afSjeremylt // Core ElemRestriction Apply Code
17f10650afSjeremylt //------------------------------------------------------------------------------
182b730f8bSJeremy L Thompson static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
19*3bdd4e5aSSebastian Grimberg                                                     CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u,
20*3bdd4e5aSSebastian Grimberg                                                     CeedVector v, CeedRequest *request) {
214ce2993fSjeremylt   CeedElemRestriction_Ref *impl;
222b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
2321617c04Sjeremylt   const CeedScalar *uu;
2421617c04Sjeremylt   CeedScalar       *vv;
25d1d35e2fSjeremylt   CeedInt           num_elem, elem_size, v_offset;
262b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
272b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
28d1d35e2fSjeremylt   v_offset = start * blk_size * elem_size * num_comp;
2921617c04Sjeremylt 
30b435c5a6Srezgarshakeri   bool is_oriented;
312b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionIsOriented(r, &is_oriented));
322b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
339c774eddSJeremy L Thompson   if (t_mode == CEED_TRANSPOSE) {
349c774eddSJeremy L Thompson     // Sum into for transpose mode, e-vec to l-vec
352b730f8bSJeremy L Thompson     CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv));
369c774eddSJeremy L Thompson   } else {
379c774eddSJeremy L Thompson     // Overwrite for notranspose mode, l-vec to e-vec
382b730f8bSJeremy L Thompson     CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv));
399c774eddSJeremy L Thompson   }
407f90ec76Sjeremylt   // Restriction from L-vector to E-vector
4121617c04Sjeremylt   // Perform: v = r * u
42d1d35e2fSjeremylt   if (t_mode == CEED_NOTRANSPOSE) {
43d979a051Sjeremylt     // No offsets provided, Identity Restriction
44d979a051Sjeremylt     if (!impl->offsets) {
45d1d35e2fSjeremylt       bool has_backend_strides;
462b730f8bSJeremy L Thompson       CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides));
47d1d35e2fSjeremylt       if (has_backend_strides) {
48d1d35e2fSjeremylt         // CPU backend strides are {1, elem_size, elem_size*num_comp}
497f90ec76Sjeremylt         // This if branch is left separate to allow better inlining
502b730f8bSJeremy L Thompson         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
512b730f8bSJeremy L Thompson           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
522b730f8bSJeremy L Thompson             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
532b730f8bSJeremy L Thompson               CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
542b730f8bSJeremy L Thompson                 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
552b730f8bSJeremy L Thompson                     uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp];
562b730f8bSJeremy L Thompson               }
572b730f8bSJeremy L Thompson             }
582b730f8bSJeremy L Thompson           }
592b730f8bSJeremy L Thompson         }
607f90ec76Sjeremylt       } else {
617f90ec76Sjeremylt         // User provided strides
627f90ec76Sjeremylt         CeedInt strides[3];
632b730f8bSJeremy L Thompson         CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides));
642b730f8bSJeremy L Thompson         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
652b730f8bSJeremy L Thompson           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
662b730f8bSJeremy L Thompson             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
672b730f8bSJeremy L Thompson               CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
682b730f8bSJeremy L Thompson                 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
692b730f8bSJeremy L Thompson                     uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]];
702b730f8bSJeremy L Thompson               }
712b730f8bSJeremy L Thompson             }
722b730f8bSJeremy L Thompson           }
732b730f8bSJeremy L Thompson         }
747509a596Sjeremylt       }
7521617c04Sjeremylt     } else {
76d979a051Sjeremylt       // Offsets provided, standard or blocked restriction
77d1d35e2fSjeremylt       // vv has shape [elem_size, num_comp, num_elem], row-major
78d1d35e2fSjeremylt       // uu has shape [nnodes, num_comp]
792b730f8bSJeremy L Thompson       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
802b730f8bSJeremy L Thompson         CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
812b730f8bSJeremy L Thompson           CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) {
82f30b1135SSebastian Grimberg             if (!use_orient || !impl->orient) {
83f30b1135SSebastian Grimberg               // Unsigned restriction
84*3bdd4e5aSSebastian Grimberg               vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = uu[impl->offsets[i + elem_size * e] + k * comp_stride];
85f30b1135SSebastian Grimberg             } else {
86f30b1135SSebastian Grimberg               // Signed restriction
87f30b1135SSebastian Grimberg               vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] =
88f30b1135SSebastian Grimberg                   uu[impl->offsets[i + elem_size * e] + k * comp_stride] * (impl->orient[i + elem_size * e] ? -1.0 : 1.0);
89f30b1135SSebastian Grimberg             }
902b730f8bSJeremy L Thompson           }
912b730f8bSJeremy L Thompson         }
922b730f8bSJeremy L Thompson       }
93b435c5a6Srezgarshakeri     }
9421617c04Sjeremylt   } else {
957f90ec76Sjeremylt     // Restriction from E-vector to L-vector
968d94b059Sjeremylt     // Performing v += r^T * u
97d979a051Sjeremylt     // No offsets provided, Identity Restriction
98d979a051Sjeremylt     if (!impl->offsets) {
99d1d35e2fSjeremylt       bool has_backend_strides;
1002b730f8bSJeremy L Thompson       CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides));
101d1d35e2fSjeremylt       if (has_backend_strides) {
102d1d35e2fSjeremylt         // CPU backend strides are {1, elem_size, elem_size*num_comp}
1037f90ec76Sjeremylt         // This if brach is left separate to allow better inlining
1042b730f8bSJeremy L Thompson         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
1052b730f8bSJeremy L Thompson           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
1062b730f8bSJeremy L Thompson             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
1072b730f8bSJeremy L Thompson               CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) {
1082b730f8bSJeremy L Thompson                 vv[n + k * elem_size + (e + j) * elem_size * num_comp] +=
1092b730f8bSJeremy L Thompson                     uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset];
1102b730f8bSJeremy L Thompson               }
1112b730f8bSJeremy L Thompson             }
1122b730f8bSJeremy L Thompson           }
1132b730f8bSJeremy L Thompson         }
1147f90ec76Sjeremylt       } else {
1157f90ec76Sjeremylt         // User provided strides
1167f90ec76Sjeremylt         CeedInt strides[3];
1172b730f8bSJeremy L Thompson         CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides));
1182b730f8bSJeremy L Thompson         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
1192b730f8bSJeremy L Thompson           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
1202b730f8bSJeremy L Thompson             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
1212b730f8bSJeremy L Thompson               CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) {
1222b730f8bSJeremy L Thompson                 vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] +=
1232b730f8bSJeremy L Thompson                     uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset];
1242b730f8bSJeremy L Thompson               }
1252b730f8bSJeremy L Thompson             }
1262b730f8bSJeremy L Thompson           }
1272b730f8bSJeremy L Thompson         }
128523b8ea0Sjeremylt       }
12921617c04Sjeremylt     } else {
130d979a051Sjeremylt       // Offsets provided, standard or blocked restriction
131d1d35e2fSjeremylt       // uu has shape [elem_size, num_comp, num_elem]
132d1d35e2fSjeremylt       // vv has shape [nnodes, num_comp]
1332b730f8bSJeremy L Thompson       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
1342b730f8bSJeremy L Thompson         for (CeedInt k = 0; k < num_comp; k++) {
1352b730f8bSJeremy L Thompson           for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) {
1368d94b059Sjeremylt             // Iteration bound set to discard padding elements
1372b730f8bSJeremy L Thompson             for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) {
138f30b1135SSebastian Grimberg               if (!use_orient || !impl->orient) {
139f30b1135SSebastian Grimberg                 // Unsigned restriction
140*3bdd4e5aSSebastian Grimberg                 vv[impl->offsets[j + e * elem_size] + k * comp_stride] += uu[elem_size * (k * blk_size + num_comp * e) + j - v_offset];
141f30b1135SSebastian Grimberg               } else {
142f30b1135SSebastian Grimberg                 // Signed restriction
143f30b1135SSebastian Grimberg                 vv[impl->offsets[j + e * elem_size] + k * comp_stride] +=
144f30b1135SSebastian Grimberg                     uu[elem_size * (k * blk_size + num_comp * e) + j - v_offset] * (impl->orient[j + e * elem_size] ? -1.0 : 1.0);
145f30b1135SSebastian Grimberg               }
14621617c04Sjeremylt             }
147b435c5a6Srezgarshakeri           }
1482b730f8bSJeremy L Thompson         }
1492b730f8bSJeremy L Thompson       }
1502b730f8bSJeremy L Thompson     }
1512b730f8bSJeremy L Thompson   }
1522b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
1532b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
1542b730f8bSJeremy L Thompson   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
155e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
15621617c04Sjeremylt }
15721617c04Sjeremylt 
158f10650afSjeremylt //------------------------------------------------------------------------------
159f10650afSjeremylt // ElemRestriction Apply - Common Sizes
160f10650afSjeremylt //------------------------------------------------------------------------------
1612b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
162*3bdd4e5aSSebastian Grimberg                                             CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
163*3bdd4e5aSSebastian Grimberg                                             CeedRequest *request) {
164f30b1135SSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, use_orient, t_mode, u, v, request);
165d979a051Sjeremylt }
166d979a051Sjeremylt 
1672b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
168*3bdd4e5aSSebastian Grimberg                                             CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
169*3bdd4e5aSSebastian Grimberg                                             CeedRequest *request) {
170f30b1135SSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, use_orient, t_mode, u, v, request);
1714d2a38eeSjeremylt }
1724d2a38eeSjeremylt 
1732b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
174*3bdd4e5aSSebastian Grimberg                                             CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
175*3bdd4e5aSSebastian Grimberg                                             CeedRequest *request) {
176f30b1135SSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, use_orient, t_mode, u, v, request);
1779c36149bSjeremylt }
1789c36149bSjeremylt 
1792b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
180*3bdd4e5aSSebastian Grimberg                                             CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
181*3bdd4e5aSSebastian Grimberg                                             CeedRequest *request) {
182f30b1135SSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, use_orient, t_mode, u, v, request);
1839c36149bSjeremylt }
1849c36149bSjeremylt 
1852b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
186*3bdd4e5aSSebastian Grimberg                                             CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
187*3bdd4e5aSSebastian Grimberg                                             CeedRequest *request) {
188f30b1135SSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, use_orient, t_mode, u, v, request);
189d979a051Sjeremylt }
190d979a051Sjeremylt 
1912b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
192*3bdd4e5aSSebastian Grimberg                                             CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
193*3bdd4e5aSSebastian Grimberg                                             CeedRequest *request) {
194f30b1135SSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, use_orient, t_mode, u, v, request);
195d979a051Sjeremylt }
196d979a051Sjeremylt 
1972b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
198*3bdd4e5aSSebastian Grimberg                                             CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
199*3bdd4e5aSSebastian Grimberg                                             CeedRequest *request) {
200f30b1135SSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, use_orient, t_mode, u, v, request);
201d979a051Sjeremylt }
202d979a051Sjeremylt 
2032b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
204*3bdd4e5aSSebastian Grimberg                                             CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
205*3bdd4e5aSSebastian Grimberg                                             CeedRequest *request) {
206f30b1135SSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, use_orient, t_mode, u, v, request);
207d979a051Sjeremylt }
208d979a051Sjeremylt 
209bf4d1581Sjeremylt // LCOV_EXCL_START
2102b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
211*3bdd4e5aSSebastian Grimberg                                             CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
212*3bdd4e5aSSebastian Grimberg                                             CeedRequest *request) {
213f30b1135SSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, use_orient, t_mode, u, v, request);
214d979a051Sjeremylt }
215bf4d1581Sjeremylt // LCOV_EXCL_STOP
216d979a051Sjeremylt 
2172b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
218*3bdd4e5aSSebastian Grimberg                                             CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
219*3bdd4e5aSSebastian Grimberg                                             CeedRequest *request) {
220f30b1135SSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, use_orient, t_mode, u, v, request);
221d979a051Sjeremylt }
222d979a051Sjeremylt 
223bf4d1581Sjeremylt // LCOV_EXCL_START
2242b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
225*3bdd4e5aSSebastian Grimberg                                             CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
226*3bdd4e5aSSebastian Grimberg                                             CeedRequest *request) {
227f30b1135SSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, use_orient, t_mode, u, v, request);
228d979a051Sjeremylt }
229bf4d1581Sjeremylt // LCOV_EXCL_STOP
230d979a051Sjeremylt 
2312b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
232*3bdd4e5aSSebastian Grimberg                                             CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
233*3bdd4e5aSSebastian Grimberg                                             CeedRequest *request) {
234f30b1135SSebastian Grimberg   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, use_orient, t_mode, u, v, request);
2354d2a38eeSjeremylt }
2364d2a38eeSjeremylt 
237f10650afSjeremylt //------------------------------------------------------------------------------
238f10650afSjeremylt // ElemRestriction Apply
239f10650afSjeremylt //------------------------------------------------------------------------------
2402b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
241d1d35e2fSjeremylt   CeedInt num_blk, blk_size, num_comp, comp_stride;
2422b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
2432b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
2442b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
2452b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
2467509a596Sjeremylt   CeedElemRestriction_Ref *impl;
2472b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
2484d2a38eeSjeremylt 
249f30b1135SSebastian Grimberg   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, true, t_mode, u, v, request);
250f30b1135SSebastian Grimberg }
251f30b1135SSebastian Grimberg 
252f30b1135SSebastian Grimberg //------------------------------------------------------------------------------
253f30b1135SSebastian Grimberg // ElemRestriction Apply Unsigned
254f30b1135SSebastian Grimberg //------------------------------------------------------------------------------
255f30b1135SSebastian Grimberg static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
256f30b1135SSebastian Grimberg   CeedInt num_blk, blk_size, num_comp, comp_stride;
257f30b1135SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
258f30b1135SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
259f30b1135SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
260f30b1135SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
261f30b1135SSebastian Grimberg   CeedElemRestriction_Ref *impl;
262f30b1135SSebastian Grimberg   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
263f30b1135SSebastian Grimberg 
264f30b1135SSebastian Grimberg   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, false, t_mode, u, v, request);
2659c36149bSjeremylt }
266be9261b7Sjeremylt 
267f10650afSjeremylt //------------------------------------------------------------------------------
268f10650afSjeremylt // ElemRestriction Apply Block
269f10650afSjeremylt //------------------------------------------------------------------------------
2702b730f8bSJeremy L Thompson static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
271074cb416Sjeremylt                                              CeedRequest *request) {
272d1d35e2fSjeremylt   CeedInt blk_size, num_comp, comp_stride;
2732b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
2742b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
2752b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
2767509a596Sjeremylt   CeedElemRestriction_Ref *impl;
2772b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
2784d2a38eeSjeremylt 
279f30b1135SSebastian Grimberg   return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, true, t_mode, u, v, request);
2809c36149bSjeremylt }
281be9261b7Sjeremylt 
282f10650afSjeremylt //------------------------------------------------------------------------------
283bd33150aSjeremylt // ElemRestriction Get Offsets
284bd33150aSjeremylt //------------------------------------------------------------------------------
2852b730f8bSJeremy L Thompson static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) {
286bd33150aSjeremylt   CeedElemRestriction_Ref *impl;
2872b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
288bd33150aSjeremylt   Ceed ceed;
2892b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
290bd33150aSjeremylt 
2916574a04fSJeremy L Thompson   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
292bd33150aSjeremylt 
293bd33150aSjeremylt   *offsets = impl->offsets;
294e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
295bd33150aSjeremylt }
296bd33150aSjeremylt 
297bd33150aSjeremylt //------------------------------------------------------------------------------
298f10650afSjeremylt // ElemRestriction Destroy
299f10650afSjeremylt //------------------------------------------------------------------------------
30021617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
301fe2413ffSjeremylt   CeedElemRestriction_Ref *impl;
3022b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
30321617c04Sjeremylt 
3042b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl->offsets_allocated));
3052b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl->orient_allocated));
3062b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl));
307e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
30821617c04Sjeremylt }
30921617c04Sjeremylt 
310f10650afSjeremylt //------------------------------------------------------------------------------
311f10650afSjeremylt // ElemRestriction Create
312f10650afSjeremylt //------------------------------------------------------------------------------
3132b730f8bSJeremy L Thompson int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, CeedElemRestriction r) {
31421617c04Sjeremylt   CeedElemRestriction_Ref *impl;
315d1d35e2fSjeremylt   CeedInt                  num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride;
3162b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
3172b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
3182b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
3192b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
3202b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
3212b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
3224ce2993fSjeremylt   Ceed ceed;
3232b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
32421617c04Sjeremylt 
3256574a04fSJeremy L Thompson   CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
3262b730f8bSJeremy L Thompson   CeedCallBackend(CeedCalloc(1, &impl));
3273661185eSjeremylt 
32892fe105eSJeremy L Thompson   // Offsets data
329d1d35e2fSjeremylt   bool is_strided;
3302b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionIsStrided(r, &is_strided));
331d1d35e2fSjeremylt   if (!is_strided) {
33292fe105eSJeremy L Thompson     // Check indices for ref or memcheck backends
333d1d35e2fSjeremylt     Ceed parent_ceed = ceed, curr_ceed = NULL;
334d1d35e2fSjeremylt     while (parent_ceed != curr_ceed) {
335d1d35e2fSjeremylt       curr_ceed = parent_ceed;
3362b730f8bSJeremy L Thompson       CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed));
3373661185eSjeremylt     }
3383661185eSjeremylt     const char *resource;
3392b730f8bSJeremy L Thompson     CeedCallBackend(CeedGetResource(parent_ceed, &resource));
3402b730f8bSJeremy L Thompson     if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") ||
341d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/memcheck/blocked")) {
342e79b91d9SJeremy L Thompson       CeedSize l_size;
3432b730f8bSJeremy L Thompson       CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size));
3443661185eSjeremylt 
3452b730f8bSJeremy L Thompson       for (CeedInt i = 0; i < num_elem * elem_size; i++) {
3466574a04fSJeremy L Thompson         CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND,
3476574a04fSJeremy L Thompson                   "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size);
3482b730f8bSJeremy L Thompson       }
3492b730f8bSJeremy L Thompson     }
3503661185eSjeremylt 
35192fe105eSJeremy L Thompson     // Copy data
352d1d35e2fSjeremylt     switch (copy_mode) {
35321617c04Sjeremylt       case CEED_COPY_VALUES:
3542b730f8bSJeremy L Thompson         CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated));
3552b730f8bSJeremy L Thompson         memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0]));
356d979a051Sjeremylt         impl->offsets = impl->offsets_allocated;
35721617c04Sjeremylt         break;
35821617c04Sjeremylt       case CEED_OWN_POINTER:
359d979a051Sjeremylt         impl->offsets_allocated = (CeedInt *)offsets;
360d979a051Sjeremylt         impl->offsets           = impl->offsets_allocated;
36121617c04Sjeremylt         break;
36221617c04Sjeremylt       case CEED_USE_POINTER:
363d979a051Sjeremylt         impl->offsets = offsets;
36421617c04Sjeremylt     }
36592fe105eSJeremy L Thompson   }
366fe2413ffSjeremylt 
3672b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionSetData(r, impl));
368d1d35e2fSjeremylt   CeedInt layout[3] = {1, elem_size, elem_size * num_comp};
3692b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionSetELayout(r, layout));
3702b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref));
371f30b1135SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref));
3722b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref));
3732b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref));
3742b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref));
375d979a051Sjeremylt 
376d1d35e2fSjeremylt   // Set apply function based upon num_comp, blk_size, and comp_stride
377d979a051Sjeremylt   CeedInt idx = -1;
3782b730f8bSJeremy L Thompson   if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1);
379d979a051Sjeremylt   switch (idx) {
380d979a051Sjeremylt     case 110:
381d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_110;
382d979a051Sjeremylt       break;
383d979a051Sjeremylt     case 111:
384d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_111;
385d979a051Sjeremylt       break;
386d979a051Sjeremylt     case 180:
387d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_180;
388d979a051Sjeremylt       break;
389d979a051Sjeremylt     case 181:
390d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_181;
391d979a051Sjeremylt       break;
392d979a051Sjeremylt     case 310:
393d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_310;
394d979a051Sjeremylt       break;
395d979a051Sjeremylt     case 311:
396d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_311;
397d979a051Sjeremylt       break;
398d979a051Sjeremylt     case 380:
399d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_380;
400d979a051Sjeremylt       break;
401d979a051Sjeremylt     case 381:
402d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_381;
403d979a051Sjeremylt       break;
404bf4d1581Sjeremylt     // LCOV_EXCL_START
405d979a051Sjeremylt     case 510:
406d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_510;
407d979a051Sjeremylt       break;
408bf4d1581Sjeremylt     // LCOV_EXCL_STOP
409d979a051Sjeremylt     case 511:
410d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_511;
411d979a051Sjeremylt       break;
412bf4d1581Sjeremylt     // LCOV_EXCL_START
413d979a051Sjeremylt     case 580:
414d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_580;
415d979a051Sjeremylt       break;
416bf4d1581Sjeremylt     // LCOV_EXCL_STOP
417d979a051Sjeremylt     case 581:
418d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_581;
419d979a051Sjeremylt       break;
420d979a051Sjeremylt     default:
421d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_Core;
422d979a051Sjeremylt       break;
423d979a051Sjeremylt   }
424d979a051Sjeremylt 
425e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
42621617c04Sjeremylt }
427fc0567d9Srezgarshakeri 
428fc0567d9Srezgarshakeri //------------------------------------------------------------------------------
429fc0567d9Srezgarshakeri // ElemRestriction Create Oriented
430fc0567d9Srezgarshakeri //------------------------------------------------------------------------------
4312b730f8bSJeremy L Thompson int CeedElemRestrictionCreateOriented_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orient,
432fc0567d9Srezgarshakeri                                           CeedElemRestriction r) {
433fc0567d9Srezgarshakeri   // Set up for normal restriction with explicit offsets. This sets up dispatch to
434fc0567d9Srezgarshakeri   // CeedElemRestrictionApply_Ref_* and manages the impl->offsets array copy/allocation.
435f30b1135SSebastian Grimberg   CeedElemRestriction_Ref *impl;
436f30b1135SSebastian Grimberg   CeedInt                  num_elem, elem_size;
4372b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionCreate_Ref(mem_type, copy_mode, offsets, r));
4382b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
4392b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
4402b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
441f30b1135SSebastian Grimberg 
442f30b1135SSebastian Grimberg   // Copy data
443fc0567d9Srezgarshakeri   switch (copy_mode) {
444fc0567d9Srezgarshakeri     case CEED_COPY_VALUES:
4452b730f8bSJeremy L Thompson       CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orient_allocated));
4462b730f8bSJeremy L Thompson       memcpy(impl->orient_allocated, orient, num_elem * elem_size * sizeof(orient[0]));
447fc0567d9Srezgarshakeri       impl->orient = impl->orient_allocated;
448fc0567d9Srezgarshakeri       break;
449fc0567d9Srezgarshakeri     case CEED_OWN_POINTER:
450fc0567d9Srezgarshakeri       impl->orient_allocated = (bool *)orient;
451fc0567d9Srezgarshakeri       impl->orient           = impl->orient_allocated;
452fc0567d9Srezgarshakeri       break;
453fc0567d9Srezgarshakeri     case CEED_USE_POINTER:
454fc0567d9Srezgarshakeri       impl->orient = orient;
455fc0567d9Srezgarshakeri   }
456f30b1135SSebastian Grimberg 
457fc0567d9Srezgarshakeri   return CEED_ERROR_SUCCESS;
458fc0567d9Srezgarshakeri }
4592a86cc9dSSebastian Grimberg 
460f10650afSjeremylt //------------------------------------------------------------------------------
461