xref: /libCEED/rust/libceed-sys/c-src/backends/ref/ceed-ref-restriction.c (revision 49aac155e7a09736f56fb3abac0f57dab29f7cbf)
13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
321617c04Sjeremylt //
43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
521617c04Sjeremylt //
63d8e8822SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
721617c04Sjeremylt 
8*49aac155SJeremy L Thompson #include <ceed.h>
9ec3da8bcSJed Brown #include <ceed/backend.h>
103d576824SJeremy L Thompson #include <stdbool.h>
113d576824SJeremy L Thompson #include <string.h>
122b730f8bSJeremy L Thompson 
1321617c04Sjeremylt #include "ceed-ref.h"
1421617c04Sjeremylt 
15f10650afSjeremylt //------------------------------------------------------------------------------
16f10650afSjeremylt // Core ElemRestriction Apply Code
17f10650afSjeremylt //------------------------------------------------------------------------------
182b730f8bSJeremy L Thompson static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
192b730f8bSJeremy L Thompson                                                     CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
202b730f8bSJeremy L Thompson                                                     CeedRequest *request) {
214ce2993fSjeremylt   CeedElemRestriction_Ref *impl;
222b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
2321617c04Sjeremylt   const CeedScalar *uu;
2421617c04Sjeremylt   CeedScalar       *vv;
25d1d35e2fSjeremylt   CeedInt           num_elem, elem_size, v_offset;
262b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
272b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
28d1d35e2fSjeremylt   v_offset = start * blk_size * elem_size * num_comp;
2921617c04Sjeremylt 
30b435c5a6Srezgarshakeri   bool is_oriented;
312b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionIsOriented(r, &is_oriented));
322b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu));
339c774eddSJeremy L Thompson   if (t_mode == CEED_TRANSPOSE) {
349c774eddSJeremy L Thompson     // Sum into for transpose mode, e-vec to l-vec
352b730f8bSJeremy L Thompson     CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv));
369c774eddSJeremy L Thompson   } else {
379c774eddSJeremy L Thompson     // Overwrite for notranspose mode, l-vec to e-vec
382b730f8bSJeremy L Thompson     CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv));
399c774eddSJeremy L Thompson   }
407f90ec76Sjeremylt   // Restriction from L-vector to E-vector
4121617c04Sjeremylt   // Perform: v = r * u
42d1d35e2fSjeremylt   if (t_mode == CEED_NOTRANSPOSE) {
43d979a051Sjeremylt     // No offsets provided, Identity Restriction
44d979a051Sjeremylt     if (!impl->offsets) {
45d1d35e2fSjeremylt       bool has_backend_strides;
462b730f8bSJeremy L Thompson       CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides));
47d1d35e2fSjeremylt       if (has_backend_strides) {
48d1d35e2fSjeremylt         // CPU backend strides are {1, elem_size, elem_size*num_comp}
497f90ec76Sjeremylt         // This if branch is left separate to allow better inlining
502b730f8bSJeremy L Thompson         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
512b730f8bSJeremy L Thompson           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
522b730f8bSJeremy L Thompson             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
532b730f8bSJeremy L Thompson               CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
542b730f8bSJeremy L Thompson                 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
552b730f8bSJeremy L Thompson                     uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp];
562b730f8bSJeremy L Thompson               }
572b730f8bSJeremy L Thompson             }
582b730f8bSJeremy L Thompson           }
592b730f8bSJeremy L Thompson         }
607f90ec76Sjeremylt       } else {
617f90ec76Sjeremylt         // User provided strides
627f90ec76Sjeremylt         CeedInt strides[3];
632b730f8bSJeremy L Thompson         CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides));
642b730f8bSJeremy L Thompson         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
652b730f8bSJeremy L Thompson           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
662b730f8bSJeremy L Thompson             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
672b730f8bSJeremy L Thompson               CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) {
682b730f8bSJeremy L Thompson                 vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] =
692b730f8bSJeremy L Thompson                     uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]];
702b730f8bSJeremy L Thompson               }
712b730f8bSJeremy L Thompson             }
722b730f8bSJeremy L Thompson           }
732b730f8bSJeremy L Thompson         }
747509a596Sjeremylt       }
7521617c04Sjeremylt     } else {
76d979a051Sjeremylt       // Offsets provided, standard or blocked restriction
77d1d35e2fSjeremylt       // vv has shape [elem_size, num_comp, num_elem], row-major
78d1d35e2fSjeremylt       // uu has shape [nnodes, num_comp]
792b730f8bSJeremy L Thompson       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
802b730f8bSJeremy L Thompson         CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
812b730f8bSJeremy L Thompson           CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) {
822b730f8bSJeremy L Thompson             vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] =
832b730f8bSJeremy L Thompson                 uu[impl->offsets[i + elem_size * e] + k * comp_stride] * (is_oriented && impl->orient[i + elem_size * e] ? -1. : 1.);
842b730f8bSJeremy L Thompson           }
852b730f8bSJeremy L Thompson         }
862b730f8bSJeremy L Thompson       }
87b435c5a6Srezgarshakeri     }
8821617c04Sjeremylt   } else {
897f90ec76Sjeremylt     // Restriction from E-vector to L-vector
908d94b059Sjeremylt     // Performing v += r^T * u
91d979a051Sjeremylt     // No offsets provided, Identity Restriction
92d979a051Sjeremylt     if (!impl->offsets) {
93d1d35e2fSjeremylt       bool has_backend_strides;
942b730f8bSJeremy L Thompson       CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides));
95d1d35e2fSjeremylt       if (has_backend_strides) {
96d1d35e2fSjeremylt         // CPU backend strides are {1, elem_size, elem_size*num_comp}
977f90ec76Sjeremylt         // This if brach is left separate to allow better inlining
982b730f8bSJeremy L Thompson         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
992b730f8bSJeremy L Thompson           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
1002b730f8bSJeremy L Thompson             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
1012b730f8bSJeremy L Thompson               CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) {
1022b730f8bSJeremy L Thompson                 vv[n + k * elem_size + (e + j) * elem_size * num_comp] +=
1032b730f8bSJeremy L Thompson                     uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset];
1042b730f8bSJeremy L Thompson               }
1052b730f8bSJeremy L Thompson             }
1062b730f8bSJeremy L Thompson           }
1072b730f8bSJeremy L Thompson         }
1087f90ec76Sjeremylt       } else {
1097f90ec76Sjeremylt         // User provided strides
1107f90ec76Sjeremylt         CeedInt strides[3];
1112b730f8bSJeremy L Thompson         CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides));
1122b730f8bSJeremy L Thompson         for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
1132b730f8bSJeremy L Thompson           CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) {
1142b730f8bSJeremy L Thompson             CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) {
1152b730f8bSJeremy L Thompson               CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) {
1162b730f8bSJeremy L Thompson                 vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] +=
1172b730f8bSJeremy L Thompson                     uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset];
1182b730f8bSJeremy L Thompson               }
1192b730f8bSJeremy L Thompson             }
1202b730f8bSJeremy L Thompson           }
1212b730f8bSJeremy L Thompson         }
122523b8ea0Sjeremylt       }
12321617c04Sjeremylt     } else {
124d979a051Sjeremylt       // Offsets provided, standard or blocked restriction
125d1d35e2fSjeremylt       // uu has shape [elem_size, num_comp, num_elem]
126d1d35e2fSjeremylt       // vv has shape [nnodes, num_comp]
1272b730f8bSJeremy L Thompson       for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) {
1282b730f8bSJeremy L Thompson         for (CeedInt k = 0; k < num_comp; k++) {
1292b730f8bSJeremy L Thompson           for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) {
1308d94b059Sjeremylt             // Iteration bound set to discard padding elements
1312b730f8bSJeremy L Thompson             for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) {
1322b730f8bSJeremy L Thompson               vv[impl->offsets[j + e * elem_size] + k * comp_stride] +=
1332b730f8bSJeremy L Thompson                   uu[elem_size * (k * blk_size + num_comp * e) + j - v_offset] * (is_oriented && impl->orient[j + e * elem_size] ? -1. : 1.);
13421617c04Sjeremylt             }
135b435c5a6Srezgarshakeri           }
1362b730f8bSJeremy L Thompson         }
1372b730f8bSJeremy L Thompson       }
1382b730f8bSJeremy L Thompson     }
1392b730f8bSJeremy L Thompson   }
1402b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu));
1412b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorRestoreArray(v, &vv));
1422b730f8bSJeremy L Thompson   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL;
143e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
14421617c04Sjeremylt }
14521617c04Sjeremylt 
146f10650afSjeremylt //------------------------------------------------------------------------------
147f10650afSjeremylt // ElemRestriction Apply - Common Sizes
148f10650afSjeremylt //------------------------------------------------------------------------------
1492b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
1502b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
1512b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, t_mode, u, v, request);
152d979a051Sjeremylt }
153d979a051Sjeremylt 
1542b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
1552b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
1562b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode, u, v, request);
1574d2a38eeSjeremylt }
1584d2a38eeSjeremylt 
1592b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
1602b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
1612b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, t_mode, u, v, request);
1629c36149bSjeremylt }
1639c36149bSjeremylt 
1642b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
1652b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
1662b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode, u, v, request);
1679c36149bSjeremylt }
1689c36149bSjeremylt 
1692b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
1702b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
1712b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, t_mode, u, v, request);
172d979a051Sjeremylt }
173d979a051Sjeremylt 
1742b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
1752b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
1762b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode, u, v, request);
177d979a051Sjeremylt }
178d979a051Sjeremylt 
1792b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
1802b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
1812b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, t_mode, u, v, request);
182d979a051Sjeremylt }
183d979a051Sjeremylt 
1842b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
1852b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
1862b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode, u, v, request);
187d979a051Sjeremylt }
188d979a051Sjeremylt 
189bf4d1581Sjeremylt // LCOV_EXCL_START
1902b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
1912b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
1922b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, t_mode, u, v, request);
193d979a051Sjeremylt }
194bf4d1581Sjeremylt // LCOV_EXCL_STOP
195d979a051Sjeremylt 
1962b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
1972b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
1982b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode, u, v, request);
199d979a051Sjeremylt }
200d979a051Sjeremylt 
201bf4d1581Sjeremylt // LCOV_EXCL_START
2022b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
2032b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
2042b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, t_mode, u, v, request);
205d979a051Sjeremylt }
206bf4d1581Sjeremylt // LCOV_EXCL_STOP
207d979a051Sjeremylt 
2082b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride,
2092b730f8bSJeremy L Thompson                                             CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
2102b730f8bSJeremy L Thompson   return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode, u, v, request);
2114d2a38eeSjeremylt }
2124d2a38eeSjeremylt 
213f10650afSjeremylt //------------------------------------------------------------------------------
214f10650afSjeremylt // ElemRestriction Apply
215f10650afSjeremylt //------------------------------------------------------------------------------
2162b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) {
217d1d35e2fSjeremylt   CeedInt num_blk, blk_size, num_comp, comp_stride;
2182b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
2192b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
2202b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
2212b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
2227509a596Sjeremylt   CeedElemRestriction_Ref *impl;
2232b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
2244d2a38eeSjeremylt 
2252b730f8bSJeremy L Thompson   return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v, request);
2269c36149bSjeremylt }
227be9261b7Sjeremylt 
228f10650afSjeremylt //------------------------------------------------------------------------------
229f10650afSjeremylt // ElemRestriction Apply Block
230f10650afSjeremylt //------------------------------------------------------------------------------
2312b730f8bSJeremy L Thompson static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v,
232074cb416Sjeremylt                                              CeedRequest *request) {
233d1d35e2fSjeremylt   CeedInt blk_size, num_comp, comp_stride;
2342b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
2352b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
2362b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
2377509a596Sjeremylt   CeedElemRestriction_Ref *impl;
2382b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
2394d2a38eeSjeremylt 
2402b730f8bSJeremy L Thompson   return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, t_mode, u, v, request);
2419c36149bSjeremylt }
242be9261b7Sjeremylt 
243f10650afSjeremylt //------------------------------------------------------------------------------
244bd33150aSjeremylt // ElemRestriction Get Offsets
245bd33150aSjeremylt //------------------------------------------------------------------------------
2462b730f8bSJeremy L Thompson static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) {
247bd33150aSjeremylt   CeedElemRestriction_Ref *impl;
2482b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl));
249bd33150aSjeremylt   Ceed ceed;
2502b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed));
251bd33150aSjeremylt 
2522b730f8bSJeremy L Thompson   if (mem_type != CEED_MEM_HOST) {
253bd33150aSjeremylt     // LCOV_EXCL_START
254e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory");
255bd33150aSjeremylt     // LCOV_EXCL_STOP
2562b730f8bSJeremy L Thompson   }
257bd33150aSjeremylt 
258bd33150aSjeremylt   *offsets = impl->offsets;
259e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
260bd33150aSjeremylt }
261bd33150aSjeremylt 
262bd33150aSjeremylt //------------------------------------------------------------------------------
263f10650afSjeremylt // ElemRestriction Destroy
264f10650afSjeremylt //------------------------------------------------------------------------------
26521617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
266fe2413ffSjeremylt   CeedElemRestriction_Ref *impl;
2672b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
26821617c04Sjeremylt 
2692b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl->offsets_allocated));
2702b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl->orient_allocated));
2712b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl));
272e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
27321617c04Sjeremylt }
27421617c04Sjeremylt 
275f10650afSjeremylt //------------------------------------------------------------------------------
276f10650afSjeremylt // ElemRestriction Create
277f10650afSjeremylt //------------------------------------------------------------------------------
2782b730f8bSJeremy L Thompson int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, CeedElemRestriction r) {
27921617c04Sjeremylt   CeedElemRestriction_Ref *impl;
280d1d35e2fSjeremylt   CeedInt                  num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride;
2812b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
2822b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
2832b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk));
2842b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size));
2852b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp));
2862b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride));
2874ce2993fSjeremylt   Ceed ceed;
2882b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed));
28921617c04Sjeremylt 
2902b730f8bSJeremy L Thompson   if (mem_type != CEED_MEM_HOST) {
291c042f62fSJeremy L Thompson     // LCOV_EXCL_START
292e15f9bd0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported");
293c042f62fSJeremy L Thompson     // LCOV_EXCL_STOP
2942b730f8bSJeremy L Thompson   }
2952b730f8bSJeremy L Thompson   CeedCallBackend(CeedCalloc(1, &impl));
2963661185eSjeremylt 
29792fe105eSJeremy L Thompson   // Offsets data
298d1d35e2fSjeremylt   bool is_strided;
2992b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionIsStrided(r, &is_strided));
300d1d35e2fSjeremylt   if (!is_strided) {
30192fe105eSJeremy L Thompson     // Check indices for ref or memcheck backends
302d1d35e2fSjeremylt     Ceed parent_ceed = ceed, curr_ceed = NULL;
303d1d35e2fSjeremylt     while (parent_ceed != curr_ceed) {
304d1d35e2fSjeremylt       curr_ceed = parent_ceed;
3052b730f8bSJeremy L Thompson       CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed));
3063661185eSjeremylt     }
3073661185eSjeremylt     const char *resource;
3082b730f8bSJeremy L Thompson     CeedCallBackend(CeedGetResource(parent_ceed, &resource));
3092b730f8bSJeremy L Thompson     if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") ||
310d1d35e2fSjeremylt         !strcmp(resource, "/cpu/self/memcheck/blocked")) {
311e79b91d9SJeremy L Thompson       CeedSize l_size;
3122b730f8bSJeremy L Thompson       CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size));
3133661185eSjeremylt 
3142b730f8bSJeremy L Thompson       for (CeedInt i = 0; i < num_elem * elem_size; i++) {
3152b730f8bSJeremy L Thompson         if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp - 1) * comp_stride) {
3163661185eSjeremylt           // LCOV_EXCL_START
3172b730f8bSJeremy L Thompson           return CeedError(ceed, CEED_ERROR_BACKEND, "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i,
3182b730f8bSJeremy L Thompson                            offsets[i], l_size);
3193661185eSjeremylt           // LCOV_EXCL_STOP
3203661185eSjeremylt         }
3212b730f8bSJeremy L Thompson       }
3222b730f8bSJeremy L Thompson     }
3233661185eSjeremylt 
32492fe105eSJeremy L Thompson     // Copy data
325d1d35e2fSjeremylt     switch (copy_mode) {
32621617c04Sjeremylt       case CEED_COPY_VALUES:
3272b730f8bSJeremy L Thompson         CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated));
3282b730f8bSJeremy L Thompson         memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0]));
329d979a051Sjeremylt         impl->offsets = impl->offsets_allocated;
33021617c04Sjeremylt         break;
33121617c04Sjeremylt       case CEED_OWN_POINTER:
332d979a051Sjeremylt         impl->offsets_allocated = (CeedInt *)offsets;
333d979a051Sjeremylt         impl->offsets           = impl->offsets_allocated;
33421617c04Sjeremylt         break;
33521617c04Sjeremylt       case CEED_USE_POINTER:
336d979a051Sjeremylt         impl->offsets = offsets;
33721617c04Sjeremylt     }
33892fe105eSJeremy L Thompson   }
339fe2413ffSjeremylt 
3402b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionSetData(r, impl));
341d1d35e2fSjeremylt   CeedInt layout[3] = {1, elem_size, elem_size * num_comp};
3422b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionSetELayout(r, layout));
3432b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref));
3442b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref));
3452b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref));
3462b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref));
347d979a051Sjeremylt 
348d1d35e2fSjeremylt   // Set apply function based upon num_comp, blk_size, and comp_stride
349d979a051Sjeremylt   CeedInt idx = -1;
3502b730f8bSJeremy L Thompson   if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1);
351d979a051Sjeremylt   switch (idx) {
352d979a051Sjeremylt     case 110:
353d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_110;
354d979a051Sjeremylt       break;
355d979a051Sjeremylt     case 111:
356d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_111;
357d979a051Sjeremylt       break;
358d979a051Sjeremylt     case 180:
359d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_180;
360d979a051Sjeremylt       break;
361d979a051Sjeremylt     case 181:
362d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_181;
363d979a051Sjeremylt       break;
364d979a051Sjeremylt     case 310:
365d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_310;
366d979a051Sjeremylt       break;
367d979a051Sjeremylt     case 311:
368d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_311;
369d979a051Sjeremylt       break;
370d979a051Sjeremylt     case 380:
371d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_380;
372d979a051Sjeremylt       break;
373d979a051Sjeremylt     case 381:
374d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_381;
375d979a051Sjeremylt       break;
376bf4d1581Sjeremylt     // LCOV_EXCL_START
377d979a051Sjeremylt     case 510:
378d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_510;
379d979a051Sjeremylt       break;
380bf4d1581Sjeremylt     // LCOV_EXCL_STOP
381d979a051Sjeremylt     case 511:
382d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_511;
383d979a051Sjeremylt       break;
384bf4d1581Sjeremylt     // LCOV_EXCL_START
385d979a051Sjeremylt     case 580:
386d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_580;
387d979a051Sjeremylt       break;
388bf4d1581Sjeremylt     // LCOV_EXCL_STOP
389d979a051Sjeremylt     case 581:
390d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_581;
391d979a051Sjeremylt       break;
392d979a051Sjeremylt     default:
393d979a051Sjeremylt       impl->Apply = CeedElemRestrictionApply_Ref_Core;
394d979a051Sjeremylt       break;
395d979a051Sjeremylt   }
396d979a051Sjeremylt 
397e15f9bd0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
39821617c04Sjeremylt }
399fc0567d9Srezgarshakeri 
400fc0567d9Srezgarshakeri //------------------------------------------------------------------------------
401fc0567d9Srezgarshakeri // ElemRestriction Create Oriented
402fc0567d9Srezgarshakeri //------------------------------------------------------------------------------
4032b730f8bSJeremy L Thompson int CeedElemRestrictionCreateOriented_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orient,
404fc0567d9Srezgarshakeri                                           CeedElemRestriction r) {
405fc0567d9Srezgarshakeri   CeedElemRestriction_Ref *impl;
406fc0567d9Srezgarshakeri   CeedInt                  num_elem, elem_size;
407fc0567d9Srezgarshakeri   // Set up for normal restriction with explicit offsets. This sets up dispatch to
408fc0567d9Srezgarshakeri   // CeedElemRestrictionApply_Ref_* and manages the impl->offsets array copy/allocation.
4092b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionCreate_Ref(mem_type, copy_mode, offsets, r));
410fc0567d9Srezgarshakeri 
4112b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetData(r, &impl));
4122b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem));
4132b730f8bSJeremy L Thompson   CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size));
414fc0567d9Srezgarshakeri   switch (copy_mode) {
415fc0567d9Srezgarshakeri     case CEED_COPY_VALUES:
4162b730f8bSJeremy L Thompson       CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orient_allocated));
4172b730f8bSJeremy L Thompson       memcpy(impl->orient_allocated, orient, num_elem * elem_size * sizeof(orient[0]));
418fc0567d9Srezgarshakeri       impl->orient = impl->orient_allocated;
419fc0567d9Srezgarshakeri       break;
420fc0567d9Srezgarshakeri     case CEED_OWN_POINTER:
421fc0567d9Srezgarshakeri       impl->orient_allocated = (bool *)orient;
422fc0567d9Srezgarshakeri       impl->orient           = impl->orient_allocated;
423fc0567d9Srezgarshakeri       break;
424fc0567d9Srezgarshakeri     case CEED_USE_POINTER:
425fc0567d9Srezgarshakeri       impl->orient = orient;
426fc0567d9Srezgarshakeri   }
427fc0567d9Srezgarshakeri   return CEED_ERROR_SUCCESS;
428fc0567d9Srezgarshakeri }
429f10650afSjeremylt //------------------------------------------------------------------------------
430