13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 321617c04Sjeremylt // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 521617c04Sjeremylt // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 721617c04Sjeremylt 8*49aac155SJeremy L Thompson #include <ceed.h> 9ec3da8bcSJed Brown #include <ceed/backend.h> 103d576824SJeremy L Thompson #include <stdbool.h> 113d576824SJeremy L Thompson #include <string.h> 122b730f8bSJeremy L Thompson 1321617c04Sjeremylt #include "ceed-ref.h" 1421617c04Sjeremylt 15f10650afSjeremylt //------------------------------------------------------------------------------ 16f10650afSjeremylt // Core ElemRestriction Apply Code 17f10650afSjeremylt //------------------------------------------------------------------------------ 182b730f8bSJeremy L Thompson static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 192b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 202b730f8bSJeremy L Thompson CeedRequest *request) { 214ce2993fSjeremylt CeedElemRestriction_Ref *impl; 222b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 2321617c04Sjeremylt const CeedScalar *uu; 2421617c04Sjeremylt CeedScalar *vv; 25d1d35e2fSjeremylt CeedInt num_elem, elem_size, v_offset; 262b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 272b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 28d1d35e2fSjeremylt v_offset = start * blk_size * elem_size * num_comp; 2921617c04Sjeremylt 30b435c5a6Srezgarshakeri bool is_oriented; 312b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionIsOriented(r, &is_oriented)); 322b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 339c774eddSJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 349c774eddSJeremy L Thompson // Sum into for transpose mode, e-vec to l-vec 352b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 369c774eddSJeremy L Thompson } else { 379c774eddSJeremy L Thompson // Overwrite for notranspose mode, l-vec to e-vec 382b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 399c774eddSJeremy L Thompson } 407f90ec76Sjeremylt // Restriction from L-vector to E-vector 4121617c04Sjeremylt // Perform: v = r * u 42d1d35e2fSjeremylt if (t_mode == CEED_NOTRANSPOSE) { 43d979a051Sjeremylt // No offsets provided, Identity Restriction 44d979a051Sjeremylt if (!impl->offsets) { 45d1d35e2fSjeremylt bool has_backend_strides; 462b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 47d1d35e2fSjeremylt if (has_backend_strides) { 48d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 497f90ec76Sjeremylt // This if branch is left separate to allow better inlining 502b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 512b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 522b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 532b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 542b730f8bSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 552b730f8bSJeremy L Thompson uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp]; 562b730f8bSJeremy L Thompson } 572b730f8bSJeremy L Thompson } 582b730f8bSJeremy L Thompson } 592b730f8bSJeremy L Thompson } 607f90ec76Sjeremylt } else { 617f90ec76Sjeremylt // User provided strides 627f90ec76Sjeremylt CeedInt strides[3]; 632b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 642b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 652b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 662b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 672b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 682b730f8bSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 692b730f8bSJeremy L Thompson uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]]; 702b730f8bSJeremy L Thompson } 712b730f8bSJeremy L Thompson } 722b730f8bSJeremy L Thompson } 732b730f8bSJeremy L Thompson } 747509a596Sjeremylt } 7521617c04Sjeremylt } else { 76d979a051Sjeremylt // Offsets provided, standard or blocked restriction 77d1d35e2fSjeremylt // vv has shape [elem_size, num_comp, num_elem], row-major 78d1d35e2fSjeremylt // uu has shape [nnodes, num_comp] 792b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 802b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 812b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 822b730f8bSJeremy L Thompson vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = 832b730f8bSJeremy L Thompson uu[impl->offsets[i + elem_size * e] + k * comp_stride] * (is_oriented && impl->orient[i + elem_size * e] ? -1. : 1.); 842b730f8bSJeremy L Thompson } 852b730f8bSJeremy L Thompson } 862b730f8bSJeremy L Thompson } 87b435c5a6Srezgarshakeri } 8821617c04Sjeremylt } else { 897f90ec76Sjeremylt // Restriction from E-vector to L-vector 908d94b059Sjeremylt // Performing v += r^T * u 91d979a051Sjeremylt // No offsets provided, Identity Restriction 92d979a051Sjeremylt if (!impl->offsets) { 93d1d35e2fSjeremylt bool has_backend_strides; 942b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 95d1d35e2fSjeremylt if (has_backend_strides) { 96d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 977f90ec76Sjeremylt // This if brach is left separate to allow better inlining 982b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 992b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 1002b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 1012b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 1022b730f8bSJeremy L Thompson vv[n + k * elem_size + (e + j) * elem_size * num_comp] += 1032b730f8bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 1042b730f8bSJeremy L Thompson } 1052b730f8bSJeremy L Thompson } 1062b730f8bSJeremy L Thompson } 1072b730f8bSJeremy L Thompson } 1087f90ec76Sjeremylt } else { 1097f90ec76Sjeremylt // User provided strides 1107f90ec76Sjeremylt CeedInt strides[3]; 1112b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 1122b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 1132b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 1142b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 1152b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 1162b730f8bSJeremy L Thompson vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += 1172b730f8bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 1182b730f8bSJeremy L Thompson } 1192b730f8bSJeremy L Thompson } 1202b730f8bSJeremy L Thompson } 1212b730f8bSJeremy L Thompson } 122523b8ea0Sjeremylt } 12321617c04Sjeremylt } else { 124d979a051Sjeremylt // Offsets provided, standard or blocked restriction 125d1d35e2fSjeremylt // uu has shape [elem_size, num_comp, num_elem] 126d1d35e2fSjeremylt // vv has shape [nnodes, num_comp] 1272b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 1282b730f8bSJeremy L Thompson for (CeedInt k = 0; k < num_comp; k++) { 1292b730f8bSJeremy L Thompson for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 1308d94b059Sjeremylt // Iteration bound set to discard padding elements 1312b730f8bSJeremy L Thompson for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 1322b730f8bSJeremy L Thompson vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 1332b730f8bSJeremy L Thompson uu[elem_size * (k * blk_size + num_comp * e) + j - v_offset] * (is_oriented && impl->orient[j + e * elem_size] ? -1. : 1.); 13421617c04Sjeremylt } 135b435c5a6Srezgarshakeri } 1362b730f8bSJeremy L Thompson } 1372b730f8bSJeremy L Thompson } 1382b730f8bSJeremy L Thompson } 1392b730f8bSJeremy L Thompson } 1402b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 1412b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 1422b730f8bSJeremy L Thompson if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 143e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 14421617c04Sjeremylt } 14521617c04Sjeremylt 146f10650afSjeremylt //------------------------------------------------------------------------------ 147f10650afSjeremylt // ElemRestriction Apply - Common Sizes 148f10650afSjeremylt //------------------------------------------------------------------------------ 1492b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 1502b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 1512b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, t_mode, u, v, request); 152d979a051Sjeremylt } 153d979a051Sjeremylt 1542b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 1552b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 1562b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode, u, v, request); 1574d2a38eeSjeremylt } 1584d2a38eeSjeremylt 1592b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 1602b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 1612b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, t_mode, u, v, request); 1629c36149bSjeremylt } 1639c36149bSjeremylt 1642b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 1652b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 1662b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode, u, v, request); 1679c36149bSjeremylt } 1689c36149bSjeremylt 1692b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 1702b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 1712b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, t_mode, u, v, request); 172d979a051Sjeremylt } 173d979a051Sjeremylt 1742b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 1752b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 1762b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode, u, v, request); 177d979a051Sjeremylt } 178d979a051Sjeremylt 1792b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 1802b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 1812b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, t_mode, u, v, request); 182d979a051Sjeremylt } 183d979a051Sjeremylt 1842b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 1852b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 1862b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode, u, v, request); 187d979a051Sjeremylt } 188d979a051Sjeremylt 189bf4d1581Sjeremylt // LCOV_EXCL_START 1902b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 1912b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 1922b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, t_mode, u, v, request); 193d979a051Sjeremylt } 194bf4d1581Sjeremylt // LCOV_EXCL_STOP 195d979a051Sjeremylt 1962b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 1972b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 1982b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode, u, v, request); 199d979a051Sjeremylt } 200d979a051Sjeremylt 201bf4d1581Sjeremylt // LCOV_EXCL_START 2022b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 2032b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 2042b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, t_mode, u, v, request); 205d979a051Sjeremylt } 206bf4d1581Sjeremylt // LCOV_EXCL_STOP 207d979a051Sjeremylt 2082b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 2092b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 2102b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode, u, v, request); 2114d2a38eeSjeremylt } 2124d2a38eeSjeremylt 213f10650afSjeremylt //------------------------------------------------------------------------------ 214f10650afSjeremylt // ElemRestriction Apply 215f10650afSjeremylt //------------------------------------------------------------------------------ 2162b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 217d1d35e2fSjeremylt CeedInt num_blk, blk_size, num_comp, comp_stride; 2182b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 2192b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 2202b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 2212b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 2227509a596Sjeremylt CeedElemRestriction_Ref *impl; 2232b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 2244d2a38eeSjeremylt 2252b730f8bSJeremy L Thompson return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v, request); 2269c36149bSjeremylt } 227be9261b7Sjeremylt 228f10650afSjeremylt //------------------------------------------------------------------------------ 229f10650afSjeremylt // ElemRestriction Apply Block 230f10650afSjeremylt //------------------------------------------------------------------------------ 2312b730f8bSJeremy L Thompson static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 232074cb416Sjeremylt CeedRequest *request) { 233d1d35e2fSjeremylt CeedInt blk_size, num_comp, comp_stride; 2342b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 2352b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 2362b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 2377509a596Sjeremylt CeedElemRestriction_Ref *impl; 2382b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 2394d2a38eeSjeremylt 2402b730f8bSJeremy L Thompson return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, t_mode, u, v, request); 2419c36149bSjeremylt } 242be9261b7Sjeremylt 243f10650afSjeremylt //------------------------------------------------------------------------------ 244bd33150aSjeremylt // ElemRestriction Get Offsets 245bd33150aSjeremylt //------------------------------------------------------------------------------ 2462b730f8bSJeremy L Thompson static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 247bd33150aSjeremylt CeedElemRestriction_Ref *impl; 2482b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 249bd33150aSjeremylt Ceed ceed; 2502b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 251bd33150aSjeremylt 2522b730f8bSJeremy L Thompson if (mem_type != CEED_MEM_HOST) { 253bd33150aSjeremylt // LCOV_EXCL_START 254e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 255bd33150aSjeremylt // LCOV_EXCL_STOP 2562b730f8bSJeremy L Thompson } 257bd33150aSjeremylt 258bd33150aSjeremylt *offsets = impl->offsets; 259e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 260bd33150aSjeremylt } 261bd33150aSjeremylt 262bd33150aSjeremylt //------------------------------------------------------------------------------ 263f10650afSjeremylt // ElemRestriction Destroy 264f10650afSjeremylt //------------------------------------------------------------------------------ 26521617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 266fe2413ffSjeremylt CeedElemRestriction_Ref *impl; 2672b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 26821617c04Sjeremylt 2692b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->offsets_allocated)); 2702b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->orient_allocated)); 2712b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl)); 272e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 27321617c04Sjeremylt } 27421617c04Sjeremylt 275f10650afSjeremylt //------------------------------------------------------------------------------ 276f10650afSjeremylt // ElemRestriction Create 277f10650afSjeremylt //------------------------------------------------------------------------------ 2782b730f8bSJeremy L Thompson int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, CeedElemRestriction r) { 27921617c04Sjeremylt CeedElemRestriction_Ref *impl; 280d1d35e2fSjeremylt CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 2812b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 2822b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 2832b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 2842b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 2852b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 2862b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 2874ce2993fSjeremylt Ceed ceed; 2882b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 28921617c04Sjeremylt 2902b730f8bSJeremy L Thompson if (mem_type != CEED_MEM_HOST) { 291c042f62fSJeremy L Thompson // LCOV_EXCL_START 292e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 293c042f62fSJeremy L Thompson // LCOV_EXCL_STOP 2942b730f8bSJeremy L Thompson } 2952b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &impl)); 2963661185eSjeremylt 29792fe105eSJeremy L Thompson // Offsets data 298d1d35e2fSjeremylt bool is_strided; 2992b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionIsStrided(r, &is_strided)); 300d1d35e2fSjeremylt if (!is_strided) { 30192fe105eSJeremy L Thompson // Check indices for ref or memcheck backends 302d1d35e2fSjeremylt Ceed parent_ceed = ceed, curr_ceed = NULL; 303d1d35e2fSjeremylt while (parent_ceed != curr_ceed) { 304d1d35e2fSjeremylt curr_ceed = parent_ceed; 3052b730f8bSJeremy L Thompson CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed)); 3063661185eSjeremylt } 3073661185eSjeremylt const char *resource; 3082b730f8bSJeremy L Thompson CeedCallBackend(CeedGetResource(parent_ceed, &resource)); 3092b730f8bSJeremy L Thompson if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 310d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/blocked")) { 311e79b91d9SJeremy L Thompson CeedSize l_size; 3122b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size)); 3133661185eSjeremylt 3142b730f8bSJeremy L Thompson for (CeedInt i = 0; i < num_elem * elem_size; i++) { 3152b730f8bSJeremy L Thompson if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp - 1) * comp_stride) { 3163661185eSjeremylt // LCOV_EXCL_START 3172b730f8bSJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, 3182b730f8bSJeremy L Thompson offsets[i], l_size); 3193661185eSjeremylt // LCOV_EXCL_STOP 3203661185eSjeremylt } 3212b730f8bSJeremy L Thompson } 3222b730f8bSJeremy L Thompson } 3233661185eSjeremylt 32492fe105eSJeremy L Thompson // Copy data 325d1d35e2fSjeremylt switch (copy_mode) { 32621617c04Sjeremylt case CEED_COPY_VALUES: 3272b730f8bSJeremy L Thompson CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated)); 3282b730f8bSJeremy L Thompson memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0])); 329d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 33021617c04Sjeremylt break; 33121617c04Sjeremylt case CEED_OWN_POINTER: 332d979a051Sjeremylt impl->offsets_allocated = (CeedInt *)offsets; 333d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 33421617c04Sjeremylt break; 33521617c04Sjeremylt case CEED_USE_POINTER: 336d979a051Sjeremylt impl->offsets = offsets; 33721617c04Sjeremylt } 33892fe105eSJeremy L Thompson } 339fe2413ffSjeremylt 3402b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetData(r, impl)); 341d1d35e2fSjeremylt CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 3422b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetELayout(r, layout)); 3432b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref)); 3442b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 3452b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 3462b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref)); 347d979a051Sjeremylt 348d1d35e2fSjeremylt // Set apply function based upon num_comp, blk_size, and comp_stride 349d979a051Sjeremylt CeedInt idx = -1; 3502b730f8bSJeremy L Thompson if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1); 351d979a051Sjeremylt switch (idx) { 352d979a051Sjeremylt case 110: 353d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_110; 354d979a051Sjeremylt break; 355d979a051Sjeremylt case 111: 356d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_111; 357d979a051Sjeremylt break; 358d979a051Sjeremylt case 180: 359d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_180; 360d979a051Sjeremylt break; 361d979a051Sjeremylt case 181: 362d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_181; 363d979a051Sjeremylt break; 364d979a051Sjeremylt case 310: 365d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_310; 366d979a051Sjeremylt break; 367d979a051Sjeremylt case 311: 368d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_311; 369d979a051Sjeremylt break; 370d979a051Sjeremylt case 380: 371d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_380; 372d979a051Sjeremylt break; 373d979a051Sjeremylt case 381: 374d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_381; 375d979a051Sjeremylt break; 376bf4d1581Sjeremylt // LCOV_EXCL_START 377d979a051Sjeremylt case 510: 378d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_510; 379d979a051Sjeremylt break; 380bf4d1581Sjeremylt // LCOV_EXCL_STOP 381d979a051Sjeremylt case 511: 382d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_511; 383d979a051Sjeremylt break; 384bf4d1581Sjeremylt // LCOV_EXCL_START 385d979a051Sjeremylt case 580: 386d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_580; 387d979a051Sjeremylt break; 388bf4d1581Sjeremylt // LCOV_EXCL_STOP 389d979a051Sjeremylt case 581: 390d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_581; 391d979a051Sjeremylt break; 392d979a051Sjeremylt default: 393d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_Core; 394d979a051Sjeremylt break; 395d979a051Sjeremylt } 396d979a051Sjeremylt 397e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 39821617c04Sjeremylt } 399fc0567d9Srezgarshakeri 400fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 401fc0567d9Srezgarshakeri // ElemRestriction Create Oriented 402fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 4032b730f8bSJeremy L Thompson int CeedElemRestrictionCreateOriented_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orient, 404fc0567d9Srezgarshakeri CeedElemRestriction r) { 405fc0567d9Srezgarshakeri CeedElemRestriction_Ref *impl; 406fc0567d9Srezgarshakeri CeedInt num_elem, elem_size; 407fc0567d9Srezgarshakeri // Set up for normal restriction with explicit offsets. This sets up dispatch to 408fc0567d9Srezgarshakeri // CeedElemRestrictionApply_Ref_* and manages the impl->offsets array copy/allocation. 4092b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionCreate_Ref(mem_type, copy_mode, offsets, r)); 410fc0567d9Srezgarshakeri 4112b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 4122b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 4132b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 414fc0567d9Srezgarshakeri switch (copy_mode) { 415fc0567d9Srezgarshakeri case CEED_COPY_VALUES: 4162b730f8bSJeremy L Thompson CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orient_allocated)); 4172b730f8bSJeremy L Thompson memcpy(impl->orient_allocated, orient, num_elem * elem_size * sizeof(orient[0])); 418fc0567d9Srezgarshakeri impl->orient = impl->orient_allocated; 419fc0567d9Srezgarshakeri break; 420fc0567d9Srezgarshakeri case CEED_OWN_POINTER: 421fc0567d9Srezgarshakeri impl->orient_allocated = (bool *)orient; 422fc0567d9Srezgarshakeri impl->orient = impl->orient_allocated; 423fc0567d9Srezgarshakeri break; 424fc0567d9Srezgarshakeri case CEED_USE_POINTER: 425fc0567d9Srezgarshakeri impl->orient = orient; 426fc0567d9Srezgarshakeri } 427fc0567d9Srezgarshakeri return CEED_ERROR_SUCCESS; 428fc0567d9Srezgarshakeri } 429f10650afSjeremylt //------------------------------------------------------------------------------ 430