13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 321617c04Sjeremylt // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 521617c04Sjeremylt // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 721617c04Sjeremylt 849aac155SJeremy L Thompson #include <ceed.h> 9ec3da8bcSJed Brown #include <ceed/backend.h> 103d576824SJeremy L Thompson #include <stdbool.h> 113d576824SJeremy L Thompson #include <string.h> 122b730f8bSJeremy L Thompson 1321617c04Sjeremylt #include "ceed-ref.h" 1421617c04Sjeremylt 15f10650afSjeremylt //------------------------------------------------------------------------------ 16f10650afSjeremylt // Core ElemRestriction Apply Code 17f10650afSjeremylt //------------------------------------------------------------------------------ 182b730f8bSJeremy L Thompson static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 19*f30b1135SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 202b730f8bSJeremy L Thompson CeedRequest *request) { 214ce2993fSjeremylt CeedElemRestriction_Ref *impl; 222b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 2321617c04Sjeremylt const CeedScalar *uu; 2421617c04Sjeremylt CeedScalar *vv; 25d1d35e2fSjeremylt CeedInt num_elem, elem_size, v_offset; 262b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 272b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 28d1d35e2fSjeremylt v_offset = start * blk_size * elem_size * num_comp; 2921617c04Sjeremylt 30b435c5a6Srezgarshakeri bool is_oriented; 312b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionIsOriented(r, &is_oriented)); 322b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 339c774eddSJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 349c774eddSJeremy L Thompson // Sum into for transpose mode, e-vec to l-vec 352b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 369c774eddSJeremy L Thompson } else { 379c774eddSJeremy L Thompson // Overwrite for notranspose mode, l-vec to e-vec 382b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 399c774eddSJeremy L Thompson } 407f90ec76Sjeremylt // Restriction from L-vector to E-vector 4121617c04Sjeremylt // Perform: v = r * u 42d1d35e2fSjeremylt if (t_mode == CEED_NOTRANSPOSE) { 43d979a051Sjeremylt // No offsets provided, Identity Restriction 44d979a051Sjeremylt if (!impl->offsets) { 45d1d35e2fSjeremylt bool has_backend_strides; 462b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 47d1d35e2fSjeremylt if (has_backend_strides) { 48d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 497f90ec76Sjeremylt // This if branch is left separate to allow better inlining 502b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 512b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 522b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 532b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 542b730f8bSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 552b730f8bSJeremy L Thompson uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp]; 562b730f8bSJeremy L Thompson } 572b730f8bSJeremy L Thompson } 582b730f8bSJeremy L Thompson } 592b730f8bSJeremy L Thompson } 607f90ec76Sjeremylt } else { 617f90ec76Sjeremylt // User provided strides 627f90ec76Sjeremylt CeedInt strides[3]; 632b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 642b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 652b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 662b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 672b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 682b730f8bSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 692b730f8bSJeremy L Thompson uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]]; 702b730f8bSJeremy L Thompson } 712b730f8bSJeremy L Thompson } 722b730f8bSJeremy L Thompson } 732b730f8bSJeremy L Thompson } 747509a596Sjeremylt } 7521617c04Sjeremylt } else { 76d979a051Sjeremylt // Offsets provided, standard or blocked restriction 77d1d35e2fSjeremylt // vv has shape [elem_size, num_comp, num_elem], row-major 78d1d35e2fSjeremylt // uu has shape [nnodes, num_comp] 792b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 802b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 812b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 82*f30b1135SSebastian Grimberg if (!use_orient || !impl->orient) { 83*f30b1135SSebastian Grimberg // Unsigned restriction 842b730f8bSJeremy L Thompson vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = 85*f30b1135SSebastian Grimberg uu[impl->offsets[i + elem_size * e] + k * comp_stride]; 86*f30b1135SSebastian Grimberg } else { 87*f30b1135SSebastian Grimberg // Signed restriction 88*f30b1135SSebastian Grimberg vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = 89*f30b1135SSebastian Grimberg uu[impl->offsets[i + elem_size * e] + k * comp_stride] * (impl->orient[i + elem_size * e] ? -1.0 : 1.0); 90*f30b1135SSebastian Grimberg } 912b730f8bSJeremy L Thompson } 922b730f8bSJeremy L Thompson } 932b730f8bSJeremy L Thompson } 94b435c5a6Srezgarshakeri } 9521617c04Sjeremylt } else { 967f90ec76Sjeremylt // Restriction from E-vector to L-vector 978d94b059Sjeremylt // Performing v += r^T * u 98d979a051Sjeremylt // No offsets provided, Identity Restriction 99d979a051Sjeremylt if (!impl->offsets) { 100d1d35e2fSjeremylt bool has_backend_strides; 1012b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 102d1d35e2fSjeremylt if (has_backend_strides) { 103d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 1047f90ec76Sjeremylt // This if brach is left separate to allow better inlining 1052b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 1062b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 1072b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 1082b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 1092b730f8bSJeremy L Thompson vv[n + k * elem_size + (e + j) * elem_size * num_comp] += 1102b730f8bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 1112b730f8bSJeremy L Thompson } 1122b730f8bSJeremy L Thompson } 1132b730f8bSJeremy L Thompson } 1142b730f8bSJeremy L Thompson } 1157f90ec76Sjeremylt } else { 1167f90ec76Sjeremylt // User provided strides 1177f90ec76Sjeremylt CeedInt strides[3]; 1182b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 1192b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 1202b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 1212b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 1222b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 1232b730f8bSJeremy L Thompson vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += 1242b730f8bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 1252b730f8bSJeremy L Thompson } 1262b730f8bSJeremy L Thompson } 1272b730f8bSJeremy L Thompson } 1282b730f8bSJeremy L Thompson } 129523b8ea0Sjeremylt } 13021617c04Sjeremylt } else { 131d979a051Sjeremylt // Offsets provided, standard or blocked restriction 132d1d35e2fSjeremylt // uu has shape [elem_size, num_comp, num_elem] 133d1d35e2fSjeremylt // vv has shape [nnodes, num_comp] 1342b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 1352b730f8bSJeremy L Thompson for (CeedInt k = 0; k < num_comp; k++) { 1362b730f8bSJeremy L Thompson for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 1378d94b059Sjeremylt // Iteration bound set to discard padding elements 1382b730f8bSJeremy L Thompson for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 139*f30b1135SSebastian Grimberg if (!use_orient || !impl->orient) { 140*f30b1135SSebastian Grimberg // Unsigned restriction 1412b730f8bSJeremy L Thompson vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 142*f30b1135SSebastian Grimberg uu[elem_size * (k * blk_size + num_comp * e) + j - v_offset]; 143*f30b1135SSebastian Grimberg } else { 144*f30b1135SSebastian Grimberg // Signed restriction 145*f30b1135SSebastian Grimberg vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 146*f30b1135SSebastian Grimberg uu[elem_size * (k * blk_size + num_comp * e) + j - v_offset] * (impl->orient[j + e * elem_size] ? -1.0 : 1.0); 147*f30b1135SSebastian Grimberg } 14821617c04Sjeremylt } 149b435c5a6Srezgarshakeri } 1502b730f8bSJeremy L Thompson } 1512b730f8bSJeremy L Thompson } 1522b730f8bSJeremy L Thompson } 1532b730f8bSJeremy L Thompson } 1542b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 1552b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 1562b730f8bSJeremy L Thompson if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 157e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 15821617c04Sjeremylt } 15921617c04Sjeremylt 160f10650afSjeremylt //------------------------------------------------------------------------------ 161f10650afSjeremylt // ElemRestriction Apply - Common Sizes 162f10650afSjeremylt //------------------------------------------------------------------------------ 1632b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 164*f30b1135SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 165*f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, use_orient, t_mode, u, v, request); 166d979a051Sjeremylt } 167d979a051Sjeremylt 1682b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 169*f30b1135SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 170*f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, use_orient, t_mode, u, v, request); 1714d2a38eeSjeremylt } 1724d2a38eeSjeremylt 1732b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 174*f30b1135SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 175*f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, use_orient, t_mode, u, v, request); 1769c36149bSjeremylt } 1779c36149bSjeremylt 1782b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 179*f30b1135SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 180*f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, use_orient, t_mode, u, v, request); 1819c36149bSjeremylt } 1829c36149bSjeremylt 1832b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 184*f30b1135SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 185*f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, use_orient, t_mode, u, v, request); 186d979a051Sjeremylt } 187d979a051Sjeremylt 1882b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 189*f30b1135SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 190*f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, use_orient, t_mode, u, v, request); 191d979a051Sjeremylt } 192d979a051Sjeremylt 1932b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 194*f30b1135SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 195*f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, use_orient, t_mode, u, v, request); 196d979a051Sjeremylt } 197d979a051Sjeremylt 1982b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 199*f30b1135SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 200*f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, use_orient, t_mode, u, v, request); 201d979a051Sjeremylt } 202d979a051Sjeremylt 203bf4d1581Sjeremylt // LCOV_EXCL_START 2042b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 205*f30b1135SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 206*f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, use_orient, t_mode, u, v, request); 207d979a051Sjeremylt } 208bf4d1581Sjeremylt // LCOV_EXCL_STOP 209d979a051Sjeremylt 2102b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 211*f30b1135SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 212*f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, use_orient, t_mode, u, v, request); 213d979a051Sjeremylt } 214d979a051Sjeremylt 215bf4d1581Sjeremylt // LCOV_EXCL_START 2162b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 217*f30b1135SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 218*f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, use_orient, t_mode, u, v, request); 219d979a051Sjeremylt } 220bf4d1581Sjeremylt // LCOV_EXCL_STOP 221d979a051Sjeremylt 2222b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 223*f30b1135SSebastian Grimberg CeedInt start, CeedInt stop, bool use_orient, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 224*f30b1135SSebastian Grimberg return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, use_orient, t_mode, u, v, request); 2254d2a38eeSjeremylt } 2264d2a38eeSjeremylt 227f10650afSjeremylt //------------------------------------------------------------------------------ 228f10650afSjeremylt // ElemRestriction Apply 229f10650afSjeremylt //------------------------------------------------------------------------------ 2302b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 231d1d35e2fSjeremylt CeedInt num_blk, blk_size, num_comp, comp_stride; 2322b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 2332b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 2342b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 2352b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 2367509a596Sjeremylt CeedElemRestriction_Ref *impl; 2372b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 2384d2a38eeSjeremylt 239*f30b1135SSebastian Grimberg return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, true, t_mode, u, v, request); 240*f30b1135SSebastian Grimberg } 241*f30b1135SSebastian Grimberg 242*f30b1135SSebastian Grimberg //------------------------------------------------------------------------------ 243*f30b1135SSebastian Grimberg // ElemRestriction Apply Unsigned 244*f30b1135SSebastian Grimberg //------------------------------------------------------------------------------ 245*f30b1135SSebastian Grimberg static int CeedElemRestrictionApplyUnsigned_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 246*f30b1135SSebastian Grimberg CeedInt num_blk, blk_size, num_comp, comp_stride; 247*f30b1135SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 248*f30b1135SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 249*f30b1135SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 250*f30b1135SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 251*f30b1135SSebastian Grimberg CeedElemRestriction_Ref *impl; 252*f30b1135SSebastian Grimberg CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 253*f30b1135SSebastian Grimberg 254*f30b1135SSebastian Grimberg return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, false, t_mode, u, v, request); 2559c36149bSjeremylt } 256be9261b7Sjeremylt 257f10650afSjeremylt //------------------------------------------------------------------------------ 258f10650afSjeremylt // ElemRestriction Apply Block 259f10650afSjeremylt //------------------------------------------------------------------------------ 2602b730f8bSJeremy L Thompson static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 261074cb416Sjeremylt CeedRequest *request) { 262d1d35e2fSjeremylt CeedInt blk_size, num_comp, comp_stride; 2632b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 2642b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 2652b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 2667509a596Sjeremylt CeedElemRestriction_Ref *impl; 2672b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 2684d2a38eeSjeremylt 269*f30b1135SSebastian Grimberg return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, true, t_mode, u, v, request); 2709c36149bSjeremylt } 271be9261b7Sjeremylt 272f10650afSjeremylt //------------------------------------------------------------------------------ 273bd33150aSjeremylt // ElemRestriction Get Offsets 274bd33150aSjeremylt //------------------------------------------------------------------------------ 2752b730f8bSJeremy L Thompson static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 276bd33150aSjeremylt CeedElemRestriction_Ref *impl; 2772b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 278bd33150aSjeremylt Ceed ceed; 2792b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 280bd33150aSjeremylt 2816574a04fSJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 282bd33150aSjeremylt 283bd33150aSjeremylt *offsets = impl->offsets; 284e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 285bd33150aSjeremylt } 286bd33150aSjeremylt 287bd33150aSjeremylt //------------------------------------------------------------------------------ 288f10650afSjeremylt // ElemRestriction Destroy 289f10650afSjeremylt //------------------------------------------------------------------------------ 29021617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 291fe2413ffSjeremylt CeedElemRestriction_Ref *impl; 2922b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 29321617c04Sjeremylt 2942b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->offsets_allocated)); 2952b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->orient_allocated)); 2962b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl)); 297e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 29821617c04Sjeremylt } 29921617c04Sjeremylt 300f10650afSjeremylt //------------------------------------------------------------------------------ 301f10650afSjeremylt // ElemRestriction Create 302f10650afSjeremylt //------------------------------------------------------------------------------ 3032b730f8bSJeremy L Thompson int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, CeedElemRestriction r) { 30421617c04Sjeremylt CeedElemRestriction_Ref *impl; 305d1d35e2fSjeremylt CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 3062b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 3072b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 3082b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 3092b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 3102b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 3112b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 3124ce2993fSjeremylt Ceed ceed; 3132b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 31421617c04Sjeremylt 3156574a04fSJeremy L Thompson CeedCheck(mem_type == CEED_MEM_HOST, ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 3162b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &impl)); 3173661185eSjeremylt 31892fe105eSJeremy L Thompson // Offsets data 319d1d35e2fSjeremylt bool is_strided; 3202b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionIsStrided(r, &is_strided)); 321d1d35e2fSjeremylt if (!is_strided) { 32292fe105eSJeremy L Thompson // Check indices for ref or memcheck backends 323d1d35e2fSjeremylt Ceed parent_ceed = ceed, curr_ceed = NULL; 324d1d35e2fSjeremylt while (parent_ceed != curr_ceed) { 325d1d35e2fSjeremylt curr_ceed = parent_ceed; 3262b730f8bSJeremy L Thompson CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed)); 3273661185eSjeremylt } 3283661185eSjeremylt const char *resource; 3292b730f8bSJeremy L Thompson CeedCallBackend(CeedGetResource(parent_ceed, &resource)); 3302b730f8bSJeremy L Thompson if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 331d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/blocked")) { 332e79b91d9SJeremy L Thompson CeedSize l_size; 3332b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size)); 3343661185eSjeremylt 3352b730f8bSJeremy L Thompson for (CeedInt i = 0; i < num_elem * elem_size; i++) { 3366574a04fSJeremy L Thompson CeedCheck(offsets[i] >= 0 && offsets[i] + (num_comp - 1) * comp_stride < l_size, ceed, CEED_ERROR_BACKEND, 3376574a04fSJeremy L Thompson "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, offsets[i], l_size); 3382b730f8bSJeremy L Thompson } 3392b730f8bSJeremy L Thompson } 3403661185eSjeremylt 34192fe105eSJeremy L Thompson // Copy data 342d1d35e2fSjeremylt switch (copy_mode) { 34321617c04Sjeremylt case CEED_COPY_VALUES: 3442b730f8bSJeremy L Thompson CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated)); 3452b730f8bSJeremy L Thompson memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0])); 346d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 34721617c04Sjeremylt break; 34821617c04Sjeremylt case CEED_OWN_POINTER: 349d979a051Sjeremylt impl->offsets_allocated = (CeedInt *)offsets; 350d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 35121617c04Sjeremylt break; 35221617c04Sjeremylt case CEED_USE_POINTER: 353d979a051Sjeremylt impl->offsets = offsets; 35421617c04Sjeremylt } 35592fe105eSJeremy L Thompson } 356fe2413ffSjeremylt 3572b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetData(r, impl)); 358d1d35e2fSjeremylt CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 3592b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetELayout(r, layout)); 3602b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref)); 361*f30b1135SSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyUnsigned", CeedElemRestrictionApplyUnsigned_Ref)); 3622b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 3632b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 3642b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref)); 365d979a051Sjeremylt 366d1d35e2fSjeremylt // Set apply function based upon num_comp, blk_size, and comp_stride 367d979a051Sjeremylt CeedInt idx = -1; 3682b730f8bSJeremy L Thompson if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1); 369d979a051Sjeremylt switch (idx) { 370d979a051Sjeremylt case 110: 371d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_110; 372d979a051Sjeremylt break; 373d979a051Sjeremylt case 111: 374d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_111; 375d979a051Sjeremylt break; 376d979a051Sjeremylt case 180: 377d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_180; 378d979a051Sjeremylt break; 379d979a051Sjeremylt case 181: 380d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_181; 381d979a051Sjeremylt break; 382d979a051Sjeremylt case 310: 383d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_310; 384d979a051Sjeremylt break; 385d979a051Sjeremylt case 311: 386d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_311; 387d979a051Sjeremylt break; 388d979a051Sjeremylt case 380: 389d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_380; 390d979a051Sjeremylt break; 391d979a051Sjeremylt case 381: 392d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_381; 393d979a051Sjeremylt break; 394bf4d1581Sjeremylt // LCOV_EXCL_START 395d979a051Sjeremylt case 510: 396d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_510; 397d979a051Sjeremylt break; 398bf4d1581Sjeremylt // LCOV_EXCL_STOP 399d979a051Sjeremylt case 511: 400d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_511; 401d979a051Sjeremylt break; 402bf4d1581Sjeremylt // LCOV_EXCL_START 403d979a051Sjeremylt case 580: 404d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_580; 405d979a051Sjeremylt break; 406bf4d1581Sjeremylt // LCOV_EXCL_STOP 407d979a051Sjeremylt case 581: 408d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_581; 409d979a051Sjeremylt break; 410d979a051Sjeremylt default: 411d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_Core; 412d979a051Sjeremylt break; 413d979a051Sjeremylt } 414d979a051Sjeremylt 415e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 41621617c04Sjeremylt } 417fc0567d9Srezgarshakeri 418fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 419fc0567d9Srezgarshakeri // ElemRestriction Create Oriented 420fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 4212b730f8bSJeremy L Thompson int CeedElemRestrictionCreateOriented_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orient, 422fc0567d9Srezgarshakeri CeedElemRestriction r) { 423fc0567d9Srezgarshakeri // Set up for normal restriction with explicit offsets. This sets up dispatch to 424fc0567d9Srezgarshakeri // CeedElemRestrictionApply_Ref_* and manages the impl->offsets array copy/allocation. 425*f30b1135SSebastian Grimberg CeedElemRestriction_Ref *impl; 426*f30b1135SSebastian Grimberg CeedInt num_elem, elem_size; 4272b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionCreate_Ref(mem_type, copy_mode, offsets, r)); 4282b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 4292b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 4302b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 431*f30b1135SSebastian Grimberg 432*f30b1135SSebastian Grimberg // Copy data 433fc0567d9Srezgarshakeri switch (copy_mode) { 434fc0567d9Srezgarshakeri case CEED_COPY_VALUES: 4352b730f8bSJeremy L Thompson CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orient_allocated)); 4362b730f8bSJeremy L Thompson memcpy(impl->orient_allocated, orient, num_elem * elem_size * sizeof(orient[0])); 437fc0567d9Srezgarshakeri impl->orient = impl->orient_allocated; 438fc0567d9Srezgarshakeri break; 439fc0567d9Srezgarshakeri case CEED_OWN_POINTER: 440fc0567d9Srezgarshakeri impl->orient_allocated = (bool *)orient; 441fc0567d9Srezgarshakeri impl->orient = impl->orient_allocated; 442fc0567d9Srezgarshakeri break; 443fc0567d9Srezgarshakeri case CEED_USE_POINTER: 444fc0567d9Srezgarshakeri impl->orient = orient; 445fc0567d9Srezgarshakeri } 446*f30b1135SSebastian Grimberg 447fc0567d9Srezgarshakeri return CEED_ERROR_SUCCESS; 448fc0567d9Srezgarshakeri } 4492a86cc9dSSebastian Grimberg 450f10650afSjeremylt //------------------------------------------------------------------------------ 451