13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 321617c04Sjeremylt // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 521617c04Sjeremylt // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 721617c04Sjeremylt 8ec3da8bcSJed Brown #include <ceed/backend.h> 9*2b730f8bSJeremy L Thompson #include <ceed/ceed.h> 103d576824SJeremy L Thompson #include <stdbool.h> 113d576824SJeremy L Thompson #include <string.h> 12*2b730f8bSJeremy L Thompson 1321617c04Sjeremylt #include "ceed-ref.h" 1421617c04Sjeremylt 15f10650afSjeremylt //------------------------------------------------------------------------------ 16f10650afSjeremylt // Core ElemRestriction Apply Code 17f10650afSjeremylt //------------------------------------------------------------------------------ 18*2b730f8bSJeremy L Thompson static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 19*2b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 20*2b730f8bSJeremy L Thompson CeedRequest *request) { 214ce2993fSjeremylt CeedElemRestriction_Ref *impl; 22*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 2321617c04Sjeremylt const CeedScalar *uu; 2421617c04Sjeremylt CeedScalar *vv; 25d1d35e2fSjeremylt CeedInt num_elem, elem_size, v_offset; 26*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 27*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 28d1d35e2fSjeremylt v_offset = start * blk_size * elem_size * num_comp; 2921617c04Sjeremylt 30b435c5a6Srezgarshakeri bool is_oriented; 31*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionIsOriented(r, &is_oriented)); 32*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu)); 339c774eddSJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 349c774eddSJeremy L Thompson // Sum into for transpose mode, e-vec to l-vec 35*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_HOST, &vv)); 369c774eddSJeremy L Thompson } else { 379c774eddSJeremy L Thompson // Overwrite for notranspose mode, l-vec to e-vec 38*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv)); 399c774eddSJeremy L Thompson } 407f90ec76Sjeremylt // Restriction from L-vector to E-vector 4121617c04Sjeremylt // Perform: v = r * u 42d1d35e2fSjeremylt if (t_mode == CEED_NOTRANSPOSE) { 43d979a051Sjeremylt // No offsets provided, Identity Restriction 44d979a051Sjeremylt if (!impl->offsets) { 45d1d35e2fSjeremylt bool has_backend_strides; 46*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 47d1d35e2fSjeremylt if (has_backend_strides) { 48d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 497f90ec76Sjeremylt // This if branch is left separate to allow better inlining 50*2b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 51*2b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 52*2b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 53*2b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 54*2b730f8bSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 55*2b730f8bSJeremy L Thompson uu[n + k * elem_size + CeedIntMin(e + j, num_elem - 1) * elem_size * num_comp]; 56*2b730f8bSJeremy L Thompson } 57*2b730f8bSJeremy L Thompson } 58*2b730f8bSJeremy L Thompson } 59*2b730f8bSJeremy L Thompson } 607f90ec76Sjeremylt } else { 617f90ec76Sjeremylt // User provided strides 627f90ec76Sjeremylt CeedInt strides[3]; 63*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 64*2b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 65*2b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 66*2b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 67*2b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < blk_size; j++) { 68*2b730f8bSJeremy L Thompson vv[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset] = 69*2b730f8bSJeremy L Thompson uu[n * strides[0] + k * strides[1] + CeedIntMin(e + j, num_elem - 1) * strides[2]]; 70*2b730f8bSJeremy L Thompson } 71*2b730f8bSJeremy L Thompson } 72*2b730f8bSJeremy L Thompson } 73*2b730f8bSJeremy L Thompson } 747509a596Sjeremylt } 7521617c04Sjeremylt } else { 76d979a051Sjeremylt // Offsets provided, standard or blocked restriction 77d1d35e2fSjeremylt // vv has shape [elem_size, num_comp, num_elem], row-major 78d1d35e2fSjeremylt // uu has shape [nnodes, num_comp] 79*2b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 80*2b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 81*2b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt i = 0; i < elem_size * blk_size; i++) { 82*2b730f8bSJeremy L Thompson vv[elem_size * (k * blk_size + num_comp * e) + i - v_offset] = 83*2b730f8bSJeremy L Thompson uu[impl->offsets[i + elem_size * e] + k * comp_stride] * (is_oriented && impl->orient[i + elem_size * e] ? -1. : 1.); 84*2b730f8bSJeremy L Thompson } 85*2b730f8bSJeremy L Thompson } 86*2b730f8bSJeremy L Thompson } 87b435c5a6Srezgarshakeri } 8821617c04Sjeremylt } else { 897f90ec76Sjeremylt // Restriction from E-vector to L-vector 908d94b059Sjeremylt // Performing v += r^T * u 91d979a051Sjeremylt // No offsets provided, Identity Restriction 92d979a051Sjeremylt if (!impl->offsets) { 93d1d35e2fSjeremylt bool has_backend_strides; 94*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionHasBackendStrides(r, &has_backend_strides)); 95d1d35e2fSjeremylt if (has_backend_strides) { 96d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 977f90ec76Sjeremylt // This if brach is left separate to allow better inlining 98*2b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 99*2b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 100*2b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 101*2b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 102*2b730f8bSJeremy L Thompson vv[n + k * elem_size + (e + j) * elem_size * num_comp] += 103*2b730f8bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 104*2b730f8bSJeremy L Thompson } 105*2b730f8bSJeremy L Thompson } 106*2b730f8bSJeremy L Thompson } 107*2b730f8bSJeremy L Thompson } 1087f90ec76Sjeremylt } else { 1097f90ec76Sjeremylt // User provided strides 1107f90ec76Sjeremylt CeedInt strides[3]; 111*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetStrides(r, &strides)); 112*2b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 113*2b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt k = 0; k < num_comp; k++) { 114*2b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt n = 0; n < elem_size; n++) { 115*2b730f8bSJeremy L Thompson CeedPragmaSIMD for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem - e); j++) { 116*2b730f8bSJeremy L Thompson vv[n * strides[0] + k * strides[1] + (e + j) * strides[2]] += 117*2b730f8bSJeremy L Thompson uu[e * elem_size * num_comp + (k * elem_size + n) * blk_size + j - v_offset]; 118*2b730f8bSJeremy L Thompson } 119*2b730f8bSJeremy L Thompson } 120*2b730f8bSJeremy L Thompson } 121*2b730f8bSJeremy L Thompson } 122523b8ea0Sjeremylt } 12321617c04Sjeremylt } else { 124d979a051Sjeremylt // Offsets provided, standard or blocked restriction 125d1d35e2fSjeremylt // uu has shape [elem_size, num_comp, num_elem] 126d1d35e2fSjeremylt // vv has shape [nnodes, num_comp] 127*2b730f8bSJeremy L Thompson for (CeedInt e = start * blk_size; e < stop * blk_size; e += blk_size) { 128*2b730f8bSJeremy L Thompson for (CeedInt k = 0; k < num_comp; k++) { 129*2b730f8bSJeremy L Thompson for (CeedInt i = 0; i < elem_size * blk_size; i += blk_size) { 1308d94b059Sjeremylt // Iteration bound set to discard padding elements 131*2b730f8bSJeremy L Thompson for (CeedInt j = i; j < i + CeedIntMin(blk_size, num_elem - e); j++) { 132*2b730f8bSJeremy L Thompson vv[impl->offsets[j + e * elem_size] + k * comp_stride] += 133*2b730f8bSJeremy L Thompson uu[elem_size * (k * blk_size + num_comp * e) + j - v_offset] * (is_oriented && impl->orient[j + e * elem_size] ? -1. : 1.); 13421617c04Sjeremylt } 135b435c5a6Srezgarshakeri } 136*2b730f8bSJeremy L Thompson } 137*2b730f8bSJeremy L Thompson } 138*2b730f8bSJeremy L Thompson } 139*2b730f8bSJeremy L Thompson } 140*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(u, &uu)); 141*2b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &vv)); 142*2b730f8bSJeremy L Thompson if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) *request = NULL; 143e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 14421617c04Sjeremylt } 14521617c04Sjeremylt 146f10650afSjeremylt //------------------------------------------------------------------------------ 147f10650afSjeremylt // ElemRestriction Apply - Common Sizes 148f10650afSjeremylt //------------------------------------------------------------------------------ 149*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 150*2b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 151*2b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, t_mode, u, v, request); 152d979a051Sjeremylt } 153d979a051Sjeremylt 154*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 155*2b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 156*2b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode, u, v, request); 1574d2a38eeSjeremylt } 1584d2a38eeSjeremylt 159*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 160*2b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 161*2b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, t_mode, u, v, request); 1629c36149bSjeremylt } 1639c36149bSjeremylt 164*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 165*2b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 166*2b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode, u, v, request); 1679c36149bSjeremylt } 1689c36149bSjeremylt 169*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 170*2b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 171*2b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, t_mode, u, v, request); 172d979a051Sjeremylt } 173d979a051Sjeremylt 174*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 175*2b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 176*2b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode, u, v, request); 177d979a051Sjeremylt } 178d979a051Sjeremylt 179*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 180*2b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 181*2b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, t_mode, u, v, request); 182d979a051Sjeremylt } 183d979a051Sjeremylt 184*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 185*2b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 186*2b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode, u, v, request); 187d979a051Sjeremylt } 188d979a051Sjeremylt 189bf4d1581Sjeremylt // LCOV_EXCL_START 190*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 191*2b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 192*2b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, t_mode, u, v, request); 193d979a051Sjeremylt } 194bf4d1581Sjeremylt // LCOV_EXCL_STOP 195d979a051Sjeremylt 196*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 197*2b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 198*2b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode, u, v, request); 199d979a051Sjeremylt } 200d979a051Sjeremylt 201bf4d1581Sjeremylt // LCOV_EXCL_START 202*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 203*2b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 204*2b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, t_mode, u, v, request); 205d979a051Sjeremylt } 206bf4d1581Sjeremylt // LCOV_EXCL_STOP 207d979a051Sjeremylt 208*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 209*2b730f8bSJeremy L Thompson CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 210*2b730f8bSJeremy L Thompson return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode, u, v, request); 2114d2a38eeSjeremylt } 2124d2a38eeSjeremylt 213f10650afSjeremylt //------------------------------------------------------------------------------ 214f10650afSjeremylt // ElemRestriction Apply 215f10650afSjeremylt //------------------------------------------------------------------------------ 216*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, CeedTransposeMode t_mode, CeedVector u, CeedVector v, CeedRequest *request) { 217d1d35e2fSjeremylt CeedInt num_blk, blk_size, num_comp, comp_stride; 218*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 219*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 220*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 221*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 2227509a596Sjeremylt CeedElemRestriction_Ref *impl; 223*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 2244d2a38eeSjeremylt 225*2b730f8bSJeremy L Thompson return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v, request); 2269c36149bSjeremylt } 227be9261b7Sjeremylt 228f10650afSjeremylt //------------------------------------------------------------------------------ 229f10650afSjeremylt // ElemRestriction Apply Block 230f10650afSjeremylt //------------------------------------------------------------------------------ 231*2b730f8bSJeremy L Thompson static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 232074cb416Sjeremylt CeedRequest *request) { 233d1d35e2fSjeremylt CeedInt blk_size, num_comp, comp_stride; 234*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 235*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 236*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 2377509a596Sjeremylt CeedElemRestriction_Ref *impl; 238*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 2394d2a38eeSjeremylt 240*2b730f8bSJeremy L Thompson return impl->Apply(r, num_comp, blk_size, comp_stride, block, block + 1, t_mode, u, v, request); 2419c36149bSjeremylt } 242be9261b7Sjeremylt 243f10650afSjeremylt //------------------------------------------------------------------------------ 244bd33150aSjeremylt // ElemRestriction Get Offsets 245bd33150aSjeremylt //------------------------------------------------------------------------------ 246*2b730f8bSJeremy L Thompson static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, CeedMemType mem_type, const CeedInt **offsets) { 247bd33150aSjeremylt CeedElemRestriction_Ref *impl; 248*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(rstr, &impl)); 249bd33150aSjeremylt Ceed ceed; 250*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(rstr, &ceed)); 251bd33150aSjeremylt 252*2b730f8bSJeremy L Thompson if (mem_type != CEED_MEM_HOST) { 253bd33150aSjeremylt // LCOV_EXCL_START 254e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 255bd33150aSjeremylt // LCOV_EXCL_STOP 256*2b730f8bSJeremy L Thompson } 257bd33150aSjeremylt 258bd33150aSjeremylt *offsets = impl->offsets; 259e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 260bd33150aSjeremylt } 261bd33150aSjeremylt 262bd33150aSjeremylt //------------------------------------------------------------------------------ 263f10650afSjeremylt // ElemRestriction Destroy 264f10650afSjeremylt //------------------------------------------------------------------------------ 26521617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 266fe2413ffSjeremylt CeedElemRestriction_Ref *impl; 267*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 26821617c04Sjeremylt 269*2b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->offsets_allocated)); 270*2b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->orient_allocated)); 271*2b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl)); 272e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 27321617c04Sjeremylt } 27421617c04Sjeremylt 275f10650afSjeremylt //------------------------------------------------------------------------------ 276f10650afSjeremylt // ElemRestriction Create 277f10650afSjeremylt //------------------------------------------------------------------------------ 278*2b730f8bSJeremy L Thompson int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, CeedElemRestriction r) { 27921617c04Sjeremylt CeedElemRestriction_Ref *impl; 280d1d35e2fSjeremylt CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 281*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 282*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 283*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumBlocks(r, &num_blk)); 284*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetBlockSize(r, &blk_size)); 285*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumComponents(r, &num_comp)); 286*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCompStride(r, &comp_stride)); 2874ce2993fSjeremylt Ceed ceed; 288*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetCeed(r, &ceed)); 28921617c04Sjeremylt 290*2b730f8bSJeremy L Thompson if (mem_type != CEED_MEM_HOST) { 291c042f62fSJeremy L Thompson // LCOV_EXCL_START 292e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 293c042f62fSJeremy L Thompson // LCOV_EXCL_STOP 294*2b730f8bSJeremy L Thompson } 295*2b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &impl)); 2963661185eSjeremylt 29792fe105eSJeremy L Thompson // Offsets data 298d1d35e2fSjeremylt bool is_strided; 299*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionIsStrided(r, &is_strided)); 300d1d35e2fSjeremylt if (!is_strided) { 30192fe105eSJeremy L Thompson // Check indices for ref or memcheck backends 302d1d35e2fSjeremylt Ceed parent_ceed = ceed, curr_ceed = NULL; 303d1d35e2fSjeremylt while (parent_ceed != curr_ceed) { 304d1d35e2fSjeremylt curr_ceed = parent_ceed; 305*2b730f8bSJeremy L Thompson CeedCallBackend(CeedGetParent(curr_ceed, &parent_ceed)); 3063661185eSjeremylt } 3073661185eSjeremylt const char *resource; 308*2b730f8bSJeremy L Thompson CeedCallBackend(CeedGetResource(parent_ceed, &resource)); 309*2b730f8bSJeremy L Thompson if (!strcmp(resource, "/cpu/self/ref/serial") || !strcmp(resource, "/cpu/self/ref/blocked") || !strcmp(resource, "/cpu/self/memcheck/serial") || 310d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/blocked")) { 311e79b91d9SJeremy L Thompson CeedSize l_size; 312*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetLVectorSize(r, &l_size)); 3133661185eSjeremylt 314*2b730f8bSJeremy L Thompson for (CeedInt i = 0; i < num_elem * elem_size; i++) { 315*2b730f8bSJeremy L Thompson if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp - 1) * comp_stride) { 3163661185eSjeremylt // LCOV_EXCL_START 317*2b730f8bSJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Restriction offset %" CeedInt_FMT " (%" CeedInt_FMT ") out of range [0, %" CeedInt_FMT "]", i, 318*2b730f8bSJeremy L Thompson offsets[i], l_size); 3193661185eSjeremylt // LCOV_EXCL_STOP 3203661185eSjeremylt } 321*2b730f8bSJeremy L Thompson } 322*2b730f8bSJeremy L Thompson } 3233661185eSjeremylt 32492fe105eSJeremy L Thompson // Copy data 325d1d35e2fSjeremylt switch (copy_mode) { 32621617c04Sjeremylt case CEED_COPY_VALUES: 327*2b730f8bSJeremy L Thompson CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->offsets_allocated)); 328*2b730f8bSJeremy L Thompson memcpy(impl->offsets_allocated, offsets, num_elem * elem_size * sizeof(offsets[0])); 329d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 33021617c04Sjeremylt break; 33121617c04Sjeremylt case CEED_OWN_POINTER: 332d979a051Sjeremylt impl->offsets_allocated = (CeedInt *)offsets; 333d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 33421617c04Sjeremylt break; 33521617c04Sjeremylt case CEED_USE_POINTER: 336d979a051Sjeremylt impl->offsets = offsets; 33721617c04Sjeremylt } 33892fe105eSJeremy L Thompson } 339fe2413ffSjeremylt 340*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetData(r, impl)); 341d1d35e2fSjeremylt CeedInt layout[3] = {1, elem_size, elem_size * num_comp}; 342*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionSetELayout(r, layout)); 343*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", CeedElemRestrictionApply_Ref)); 344*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", CeedElemRestrictionApplyBlock_Ref)); 345*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", CeedElemRestrictionGetOffsets_Ref)); 346*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", CeedElemRestrictionDestroy_Ref)); 347d979a051Sjeremylt 348d1d35e2fSjeremylt // Set apply function based upon num_comp, blk_size, and comp_stride 349d979a051Sjeremylt CeedInt idx = -1; 350*2b730f8bSJeremy L Thompson if (blk_size < 10) idx = 100 * num_comp + 10 * blk_size + (comp_stride == 1); 351d979a051Sjeremylt switch (idx) { 352d979a051Sjeremylt case 110: 353d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_110; 354d979a051Sjeremylt break; 355d979a051Sjeremylt case 111: 356d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_111; 357d979a051Sjeremylt break; 358d979a051Sjeremylt case 180: 359d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_180; 360d979a051Sjeremylt break; 361d979a051Sjeremylt case 181: 362d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_181; 363d979a051Sjeremylt break; 364d979a051Sjeremylt case 310: 365d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_310; 366d979a051Sjeremylt break; 367d979a051Sjeremylt case 311: 368d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_311; 369d979a051Sjeremylt break; 370d979a051Sjeremylt case 380: 371d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_380; 372d979a051Sjeremylt break; 373d979a051Sjeremylt case 381: 374d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_381; 375d979a051Sjeremylt break; 376bf4d1581Sjeremylt // LCOV_EXCL_START 377d979a051Sjeremylt case 510: 378d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_510; 379d979a051Sjeremylt break; 380bf4d1581Sjeremylt // LCOV_EXCL_STOP 381d979a051Sjeremylt case 511: 382d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_511; 383d979a051Sjeremylt break; 384bf4d1581Sjeremylt // LCOV_EXCL_START 385d979a051Sjeremylt case 580: 386d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_580; 387d979a051Sjeremylt break; 388bf4d1581Sjeremylt // LCOV_EXCL_STOP 389d979a051Sjeremylt case 581: 390d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_581; 391d979a051Sjeremylt break; 392d979a051Sjeremylt default: 393d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_Core; 394d979a051Sjeremylt break; 395d979a051Sjeremylt } 396d979a051Sjeremylt 397e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 39821617c04Sjeremylt } 399fc0567d9Srezgarshakeri 400fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 401fc0567d9Srezgarshakeri // ElemRestriction Create Oriented 402fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 403*2b730f8bSJeremy L Thompson int CeedElemRestrictionCreateOriented_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, const bool *orient, 404fc0567d9Srezgarshakeri CeedElemRestriction r) { 405fc0567d9Srezgarshakeri CeedElemRestriction_Ref *impl; 406fc0567d9Srezgarshakeri CeedInt num_elem, elem_size; 407fc0567d9Srezgarshakeri // Set up for normal restriction with explicit offsets. This sets up dispatch to 408fc0567d9Srezgarshakeri // CeedElemRestrictionApply_Ref_* and manages the impl->offsets array copy/allocation. 409*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionCreate_Ref(mem_type, copy_mode, offsets, r)); 410fc0567d9Srezgarshakeri 411*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetData(r, &impl)); 412*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetNumElements(r, &num_elem)); 413*2b730f8bSJeremy L Thompson CeedCallBackend(CeedElemRestrictionGetElementSize(r, &elem_size)); 414fc0567d9Srezgarshakeri switch (copy_mode) { 415fc0567d9Srezgarshakeri case CEED_COPY_VALUES: 416*2b730f8bSJeremy L Thompson CeedCallBackend(CeedMalloc(num_elem * elem_size, &impl->orient_allocated)); 417*2b730f8bSJeremy L Thompson memcpy(impl->orient_allocated, orient, num_elem * elem_size * sizeof(orient[0])); 418fc0567d9Srezgarshakeri impl->orient = impl->orient_allocated; 419fc0567d9Srezgarshakeri break; 420fc0567d9Srezgarshakeri case CEED_OWN_POINTER: 421fc0567d9Srezgarshakeri impl->orient_allocated = (bool *)orient; 422fc0567d9Srezgarshakeri impl->orient = impl->orient_allocated; 423fc0567d9Srezgarshakeri break; 424fc0567d9Srezgarshakeri case CEED_USE_POINTER: 425fc0567d9Srezgarshakeri impl->orient = orient; 426fc0567d9Srezgarshakeri } 427fc0567d9Srezgarshakeri return CEED_ERROR_SUCCESS; 428fc0567d9Srezgarshakeri } 429f10650afSjeremylt //------------------------------------------------------------------------------ 430