1*3d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2*3d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 321617c04Sjeremylt // 4*3d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 521617c04Sjeremylt // 6*3d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 721617c04Sjeremylt 8ec3da8bcSJed Brown #include <ceed/ceed.h> 9ec3da8bcSJed Brown #include <ceed/backend.h> 103d576824SJeremy L Thompson #include <stdbool.h> 113d576824SJeremy L Thompson #include <string.h> 1221617c04Sjeremylt #include "ceed-ref.h" 1321617c04Sjeremylt 14f10650afSjeremylt //------------------------------------------------------------------------------ 15f10650afSjeremylt // Core ElemRestriction Apply Code 16f10650afSjeremylt //------------------------------------------------------------------------------ 17be9261b7Sjeremylt static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, 18d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 19d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 209c36149bSjeremylt CeedVector v, CeedRequest *request) { 2121617c04Sjeremylt int ierr; 224ce2993fSjeremylt CeedElemRestriction_Ref *impl; 23e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 2421617c04Sjeremylt const CeedScalar *uu; 2521617c04Sjeremylt CeedScalar *vv; 26d1d35e2fSjeremylt CeedInt num_elem, elem_size, v_offset; 27d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 28d1d35e2fSjeremylt ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 29d1d35e2fSjeremylt v_offset = start*blk_size*elem_size*num_comp; 3021617c04Sjeremylt 31b435c5a6Srezgarshakeri bool is_oriented; 32b435c5a6Srezgarshakeri ierr = CeedElemRestrictionIsOriented(r, &is_oriented); CeedChkBackend(ierr); 33e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChkBackend(ierr); 349c774eddSJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 359c774eddSJeremy L Thompson // Sum into for transpose mode, e-vec to l-vec 36e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr); 379c774eddSJeremy L Thompson } else { 389c774eddSJeremy L Thompson // Overwrite for notranspose mode, l-vec to e-vec 399c774eddSJeremy L Thompson ierr = CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr); 409c774eddSJeremy L Thompson } 417f90ec76Sjeremylt // Restriction from L-vector to E-vector 4221617c04Sjeremylt // Perform: v = r * u 43d1d35e2fSjeremylt if (t_mode == CEED_NOTRANSPOSE) { 44d979a051Sjeremylt // No offsets provided, Identity Restriction 45d979a051Sjeremylt if (!impl->offsets) { 46d1d35e2fSjeremylt bool has_backend_strides; 47d1d35e2fSjeremylt ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides); 48e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 49d1d35e2fSjeremylt if (has_backend_strides) { 50d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 517f90ec76Sjeremylt // This if branch is left separate to allow better inlining 52d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 53e1b98f6eSjeremylt CeedPragmaSIMD 54d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 551d79ecccSjeremylt CeedPragmaSIMD 56d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 577509a596Sjeremylt CeedPragmaSIMD 58d1d35e2fSjeremylt for (CeedInt j = 0; j < blk_size; j++) 59d1d35e2fSjeremylt vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset] 60d1d35e2fSjeremylt = uu[n + k*elem_size + 61d1d35e2fSjeremylt CeedIntMin(e+j, num_elem-1)*elem_size*num_comp]; 627f90ec76Sjeremylt } else { 637f90ec76Sjeremylt // User provided strides 647f90ec76Sjeremylt CeedInt strides[3]; 65e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 66d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 677f90ec76Sjeremylt CeedPragmaSIMD 68d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 697f90ec76Sjeremylt CeedPragmaSIMD 70d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 717f90ec76Sjeremylt CeedPragmaSIMD 72d1d35e2fSjeremylt for (CeedInt j = 0; j < blk_size; j++) 73d1d35e2fSjeremylt vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset] 747509a596Sjeremylt = uu[n*strides[0] + k*strides[1] + 75d1d35e2fSjeremylt CeedIntMin(e+j, num_elem-1)*strides[2]]; 767509a596Sjeremylt } 7721617c04Sjeremylt } else { 78d979a051Sjeremylt // Offsets provided, standard or blocked restriction 79d1d35e2fSjeremylt // vv has shape [elem_size, num_comp, num_elem], row-major 80d1d35e2fSjeremylt // uu has shape [nnodes, num_comp] 81d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 82e1b98f6eSjeremylt CeedPragmaSIMD 83d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 84e1b98f6eSjeremylt CeedPragmaSIMD 85d1d35e2fSjeremylt for (CeedInt i = 0; i < elem_size*blk_size; i++) 86d1d35e2fSjeremylt vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset] 87fc0567d9Srezgarshakeri = uu[impl->offsets[i+elem_size*e] + k*comp_stride] * 88000294e3Srezgarshakeri (is_oriented && impl->orient[i+elem_size*e] ? -1. : 1.); 89b435c5a6Srezgarshakeri } 9021617c04Sjeremylt } else { 917f90ec76Sjeremylt // Restriction from E-vector to L-vector 928d94b059Sjeremylt // Performing v += r^T * u 93d979a051Sjeremylt // No offsets provided, Identity Restriction 94d979a051Sjeremylt if (!impl->offsets) { 95d1d35e2fSjeremylt bool has_backend_strides; 96d1d35e2fSjeremylt ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides); 97e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 98d1d35e2fSjeremylt if (has_backend_strides) { 99d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 1007f90ec76Sjeremylt // This if brach is left separate to allow better inlining 101d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 102523b8ea0Sjeremylt CeedPragmaSIMD 103d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 1047509a596Sjeremylt CeedPragmaSIMD 105d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 1067f90ec76Sjeremylt CeedPragmaSIMD 107d1d35e2fSjeremylt for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++) 108d1d35e2fSjeremylt vv[n + k*elem_size + (e+j)*elem_size*num_comp] 109d1d35e2fSjeremylt += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]; 1107f90ec76Sjeremylt } else { 1117f90ec76Sjeremylt // User provided strides 1127f90ec76Sjeremylt CeedInt strides[3]; 113e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 114d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 1157f90ec76Sjeremylt CeedPragmaSIMD 116d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 1177f90ec76Sjeremylt CeedPragmaSIMD 118d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 1197f90ec76Sjeremylt CeedPragmaSIMD 120d1d35e2fSjeremylt for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++) 1217509a596Sjeremylt vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]] 122d1d35e2fSjeremylt += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]; 123523b8ea0Sjeremylt } 12421617c04Sjeremylt } else { 125d979a051Sjeremylt // Offsets provided, standard or blocked restriction 126d1d35e2fSjeremylt // uu has shape [elem_size, num_comp, num_elem] 127d1d35e2fSjeremylt // vv has shape [nnodes, num_comp] 128d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 129d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 130d1d35e2fSjeremylt for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size) 1318d94b059Sjeremylt // Iteration bound set to discard padding elements 132d1d35e2fSjeremylt for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++) 133d1d35e2fSjeremylt vv[impl->offsets[j+e*elem_size] + k*comp_stride] 134fc0567d9Srezgarshakeri += uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset] * 135000294e3Srezgarshakeri (is_oriented && impl->orient[j+e*elem_size] ? -1. : 1.); 13621617c04Sjeremylt } 137b435c5a6Srezgarshakeri } 138e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChkBackend(ierr); 139e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArray(v, &vv); CeedChkBackend(ierr); 14021617c04Sjeremylt if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 14121617c04Sjeremylt *request = NULL; 142e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 14321617c04Sjeremylt } 14421617c04Sjeremylt 145f10650afSjeremylt //------------------------------------------------------------------------------ 146f10650afSjeremylt // ElemRestriction Apply - Common Sizes 147f10650afSjeremylt //------------------------------------------------------------------------------ 148d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, 149d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 150d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 151d979a051Sjeremylt CeedVector v, CeedRequest *request) { 152d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, 153d1d35e2fSjeremylt t_mode, u, v, request); 154d979a051Sjeremylt } 155d979a051Sjeremylt 156d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, 157d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 158d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 159d979a051Sjeremylt CeedVector v, CeedRequest *request) { 160d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode, 1619c36149bSjeremylt u, v, request); 1624d2a38eeSjeremylt } 1634d2a38eeSjeremylt 164d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, 165d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 166d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 167d979a051Sjeremylt CeedVector v, CeedRequest *request) { 168d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, 169d1d35e2fSjeremylt t_mode, u, v, request); 1709c36149bSjeremylt } 1719c36149bSjeremylt 172d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, 173d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 174d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 175d979a051Sjeremylt CeedVector v, CeedRequest *request) { 176d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode, 1779c36149bSjeremylt u, v, request); 1789c36149bSjeremylt } 1799c36149bSjeremylt 180d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, 181d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 182d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 183d979a051Sjeremylt CeedVector v, CeedRequest *request) { 184d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, 185d1d35e2fSjeremylt t_mode, u, v, request); 186d979a051Sjeremylt } 187d979a051Sjeremylt 188d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, 189d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 190d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 191d979a051Sjeremylt CeedVector v, CeedRequest *request) { 192d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode, 193d979a051Sjeremylt u, v, request); 194d979a051Sjeremylt } 195d979a051Sjeremylt 196d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, 197d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 198d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 199d979a051Sjeremylt CeedVector v, CeedRequest *request) { 200d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, 201d1d35e2fSjeremylt t_mode, u, v, request); 202d979a051Sjeremylt } 203d979a051Sjeremylt 204d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, 205d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 206d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 207d979a051Sjeremylt CeedVector v, CeedRequest *request) { 208d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode, 209d979a051Sjeremylt u, v, request); 210d979a051Sjeremylt } 211d979a051Sjeremylt 212bf4d1581Sjeremylt // LCOV_EXCL_START 213d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, 214d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 215d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 216d979a051Sjeremylt CeedVector v, CeedRequest *request) { 217d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, 218d1d35e2fSjeremylt t_mode, u, v, request); 219d979a051Sjeremylt } 220bf4d1581Sjeremylt // LCOV_EXCL_STOP 221d979a051Sjeremylt 222d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, 223d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 224d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 225d979a051Sjeremylt CeedVector v, CeedRequest *request) { 226d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode, 227d979a051Sjeremylt u, v, request); 228d979a051Sjeremylt } 229d979a051Sjeremylt 230bf4d1581Sjeremylt // LCOV_EXCL_START 231d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, 232d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 233d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 234d979a051Sjeremylt CeedVector v, CeedRequest *request) { 235d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, 236d1d35e2fSjeremylt t_mode, u, v, request); 237d979a051Sjeremylt } 238bf4d1581Sjeremylt // LCOV_EXCL_STOP 239d979a051Sjeremylt 240d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, 241d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 242d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 243d979a051Sjeremylt CeedVector v, CeedRequest *request) { 244d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode, 2459c36149bSjeremylt u, v, request); 2464d2a38eeSjeremylt } 2474d2a38eeSjeremylt 248f10650afSjeremylt //------------------------------------------------------------------------------ 249f10650afSjeremylt // ElemRestriction Apply 250f10650afSjeremylt //------------------------------------------------------------------------------ 251be9261b7Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 252d1d35e2fSjeremylt CeedTransposeMode t_mode, CeedVector u, 253be9261b7Sjeremylt CeedVector v, CeedRequest *request) { 254be9261b7Sjeremylt int ierr; 255d1d35e2fSjeremylt CeedInt num_blk, blk_size, num_comp, comp_stride; 256d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr); 257d1d35e2fSjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 258d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 259d1d35e2fSjeremylt ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 2607509a596Sjeremylt CeedElemRestriction_Ref *impl; 261e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 2624d2a38eeSjeremylt 263d1d35e2fSjeremylt return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v, 264d979a051Sjeremylt request); 2659c36149bSjeremylt } 266be9261b7Sjeremylt 267f10650afSjeremylt //------------------------------------------------------------------------------ 268f10650afSjeremylt // ElemRestriction Apply Block 269f10650afSjeremylt //------------------------------------------------------------------------------ 270be9261b7Sjeremylt static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, 271d1d35e2fSjeremylt CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 272074cb416Sjeremylt CeedRequest *request) { 2734d2a38eeSjeremylt int ierr; 274d1d35e2fSjeremylt CeedInt blk_size, num_comp, comp_stride; 275d1d35e2fSjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 276d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 277d1d35e2fSjeremylt ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 2787509a596Sjeremylt CeedElemRestriction_Ref *impl; 279e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 2804d2a38eeSjeremylt 281d1d35e2fSjeremylt return impl->Apply(r, num_comp, blk_size, comp_stride, block, block+1, t_mode, 282d1d35e2fSjeremylt u, v, request); 2839c36149bSjeremylt } 284be9261b7Sjeremylt 285f10650afSjeremylt //------------------------------------------------------------------------------ 286bd33150aSjeremylt // ElemRestriction Get Offsets 287bd33150aSjeremylt //------------------------------------------------------------------------------ 288bd33150aSjeremylt static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, 289d1d35e2fSjeremylt CeedMemType mem_type, const CeedInt **offsets) { 290bd33150aSjeremylt int ierr; 291bd33150aSjeremylt CeedElemRestriction_Ref *impl; 292e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(rstr, &impl); CeedChkBackend(ierr); 293bd33150aSjeremylt Ceed ceed; 294e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChkBackend(ierr); 295bd33150aSjeremylt 296d1d35e2fSjeremylt if (mem_type != CEED_MEM_HOST) 297bd33150aSjeremylt // LCOV_EXCL_START 298e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 299bd33150aSjeremylt // LCOV_EXCL_STOP 300bd33150aSjeremylt 301bd33150aSjeremylt *offsets = impl->offsets; 302e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 303bd33150aSjeremylt } 304bd33150aSjeremylt 305bd33150aSjeremylt //------------------------------------------------------------------------------ 306f10650afSjeremylt // ElemRestriction Destroy 307f10650afSjeremylt //------------------------------------------------------------------------------ 30821617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 30921617c04Sjeremylt int ierr; 310fe2413ffSjeremylt CeedElemRestriction_Ref *impl; 311e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 31221617c04Sjeremylt 313e15f9bd0SJeremy L Thompson ierr = CeedFree(&impl->offsets_allocated); CeedChkBackend(ierr); 314e15f9bd0SJeremy L Thompson ierr = CeedFree(&impl); CeedChkBackend(ierr); 315e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 31621617c04Sjeremylt } 31721617c04Sjeremylt 318f10650afSjeremylt //------------------------------------------------------------------------------ 319f10650afSjeremylt // ElemRestriction Create 320f10650afSjeremylt //------------------------------------------------------------------------------ 321d1d35e2fSjeremylt int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, 322d979a051Sjeremylt const CeedInt *offsets, 323d979a051Sjeremylt CeedElemRestriction r) { 32421617c04Sjeremylt int ierr; 32521617c04Sjeremylt CeedElemRestriction_Ref *impl; 326d1d35e2fSjeremylt CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 327d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 328d1d35e2fSjeremylt ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 329d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr); 330d1d35e2fSjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 331d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 332d1d35e2fSjeremylt ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 3334ce2993fSjeremylt Ceed ceed; 334e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); 33521617c04Sjeremylt 336d1d35e2fSjeremylt if (mem_type != CEED_MEM_HOST) 337c042f62fSJeremy L Thompson // LCOV_EXCL_START 338e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 339c042f62fSJeremy L Thompson // LCOV_EXCL_STOP 340e15f9bd0SJeremy L Thompson ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 3413661185eSjeremylt 34292fe105eSJeremy L Thompson // Offsets data 343d1d35e2fSjeremylt bool is_strided; 344d1d35e2fSjeremylt ierr = CeedElemRestrictionIsStrided(r, &is_strided); CeedChkBackend(ierr); 345d1d35e2fSjeremylt if (!is_strided) { 34692fe105eSJeremy L Thompson // Check indices for ref or memcheck backends 347d1d35e2fSjeremylt Ceed parent_ceed = ceed, curr_ceed = NULL; 348d1d35e2fSjeremylt while (parent_ceed != curr_ceed) { 349d1d35e2fSjeremylt curr_ceed = parent_ceed; 350d1d35e2fSjeremylt ierr = CeedGetParent(curr_ceed, &parent_ceed); CeedChkBackend(ierr); 3513661185eSjeremylt } 3523661185eSjeremylt const char *resource; 353d1d35e2fSjeremylt ierr = CeedGetResource(parent_ceed, &resource); CeedChkBackend(ierr); 354d1d35e2fSjeremylt if (!strcmp(resource, "/cpu/self/ref/serial") || 355d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/ref/blocked") || 356d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/serial") || 357d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/blocked")) { 358e79b91d9SJeremy L Thompson CeedSize l_size; 359d1d35e2fSjeremylt ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr); 3603661185eSjeremylt 361d1d35e2fSjeremylt for (CeedInt i = 0; i < num_elem*elem_size; i++) 362d1d35e2fSjeremylt if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp - 1) * comp_stride) 3633661185eSjeremylt // LCOV_EXCL_START 364e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 365e15f9bd0SJeremy L Thompson "Restriction offset %d (%d) out of range " 366d1d35e2fSjeremylt "[0, %d]", i, offsets[i], l_size); 3673661185eSjeremylt // LCOV_EXCL_STOP 3683661185eSjeremylt } 3693661185eSjeremylt 37092fe105eSJeremy L Thompson // Copy data 371d1d35e2fSjeremylt switch (copy_mode) { 37221617c04Sjeremylt case CEED_COPY_VALUES: 373d1d35e2fSjeremylt ierr = CeedMalloc(num_elem*elem_size, &impl->offsets_allocated); 374e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 375d979a051Sjeremylt memcpy(impl->offsets_allocated, offsets, 376d1d35e2fSjeremylt num_elem * elem_size * sizeof(offsets[0])); 377d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 37821617c04Sjeremylt break; 37921617c04Sjeremylt case CEED_OWN_POINTER: 380d979a051Sjeremylt impl->offsets_allocated = (CeedInt *)offsets; 381d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 38221617c04Sjeremylt break; 38321617c04Sjeremylt case CEED_USE_POINTER: 384d979a051Sjeremylt impl->offsets = offsets; 38521617c04Sjeremylt } 38692fe105eSJeremy L Thompson } 387fe2413ffSjeremylt 388e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionSetData(r, impl); CeedChkBackend(ierr); 389d1d35e2fSjeremylt CeedInt layout[3] = {1, elem_size, elem_size*num_comp}; 390e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionSetELayout(r, layout); CeedChkBackend(ierr); 391fe2413ffSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", 392e15f9bd0SJeremy L Thompson CeedElemRestrictionApply_Ref); CeedChkBackend(ierr); 393be9261b7Sjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", 394be9261b7Sjeremylt CeedElemRestrictionApplyBlock_Ref); 395e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 396bd33150aSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", 397bd33150aSjeremylt CeedElemRestrictionGetOffsets_Ref); 398e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 399fe2413ffSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", 400e15f9bd0SJeremy L Thompson CeedElemRestrictionDestroy_Ref); CeedChkBackend(ierr); 401d979a051Sjeremylt 402d1d35e2fSjeremylt // Set apply function based upon num_comp, blk_size, and comp_stride 403d979a051Sjeremylt CeedInt idx = -1; 404d1d35e2fSjeremylt if (blk_size < 10) 405d1d35e2fSjeremylt idx = 100*num_comp + 10*blk_size + (comp_stride == 1); 406d979a051Sjeremylt switch (idx) { 407d979a051Sjeremylt case 110: 408d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_110; 409d979a051Sjeremylt break; 410d979a051Sjeremylt case 111: 411d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_111; 412d979a051Sjeremylt break; 413d979a051Sjeremylt case 180: 414d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_180; 415d979a051Sjeremylt break; 416d979a051Sjeremylt case 181: 417d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_181; 418d979a051Sjeremylt break; 419d979a051Sjeremylt case 310: 420d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_310; 421d979a051Sjeremylt break; 422d979a051Sjeremylt case 311: 423d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_311; 424d979a051Sjeremylt break; 425d979a051Sjeremylt case 380: 426d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_380; 427d979a051Sjeremylt break; 428d979a051Sjeremylt case 381: 429d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_381; 430d979a051Sjeremylt break; 431bf4d1581Sjeremylt // LCOV_EXCL_START 432d979a051Sjeremylt case 510: 433d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_510; 434d979a051Sjeremylt break; 435bf4d1581Sjeremylt // LCOV_EXCL_STOP 436d979a051Sjeremylt case 511: 437d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_511; 438d979a051Sjeremylt break; 439bf4d1581Sjeremylt // LCOV_EXCL_START 440d979a051Sjeremylt case 580: 441d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_580; 442d979a051Sjeremylt break; 443bf4d1581Sjeremylt // LCOV_EXCL_STOP 444d979a051Sjeremylt case 581: 445d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_581; 446d979a051Sjeremylt break; 447d979a051Sjeremylt default: 448d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_Core; 449d979a051Sjeremylt break; 450d979a051Sjeremylt } 451d979a051Sjeremylt 452e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 45321617c04Sjeremylt } 454fc0567d9Srezgarshakeri 455fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 456fc0567d9Srezgarshakeri // ElemRestriction Create Oriented 457fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 458fc0567d9Srezgarshakeri int CeedElemRestrictionCreateOriented_Ref(CeedMemType mem_type, 459fc0567d9Srezgarshakeri CeedCopyMode copy_mode, 460fc0567d9Srezgarshakeri const CeedInt *offsets, const bool *orient, 461fc0567d9Srezgarshakeri CeedElemRestriction r) { 462fc0567d9Srezgarshakeri int ierr; 463fc0567d9Srezgarshakeri CeedElemRestriction_Ref *impl; 464fc0567d9Srezgarshakeri CeedInt num_elem, elem_size; 465fc0567d9Srezgarshakeri // Set up for normal restriction with explicit offsets. This sets up dispatch to 466fc0567d9Srezgarshakeri // CeedElemRestrictionApply_Ref_* and manages the impl->offsets array copy/allocation. 467fc0567d9Srezgarshakeri ierr = CeedElemRestrictionCreate_Ref(mem_type, copy_mode, offsets, r); 468fc0567d9Srezgarshakeri CeedChkBackend(ierr); 469fc0567d9Srezgarshakeri 470fc0567d9Srezgarshakeri ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 471fc0567d9Srezgarshakeri ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 472fc0567d9Srezgarshakeri ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 473fc0567d9Srezgarshakeri switch (copy_mode) { 474fc0567d9Srezgarshakeri case CEED_COPY_VALUES: 475fc0567d9Srezgarshakeri ierr = CeedMalloc(num_elem * elem_size, &impl->orient_allocated); 476fc0567d9Srezgarshakeri CeedChkBackend(ierr); 477fc0567d9Srezgarshakeri memcpy(impl->orient_allocated, orient, 478fc0567d9Srezgarshakeri num_elem * elem_size * sizeof(orient[0])); 479fc0567d9Srezgarshakeri impl->orient = impl->orient_allocated; 480fc0567d9Srezgarshakeri break; 481fc0567d9Srezgarshakeri case CEED_OWN_POINTER: 482fc0567d9Srezgarshakeri impl->orient_allocated = (bool *)orient; 483fc0567d9Srezgarshakeri impl->orient = impl->orient_allocated; 484fc0567d9Srezgarshakeri break; 485fc0567d9Srezgarshakeri case CEED_USE_POINTER: 486fc0567d9Srezgarshakeri impl->orient = orient; 487fc0567d9Srezgarshakeri } 488fc0567d9Srezgarshakeri return CEED_ERROR_SUCCESS; 489fc0567d9Srezgarshakeri } 490f10650afSjeremylt //------------------------------------------------------------------------------ 491