121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details. 421617c04Sjeremylt // 521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software 621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral 721617c04Sjeremylt // element discretizations for exascale applications. For more information and 821617c04Sjeremylt // source code availability see http://github.com/ceed. 921617c04Sjeremylt // 1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office 1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for 1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including 1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early 1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative. 1621617c04Sjeremylt 17ec3da8bcSJed Brown #include <ceed/ceed.h> 18ec3da8bcSJed Brown #include <ceed/backend.h> 193d576824SJeremy L Thompson #include <stdbool.h> 203d576824SJeremy L Thompson #include <string.h> 2121617c04Sjeremylt #include "ceed-ref.h" 2221617c04Sjeremylt 23f10650afSjeremylt //------------------------------------------------------------------------------ 24f10650afSjeremylt // Core ElemRestriction Apply Code 25f10650afSjeremylt //------------------------------------------------------------------------------ 26be9261b7Sjeremylt static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, 27d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 28d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 299c36149bSjeremylt CeedVector v, CeedRequest *request) { 3021617c04Sjeremylt int ierr; 314ce2993fSjeremylt CeedElemRestriction_Ref *impl; 32e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 3321617c04Sjeremylt const CeedScalar *uu; 3421617c04Sjeremylt CeedScalar *vv; 35d1d35e2fSjeremylt CeedInt num_elem, elem_size, v_offset; 36d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 37d1d35e2fSjeremylt ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 38d1d35e2fSjeremylt v_offset = start*blk_size*elem_size*num_comp; 3921617c04Sjeremylt 40e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChkBackend(ierr); 419c774eddSJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 429c774eddSJeremy L Thompson // Sum into for transpose mode, e-vec to l-vec 43e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr); 449c774eddSJeremy L Thompson } else { 459c774eddSJeremy L Thompson // Overwrite for notranspose mode, l-vec to e-vec 469c774eddSJeremy L Thompson ierr = CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr); 479c774eddSJeremy L Thompson } 487f90ec76Sjeremylt // Restriction from L-vector to E-vector 4921617c04Sjeremylt // Perform: v = r * u 50d1d35e2fSjeremylt if (t_mode == CEED_NOTRANSPOSE) { 51d979a051Sjeremylt // No offsets provided, Identity Restriction 52d979a051Sjeremylt if (!impl->offsets) { 53d1d35e2fSjeremylt bool has_backend_strides; 54d1d35e2fSjeremylt ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides); 55e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 56d1d35e2fSjeremylt if (has_backend_strides) { 57d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 587f90ec76Sjeremylt // This if branch is left separate to allow better inlining 59d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 60e1b98f6eSjeremylt CeedPragmaSIMD 61d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 621d79ecccSjeremylt CeedPragmaSIMD 63d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 647509a596Sjeremylt CeedPragmaSIMD 65d1d35e2fSjeremylt for (CeedInt j = 0; j < blk_size; j++) 66d1d35e2fSjeremylt vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset] 67d1d35e2fSjeremylt = uu[n + k*elem_size + 68d1d35e2fSjeremylt CeedIntMin(e+j, num_elem-1)*elem_size*num_comp]; 697f90ec76Sjeremylt } else { 707f90ec76Sjeremylt // User provided strides 717f90ec76Sjeremylt CeedInt strides[3]; 72e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 73d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 747f90ec76Sjeremylt CeedPragmaSIMD 75d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 767f90ec76Sjeremylt CeedPragmaSIMD 77d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 787f90ec76Sjeremylt CeedPragmaSIMD 79d1d35e2fSjeremylt for (CeedInt j = 0; j < blk_size; j++) 80d1d35e2fSjeremylt vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset] 817509a596Sjeremylt = uu[n*strides[0] + k*strides[1] + 82d1d35e2fSjeremylt CeedIntMin(e+j, num_elem-1)*strides[2]]; 837509a596Sjeremylt } 8421617c04Sjeremylt } else { 85d979a051Sjeremylt // Offsets provided, standard or blocked restriction 86d1d35e2fSjeremylt // vv has shape [elem_size, num_comp, num_elem], row-major 87d1d35e2fSjeremylt // uu has shape [nnodes, num_comp] 88d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 89e1b98f6eSjeremylt CeedPragmaSIMD 90d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 91e1b98f6eSjeremylt CeedPragmaSIMD 92d1d35e2fSjeremylt for (CeedInt i = 0; i < elem_size*blk_size; i++) 93d1d35e2fSjeremylt vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset] 94*fc0567d9Srezgarshakeri = uu[impl->offsets[i+elem_size*e] + k*comp_stride] * 95*fc0567d9Srezgarshakeri (impl->orient && impl->orient[i+elem_size*e] ? -1. : 1.); 9621617c04Sjeremylt } 9721617c04Sjeremylt } else { 987f90ec76Sjeremylt // Restriction from E-vector to L-vector 998d94b059Sjeremylt // Performing v += r^T * u 100d979a051Sjeremylt // No offsets provided, Identity Restriction 101d979a051Sjeremylt if (!impl->offsets) { 102d1d35e2fSjeremylt bool has_backend_strides; 103d1d35e2fSjeremylt ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides); 104e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 105d1d35e2fSjeremylt if (has_backend_strides) { 106d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 1077f90ec76Sjeremylt // This if brach is left separate to allow better inlining 108d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 109523b8ea0Sjeremylt CeedPragmaSIMD 110d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 1117509a596Sjeremylt CeedPragmaSIMD 112d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 1137f90ec76Sjeremylt CeedPragmaSIMD 114d1d35e2fSjeremylt for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++) 115d1d35e2fSjeremylt vv[n + k*elem_size + (e+j)*elem_size*num_comp] 116d1d35e2fSjeremylt += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]; 1177f90ec76Sjeremylt } else { 1187f90ec76Sjeremylt // User provided strides 1197f90ec76Sjeremylt CeedInt strides[3]; 120e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 121d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 1227f90ec76Sjeremylt CeedPragmaSIMD 123d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 1247f90ec76Sjeremylt CeedPragmaSIMD 125d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 1267f90ec76Sjeremylt CeedPragmaSIMD 127d1d35e2fSjeremylt for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++) 1287509a596Sjeremylt vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]] 129d1d35e2fSjeremylt += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]; 130523b8ea0Sjeremylt } 13121617c04Sjeremylt } else { 132d979a051Sjeremylt // Offsets provided, standard or blocked restriction 133d1d35e2fSjeremylt // uu has shape [elem_size, num_comp, num_elem] 134d1d35e2fSjeremylt // vv has shape [nnodes, num_comp] 135d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 136d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 137d1d35e2fSjeremylt for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size) 1388d94b059Sjeremylt // Iteration bound set to discard padding elements 139d1d35e2fSjeremylt for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++) 140d1d35e2fSjeremylt vv[impl->offsets[j+e*elem_size] + k*comp_stride] 141*fc0567d9Srezgarshakeri += uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset] * 142*fc0567d9Srezgarshakeri (impl->orient && impl->orient[j+e*elem_size] ? -1. : 1.); 14321617c04Sjeremylt } 14421617c04Sjeremylt } 145e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChkBackend(ierr); 146e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArray(v, &vv); CeedChkBackend(ierr); 14721617c04Sjeremylt if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 14821617c04Sjeremylt *request = NULL; 149e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 15021617c04Sjeremylt } 15121617c04Sjeremylt 152f10650afSjeremylt //------------------------------------------------------------------------------ 153f10650afSjeremylt // ElemRestriction Apply - Common Sizes 154f10650afSjeremylt //------------------------------------------------------------------------------ 155d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, 156d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 157d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 158d979a051Sjeremylt CeedVector v, CeedRequest *request) { 159d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, 160d1d35e2fSjeremylt t_mode, u, v, request); 161d979a051Sjeremylt } 162d979a051Sjeremylt 163d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, 164d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 165d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 166d979a051Sjeremylt CeedVector v, CeedRequest *request) { 167d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode, 1689c36149bSjeremylt u, v, request); 1694d2a38eeSjeremylt } 1704d2a38eeSjeremylt 171d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, 172d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 173d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 174d979a051Sjeremylt CeedVector v, CeedRequest *request) { 175d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, 176d1d35e2fSjeremylt t_mode, u, v, request); 1779c36149bSjeremylt } 1789c36149bSjeremylt 179d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, 180d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 181d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 182d979a051Sjeremylt CeedVector v, CeedRequest *request) { 183d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode, 1849c36149bSjeremylt u, v, request); 1859c36149bSjeremylt } 1869c36149bSjeremylt 187d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, 188d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 189d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 190d979a051Sjeremylt CeedVector v, CeedRequest *request) { 191d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, 192d1d35e2fSjeremylt t_mode, u, v, request); 193d979a051Sjeremylt } 194d979a051Sjeremylt 195d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, 196d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 197d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 198d979a051Sjeremylt CeedVector v, CeedRequest *request) { 199d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode, 200d979a051Sjeremylt u, v, request); 201d979a051Sjeremylt } 202d979a051Sjeremylt 203d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, 204d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 205d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 206d979a051Sjeremylt CeedVector v, CeedRequest *request) { 207d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, 208d1d35e2fSjeremylt t_mode, u, v, request); 209d979a051Sjeremylt } 210d979a051Sjeremylt 211d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, 212d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 213d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 214d979a051Sjeremylt CeedVector v, CeedRequest *request) { 215d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode, 216d979a051Sjeremylt u, v, request); 217d979a051Sjeremylt } 218d979a051Sjeremylt 219bf4d1581Sjeremylt // LCOV_EXCL_START 220d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, 221d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 222d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 223d979a051Sjeremylt CeedVector v, CeedRequest *request) { 224d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, 225d1d35e2fSjeremylt t_mode, u, v, request); 226d979a051Sjeremylt } 227bf4d1581Sjeremylt // LCOV_EXCL_STOP 228d979a051Sjeremylt 229d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, 230d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 231d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 232d979a051Sjeremylt CeedVector v, CeedRequest *request) { 233d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode, 234d979a051Sjeremylt u, v, request); 235d979a051Sjeremylt } 236d979a051Sjeremylt 237bf4d1581Sjeremylt // LCOV_EXCL_START 238d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, 239d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 240d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 241d979a051Sjeremylt CeedVector v, CeedRequest *request) { 242d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, 243d1d35e2fSjeremylt t_mode, u, v, request); 244d979a051Sjeremylt } 245bf4d1581Sjeremylt // LCOV_EXCL_STOP 246d979a051Sjeremylt 247d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, 248d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 249d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 250d979a051Sjeremylt CeedVector v, CeedRequest *request) { 251d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode, 2529c36149bSjeremylt u, v, request); 2534d2a38eeSjeremylt } 2544d2a38eeSjeremylt 255f10650afSjeremylt //------------------------------------------------------------------------------ 256f10650afSjeremylt // ElemRestriction Apply 257f10650afSjeremylt //------------------------------------------------------------------------------ 258be9261b7Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 259d1d35e2fSjeremylt CeedTransposeMode t_mode, CeedVector u, 260be9261b7Sjeremylt CeedVector v, CeedRequest *request) { 261be9261b7Sjeremylt int ierr; 262d1d35e2fSjeremylt CeedInt num_blk, blk_size, num_comp, comp_stride; 263d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr); 264d1d35e2fSjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 265d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 266d1d35e2fSjeremylt ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 2677509a596Sjeremylt CeedElemRestriction_Ref *impl; 268e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 2694d2a38eeSjeremylt 270d1d35e2fSjeremylt return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v, 271d979a051Sjeremylt request); 2729c36149bSjeremylt } 273be9261b7Sjeremylt 274f10650afSjeremylt //------------------------------------------------------------------------------ 275f10650afSjeremylt // ElemRestriction Apply Block 276f10650afSjeremylt //------------------------------------------------------------------------------ 277be9261b7Sjeremylt static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, 278d1d35e2fSjeremylt CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 279074cb416Sjeremylt CeedRequest *request) { 2804d2a38eeSjeremylt int ierr; 281d1d35e2fSjeremylt CeedInt blk_size, num_comp, comp_stride; 282d1d35e2fSjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 283d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 284d1d35e2fSjeremylt ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 2857509a596Sjeremylt CeedElemRestriction_Ref *impl; 286e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 2874d2a38eeSjeremylt 288d1d35e2fSjeremylt return impl->Apply(r, num_comp, blk_size, comp_stride, block, block+1, t_mode, 289d1d35e2fSjeremylt u, v, request); 2909c36149bSjeremylt } 291be9261b7Sjeremylt 292f10650afSjeremylt //------------------------------------------------------------------------------ 293bd33150aSjeremylt // ElemRestriction Get Offsets 294bd33150aSjeremylt //------------------------------------------------------------------------------ 295bd33150aSjeremylt static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, 296d1d35e2fSjeremylt CeedMemType mem_type, const CeedInt **offsets) { 297bd33150aSjeremylt int ierr; 298bd33150aSjeremylt CeedElemRestriction_Ref *impl; 299e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(rstr, &impl); CeedChkBackend(ierr); 300bd33150aSjeremylt Ceed ceed; 301e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChkBackend(ierr); 302bd33150aSjeremylt 303d1d35e2fSjeremylt if (mem_type != CEED_MEM_HOST) 304bd33150aSjeremylt // LCOV_EXCL_START 305e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 306bd33150aSjeremylt // LCOV_EXCL_STOP 307bd33150aSjeremylt 308bd33150aSjeremylt *offsets = impl->offsets; 309e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 310bd33150aSjeremylt } 311bd33150aSjeremylt 312bd33150aSjeremylt //------------------------------------------------------------------------------ 313f10650afSjeremylt // ElemRestriction Destroy 314f10650afSjeremylt //------------------------------------------------------------------------------ 31521617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 31621617c04Sjeremylt int ierr; 317fe2413ffSjeremylt CeedElemRestriction_Ref *impl; 318e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 31921617c04Sjeremylt 320e15f9bd0SJeremy L Thompson ierr = CeedFree(&impl->offsets_allocated); CeedChkBackend(ierr); 321e15f9bd0SJeremy L Thompson ierr = CeedFree(&impl); CeedChkBackend(ierr); 322e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 32321617c04Sjeremylt } 32421617c04Sjeremylt 325f10650afSjeremylt //------------------------------------------------------------------------------ 326f10650afSjeremylt // ElemRestriction Create 327f10650afSjeremylt //------------------------------------------------------------------------------ 328d1d35e2fSjeremylt int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, 329d979a051Sjeremylt const CeedInt *offsets, 330d979a051Sjeremylt CeedElemRestriction r) { 33121617c04Sjeremylt int ierr; 33221617c04Sjeremylt CeedElemRestriction_Ref *impl; 333d1d35e2fSjeremylt CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 334d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 335d1d35e2fSjeremylt ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 336d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr); 337d1d35e2fSjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 338d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 339d1d35e2fSjeremylt ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 3404ce2993fSjeremylt Ceed ceed; 341e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); 34221617c04Sjeremylt 343d1d35e2fSjeremylt if (mem_type != CEED_MEM_HOST) 344c042f62fSJeremy L Thompson // LCOV_EXCL_START 345e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 346c042f62fSJeremy L Thompson // LCOV_EXCL_STOP 347e15f9bd0SJeremy L Thompson ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 3483661185eSjeremylt 34992fe105eSJeremy L Thompson // Offsets data 350d1d35e2fSjeremylt bool is_strided; 351d1d35e2fSjeremylt ierr = CeedElemRestrictionIsStrided(r, &is_strided); CeedChkBackend(ierr); 352d1d35e2fSjeremylt if (!is_strided) { 35392fe105eSJeremy L Thompson // Check indices for ref or memcheck backends 354d1d35e2fSjeremylt Ceed parent_ceed = ceed, curr_ceed = NULL; 355d1d35e2fSjeremylt while (parent_ceed != curr_ceed) { 356d1d35e2fSjeremylt curr_ceed = parent_ceed; 357d1d35e2fSjeremylt ierr = CeedGetParent(curr_ceed, &parent_ceed); CeedChkBackend(ierr); 3583661185eSjeremylt } 3593661185eSjeremylt const char *resource; 360d1d35e2fSjeremylt ierr = CeedGetResource(parent_ceed, &resource); CeedChkBackend(ierr); 361d1d35e2fSjeremylt if (!strcmp(resource, "/cpu/self/ref/serial") || 362d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/ref/blocked") || 363d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/serial") || 364d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/blocked")) { 365d1d35e2fSjeremylt CeedInt l_size; 366d1d35e2fSjeremylt ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr); 3673661185eSjeremylt 368d1d35e2fSjeremylt for (CeedInt i = 0; i < num_elem*elem_size; i++) 369d1d35e2fSjeremylt if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp - 1) * comp_stride) 3703661185eSjeremylt // LCOV_EXCL_START 371e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 372e15f9bd0SJeremy L Thompson "Restriction offset %d (%d) out of range " 373d1d35e2fSjeremylt "[0, %d]", i, offsets[i], l_size); 3743661185eSjeremylt // LCOV_EXCL_STOP 3753661185eSjeremylt } 3763661185eSjeremylt 37792fe105eSJeremy L Thompson // Copy data 378d1d35e2fSjeremylt switch (copy_mode) { 37921617c04Sjeremylt case CEED_COPY_VALUES: 380d1d35e2fSjeremylt ierr = CeedMalloc(num_elem*elem_size, &impl->offsets_allocated); 381e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 382d979a051Sjeremylt memcpy(impl->offsets_allocated, offsets, 383d1d35e2fSjeremylt num_elem * elem_size * sizeof(offsets[0])); 384d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 38521617c04Sjeremylt break; 38621617c04Sjeremylt case CEED_OWN_POINTER: 387d979a051Sjeremylt impl->offsets_allocated = (CeedInt *)offsets; 388d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 38921617c04Sjeremylt break; 39021617c04Sjeremylt case CEED_USE_POINTER: 391d979a051Sjeremylt impl->offsets = offsets; 39221617c04Sjeremylt } 39392fe105eSJeremy L Thompson } 394fe2413ffSjeremylt 395e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionSetData(r, impl); CeedChkBackend(ierr); 396d1d35e2fSjeremylt CeedInt layout[3] = {1, elem_size, elem_size*num_comp}; 397e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionSetELayout(r, layout); CeedChkBackend(ierr); 398fe2413ffSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", 399e15f9bd0SJeremy L Thompson CeedElemRestrictionApply_Ref); CeedChkBackend(ierr); 400be9261b7Sjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", 401be9261b7Sjeremylt CeedElemRestrictionApplyBlock_Ref); 402e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 403bd33150aSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", 404bd33150aSjeremylt CeedElemRestrictionGetOffsets_Ref); 405e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 406fe2413ffSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", 407e15f9bd0SJeremy L Thompson CeedElemRestrictionDestroy_Ref); CeedChkBackend(ierr); 408d979a051Sjeremylt 409d1d35e2fSjeremylt // Set apply function based upon num_comp, blk_size, and comp_stride 410d979a051Sjeremylt CeedInt idx = -1; 411d1d35e2fSjeremylt if (blk_size < 10) 412d1d35e2fSjeremylt idx = 100*num_comp + 10*blk_size + (comp_stride == 1); 413d979a051Sjeremylt switch (idx) { 414d979a051Sjeremylt case 110: 415d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_110; 416d979a051Sjeremylt break; 417d979a051Sjeremylt case 111: 418d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_111; 419d979a051Sjeremylt break; 420d979a051Sjeremylt case 180: 421d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_180; 422d979a051Sjeremylt break; 423d979a051Sjeremylt case 181: 424d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_181; 425d979a051Sjeremylt break; 426d979a051Sjeremylt case 310: 427d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_310; 428d979a051Sjeremylt break; 429d979a051Sjeremylt case 311: 430d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_311; 431d979a051Sjeremylt break; 432d979a051Sjeremylt case 380: 433d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_380; 434d979a051Sjeremylt break; 435d979a051Sjeremylt case 381: 436d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_381; 437d979a051Sjeremylt break; 438bf4d1581Sjeremylt // LCOV_EXCL_START 439d979a051Sjeremylt case 510: 440d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_510; 441d979a051Sjeremylt break; 442bf4d1581Sjeremylt // LCOV_EXCL_STOP 443d979a051Sjeremylt case 511: 444d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_511; 445d979a051Sjeremylt break; 446bf4d1581Sjeremylt // LCOV_EXCL_START 447d979a051Sjeremylt case 580: 448d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_580; 449d979a051Sjeremylt break; 450bf4d1581Sjeremylt // LCOV_EXCL_STOP 451d979a051Sjeremylt case 581: 452d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_581; 453d979a051Sjeremylt break; 454d979a051Sjeremylt default: 455d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_Core; 456d979a051Sjeremylt break; 457d979a051Sjeremylt } 458d979a051Sjeremylt 459e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 46021617c04Sjeremylt } 461*fc0567d9Srezgarshakeri 462*fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 463*fc0567d9Srezgarshakeri // ElemRestriction Create Oriented 464*fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 465*fc0567d9Srezgarshakeri int CeedElemRestrictionCreateOriented_Ref(CeedMemType mem_type, 466*fc0567d9Srezgarshakeri CeedCopyMode copy_mode, 467*fc0567d9Srezgarshakeri const CeedInt *offsets, const bool *orient, 468*fc0567d9Srezgarshakeri CeedElemRestriction r) { 469*fc0567d9Srezgarshakeri int ierr; 470*fc0567d9Srezgarshakeri CeedElemRestriction_Ref *impl; 471*fc0567d9Srezgarshakeri CeedInt num_elem, elem_size; 472*fc0567d9Srezgarshakeri // Set up for normal restriction with explicit offsets. This sets up dispatch to 473*fc0567d9Srezgarshakeri // CeedElemRestrictionApply_Ref_* and manages the impl->offsets array copy/allocation. 474*fc0567d9Srezgarshakeri ierr = CeedElemRestrictionCreate_Ref(mem_type, copy_mode, offsets, r); 475*fc0567d9Srezgarshakeri CeedChkBackend(ierr); 476*fc0567d9Srezgarshakeri 477*fc0567d9Srezgarshakeri ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 478*fc0567d9Srezgarshakeri ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 479*fc0567d9Srezgarshakeri ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 480*fc0567d9Srezgarshakeri switch (copy_mode) { 481*fc0567d9Srezgarshakeri case CEED_COPY_VALUES: 482*fc0567d9Srezgarshakeri ierr = CeedMalloc(num_elem * elem_size, &impl->orient_allocated); 483*fc0567d9Srezgarshakeri CeedChkBackend(ierr); 484*fc0567d9Srezgarshakeri memcpy(impl->orient_allocated, orient, 485*fc0567d9Srezgarshakeri num_elem * elem_size * sizeof(orient[0])); 486*fc0567d9Srezgarshakeri impl->orient = impl->orient_allocated; 487*fc0567d9Srezgarshakeri break; 488*fc0567d9Srezgarshakeri case CEED_OWN_POINTER: 489*fc0567d9Srezgarshakeri impl->orient_allocated = (bool *)orient; 490*fc0567d9Srezgarshakeri impl->orient = impl->orient_allocated; 491*fc0567d9Srezgarshakeri break; 492*fc0567d9Srezgarshakeri case CEED_USE_POINTER: 493*fc0567d9Srezgarshakeri impl->orient = orient; 494*fc0567d9Srezgarshakeri } 495*fc0567d9Srezgarshakeri return CEED_ERROR_SUCCESS; 496*fc0567d9Srezgarshakeri } 497f10650afSjeremylt //------------------------------------------------------------------------------ 498