121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details. 421617c04Sjeremylt // 521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software 621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral 721617c04Sjeremylt // element discretizations for exascale applications. For more information and 821617c04Sjeremylt // source code availability see http://github.com/ceed. 921617c04Sjeremylt // 1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office 1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for 1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including 1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early 1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative. 1621617c04Sjeremylt 17ec3da8bcSJed Brown #include <ceed/ceed.h> 18ec3da8bcSJed Brown #include <ceed/backend.h> 193d576824SJeremy L Thompson #include <stdbool.h> 203d576824SJeremy L Thompson #include <string.h> 2121617c04Sjeremylt #include "ceed-ref.h" 2221617c04Sjeremylt 23f10650afSjeremylt //------------------------------------------------------------------------------ 24f10650afSjeremylt // Core ElemRestriction Apply Code 25f10650afSjeremylt //------------------------------------------------------------------------------ 26be9261b7Sjeremylt static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, 27d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 28d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 299c36149bSjeremylt CeedVector v, CeedRequest *request) { 3021617c04Sjeremylt int ierr; 314ce2993fSjeremylt CeedElemRestriction_Ref *impl; 32e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 3321617c04Sjeremylt const CeedScalar *uu; 3421617c04Sjeremylt CeedScalar *vv; 35d1d35e2fSjeremylt CeedInt num_elem, elem_size, v_offset; 36d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 37d1d35e2fSjeremylt ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 38d1d35e2fSjeremylt v_offset = start*blk_size*elem_size*num_comp; 3921617c04Sjeremylt 40e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChkBackend(ierr); 41*9c774eddSJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 42*9c774eddSJeremy L Thompson // Sum into for transpose mode, e-vec to l-vec 43e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr); 44*9c774eddSJeremy L Thompson } else { 45*9c774eddSJeremy L Thompson // Overwrite for notranspose mode, l-vec to e-vec 46*9c774eddSJeremy L Thompson ierr = CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr); 47*9c774eddSJeremy L Thompson } 487f90ec76Sjeremylt // Restriction from L-vector to E-vector 4921617c04Sjeremylt // Perform: v = r * u 50d1d35e2fSjeremylt if (t_mode == CEED_NOTRANSPOSE) { 51d979a051Sjeremylt // No offsets provided, Identity Restriction 52d979a051Sjeremylt if (!impl->offsets) { 53d1d35e2fSjeremylt bool has_backend_strides; 54d1d35e2fSjeremylt ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides); 55e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 56d1d35e2fSjeremylt if (has_backend_strides) { 57d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 587f90ec76Sjeremylt // This if branch is left separate to allow better inlining 59d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 60e1b98f6eSjeremylt CeedPragmaSIMD 61d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 621d79ecccSjeremylt CeedPragmaSIMD 63d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 647509a596Sjeremylt CeedPragmaSIMD 65d1d35e2fSjeremylt for (CeedInt j = 0; j < blk_size; j++) 66d1d35e2fSjeremylt vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset] 67d1d35e2fSjeremylt = uu[n + k*elem_size + 68d1d35e2fSjeremylt CeedIntMin(e+j, num_elem-1)*elem_size*num_comp]; 697f90ec76Sjeremylt } else { 707f90ec76Sjeremylt // User provided strides 717f90ec76Sjeremylt CeedInt strides[3]; 72e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 73d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 747f90ec76Sjeremylt CeedPragmaSIMD 75d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 767f90ec76Sjeremylt CeedPragmaSIMD 77d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 787f90ec76Sjeremylt CeedPragmaSIMD 79d1d35e2fSjeremylt for (CeedInt j = 0; j < blk_size; j++) 80d1d35e2fSjeremylt vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset] 817509a596Sjeremylt = uu[n*strides[0] + k*strides[1] + 82d1d35e2fSjeremylt CeedIntMin(e+j, num_elem-1)*strides[2]]; 837509a596Sjeremylt } 8421617c04Sjeremylt } else { 85d979a051Sjeremylt // Offsets provided, standard or blocked restriction 86d1d35e2fSjeremylt // vv has shape [elem_size, num_comp, num_elem], row-major 87d1d35e2fSjeremylt // uu has shape [nnodes, num_comp] 88d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 89e1b98f6eSjeremylt CeedPragmaSIMD 90d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 91e1b98f6eSjeremylt CeedPragmaSIMD 92d1d35e2fSjeremylt for (CeedInt i = 0; i < elem_size*blk_size; i++) 93d1d35e2fSjeremylt vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset] 94d1d35e2fSjeremylt = uu[impl->offsets[i+elem_size*e] + k*comp_stride]; 9521617c04Sjeremylt } 9621617c04Sjeremylt } else { 977f90ec76Sjeremylt // Restriction from E-vector to L-vector 988d94b059Sjeremylt // Performing v += r^T * u 99d979a051Sjeremylt // No offsets provided, Identity Restriction 100d979a051Sjeremylt if (!impl->offsets) { 101d1d35e2fSjeremylt bool has_backend_strides; 102d1d35e2fSjeremylt ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides); 103e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 104d1d35e2fSjeremylt if (has_backend_strides) { 105d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 1067f90ec76Sjeremylt // This if brach is left separate to allow better inlining 107d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 108523b8ea0Sjeremylt CeedPragmaSIMD 109d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 1107509a596Sjeremylt CeedPragmaSIMD 111d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 1127f90ec76Sjeremylt CeedPragmaSIMD 113d1d35e2fSjeremylt for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++) 114d1d35e2fSjeremylt vv[n + k*elem_size + (e+j)*elem_size*num_comp] 115d1d35e2fSjeremylt += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]; 1167f90ec76Sjeremylt } else { 1177f90ec76Sjeremylt // User provided strides 1187f90ec76Sjeremylt CeedInt strides[3]; 119e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 120d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 1217f90ec76Sjeremylt CeedPragmaSIMD 122d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 1237f90ec76Sjeremylt CeedPragmaSIMD 124d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 1257f90ec76Sjeremylt CeedPragmaSIMD 126d1d35e2fSjeremylt for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++) 1277509a596Sjeremylt vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]] 128d1d35e2fSjeremylt += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]; 129523b8ea0Sjeremylt } 13021617c04Sjeremylt } else { 131d979a051Sjeremylt // Offsets provided, standard or blocked restriction 132d1d35e2fSjeremylt // uu has shape [elem_size, num_comp, num_elem] 133d1d35e2fSjeremylt // vv has shape [nnodes, num_comp] 134d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 135d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 136d1d35e2fSjeremylt for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size) 1378d94b059Sjeremylt // Iteration bound set to discard padding elements 138d1d35e2fSjeremylt for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++) 139d1d35e2fSjeremylt vv[impl->offsets[j+e*elem_size] + k*comp_stride] 140d1d35e2fSjeremylt += uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset]; 14121617c04Sjeremylt } 14221617c04Sjeremylt } 143e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChkBackend(ierr); 144e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArray(v, &vv); CeedChkBackend(ierr); 14521617c04Sjeremylt if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 14621617c04Sjeremylt *request = NULL; 147e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 14821617c04Sjeremylt } 14921617c04Sjeremylt 150f10650afSjeremylt //------------------------------------------------------------------------------ 151f10650afSjeremylt // ElemRestriction Apply - Common Sizes 152f10650afSjeremylt //------------------------------------------------------------------------------ 153d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, 154d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 155d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 156d979a051Sjeremylt CeedVector v, CeedRequest *request) { 157d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, 158d1d35e2fSjeremylt t_mode, u, v, request); 159d979a051Sjeremylt } 160d979a051Sjeremylt 161d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, 162d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 163d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 164d979a051Sjeremylt CeedVector v, CeedRequest *request) { 165d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode, 1669c36149bSjeremylt u, v, request); 1674d2a38eeSjeremylt } 1684d2a38eeSjeremylt 169d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, 170d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 171d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 172d979a051Sjeremylt CeedVector v, CeedRequest *request) { 173d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, 174d1d35e2fSjeremylt t_mode, u, v, request); 1759c36149bSjeremylt } 1769c36149bSjeremylt 177d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, 178d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 179d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 180d979a051Sjeremylt CeedVector v, CeedRequest *request) { 181d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode, 1829c36149bSjeremylt u, v, request); 1839c36149bSjeremylt } 1849c36149bSjeremylt 185d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, 186d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 187d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 188d979a051Sjeremylt CeedVector v, CeedRequest *request) { 189d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, 190d1d35e2fSjeremylt t_mode, u, v, request); 191d979a051Sjeremylt } 192d979a051Sjeremylt 193d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, 194d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 195d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 196d979a051Sjeremylt CeedVector v, CeedRequest *request) { 197d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode, 198d979a051Sjeremylt u, v, request); 199d979a051Sjeremylt } 200d979a051Sjeremylt 201d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, 202d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 203d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 204d979a051Sjeremylt CeedVector v, CeedRequest *request) { 205d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, 206d1d35e2fSjeremylt t_mode, u, v, request); 207d979a051Sjeremylt } 208d979a051Sjeremylt 209d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, 210d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 211d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 212d979a051Sjeremylt CeedVector v, CeedRequest *request) { 213d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode, 214d979a051Sjeremylt u, v, request); 215d979a051Sjeremylt } 216d979a051Sjeremylt 217bf4d1581Sjeremylt // LCOV_EXCL_START 218d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, 219d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 220d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 221d979a051Sjeremylt CeedVector v, CeedRequest *request) { 222d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, 223d1d35e2fSjeremylt t_mode, u, v, request); 224d979a051Sjeremylt } 225bf4d1581Sjeremylt // LCOV_EXCL_STOP 226d979a051Sjeremylt 227d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, 228d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 229d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 230d979a051Sjeremylt CeedVector v, CeedRequest *request) { 231d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode, 232d979a051Sjeremylt u, v, request); 233d979a051Sjeremylt } 234d979a051Sjeremylt 235bf4d1581Sjeremylt // LCOV_EXCL_START 236d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, 237d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 238d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 239d979a051Sjeremylt CeedVector v, CeedRequest *request) { 240d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, 241d1d35e2fSjeremylt t_mode, u, v, request); 242d979a051Sjeremylt } 243bf4d1581Sjeremylt // LCOV_EXCL_STOP 244d979a051Sjeremylt 245d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, 246d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 247d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 248d979a051Sjeremylt CeedVector v, CeedRequest *request) { 249d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode, 2509c36149bSjeremylt u, v, request); 2514d2a38eeSjeremylt } 2524d2a38eeSjeremylt 253f10650afSjeremylt //------------------------------------------------------------------------------ 254f10650afSjeremylt // ElemRestriction Apply 255f10650afSjeremylt //------------------------------------------------------------------------------ 256be9261b7Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 257d1d35e2fSjeremylt CeedTransposeMode t_mode, CeedVector u, 258be9261b7Sjeremylt CeedVector v, CeedRequest *request) { 259be9261b7Sjeremylt int ierr; 260d1d35e2fSjeremylt CeedInt num_blk, blk_size, num_comp, comp_stride; 261d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr); 262d1d35e2fSjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 263d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 264d1d35e2fSjeremylt ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 2657509a596Sjeremylt CeedElemRestriction_Ref *impl; 266e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 2674d2a38eeSjeremylt 268d1d35e2fSjeremylt return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v, 269d979a051Sjeremylt request); 2709c36149bSjeremylt } 271be9261b7Sjeremylt 272f10650afSjeremylt //------------------------------------------------------------------------------ 273f10650afSjeremylt // ElemRestriction Apply Block 274f10650afSjeremylt //------------------------------------------------------------------------------ 275be9261b7Sjeremylt static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, 276d1d35e2fSjeremylt CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 277074cb416Sjeremylt CeedRequest *request) { 2784d2a38eeSjeremylt int ierr; 279d1d35e2fSjeremylt CeedInt blk_size, num_comp, comp_stride; 280d1d35e2fSjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 281d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 282d1d35e2fSjeremylt ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 2837509a596Sjeremylt CeedElemRestriction_Ref *impl; 284e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 2854d2a38eeSjeremylt 286d1d35e2fSjeremylt return impl->Apply(r, num_comp, blk_size, comp_stride, block, block+1, t_mode, 287d1d35e2fSjeremylt u, v, request); 2889c36149bSjeremylt } 289be9261b7Sjeremylt 290f10650afSjeremylt //------------------------------------------------------------------------------ 291bd33150aSjeremylt // ElemRestriction Get Offsets 292bd33150aSjeremylt //------------------------------------------------------------------------------ 293bd33150aSjeremylt static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, 294d1d35e2fSjeremylt CeedMemType mem_type, const CeedInt **offsets) { 295bd33150aSjeremylt int ierr; 296bd33150aSjeremylt CeedElemRestriction_Ref *impl; 297e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(rstr, &impl); CeedChkBackend(ierr); 298bd33150aSjeremylt Ceed ceed; 299e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChkBackend(ierr); 300bd33150aSjeremylt 301d1d35e2fSjeremylt if (mem_type != CEED_MEM_HOST) 302bd33150aSjeremylt // LCOV_EXCL_START 303e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 304bd33150aSjeremylt // LCOV_EXCL_STOP 305bd33150aSjeremylt 306bd33150aSjeremylt *offsets = impl->offsets; 307e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 308bd33150aSjeremylt } 309bd33150aSjeremylt 310bd33150aSjeremylt //------------------------------------------------------------------------------ 311f10650afSjeremylt // ElemRestriction Destroy 312f10650afSjeremylt //------------------------------------------------------------------------------ 31321617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 31421617c04Sjeremylt int ierr; 315fe2413ffSjeremylt CeedElemRestriction_Ref *impl; 316e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 31721617c04Sjeremylt 318e15f9bd0SJeremy L Thompson ierr = CeedFree(&impl->offsets_allocated); CeedChkBackend(ierr); 319e15f9bd0SJeremy L Thompson ierr = CeedFree(&impl); CeedChkBackend(ierr); 320e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 32121617c04Sjeremylt } 32221617c04Sjeremylt 323f10650afSjeremylt //------------------------------------------------------------------------------ 324f10650afSjeremylt // ElemRestriction Create 325f10650afSjeremylt //------------------------------------------------------------------------------ 326d1d35e2fSjeremylt int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, 327d979a051Sjeremylt const CeedInt *offsets, 328d979a051Sjeremylt CeedElemRestriction r) { 32921617c04Sjeremylt int ierr; 33021617c04Sjeremylt CeedElemRestriction_Ref *impl; 331d1d35e2fSjeremylt CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 332d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 333d1d35e2fSjeremylt ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 334d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr); 335d1d35e2fSjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 336d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 337d1d35e2fSjeremylt ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 3384ce2993fSjeremylt Ceed ceed; 339e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); 34021617c04Sjeremylt 341d1d35e2fSjeremylt if (mem_type != CEED_MEM_HOST) 342c042f62fSJeremy L Thompson // LCOV_EXCL_START 343e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 344c042f62fSJeremy L Thompson // LCOV_EXCL_STOP 345e15f9bd0SJeremy L Thompson ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 3463661185eSjeremylt 34792fe105eSJeremy L Thompson // Offsets data 348d1d35e2fSjeremylt bool is_strided; 349d1d35e2fSjeremylt ierr = CeedElemRestrictionIsStrided(r, &is_strided); CeedChkBackend(ierr); 350d1d35e2fSjeremylt if (!is_strided) { 35192fe105eSJeremy L Thompson // Check indices for ref or memcheck backends 352d1d35e2fSjeremylt Ceed parent_ceed = ceed, curr_ceed = NULL; 353d1d35e2fSjeremylt while (parent_ceed != curr_ceed) { 354d1d35e2fSjeremylt curr_ceed = parent_ceed; 355d1d35e2fSjeremylt ierr = CeedGetParent(curr_ceed, &parent_ceed); CeedChkBackend(ierr); 3563661185eSjeremylt } 3573661185eSjeremylt const char *resource; 358d1d35e2fSjeremylt ierr = CeedGetResource(parent_ceed, &resource); CeedChkBackend(ierr); 359d1d35e2fSjeremylt if (!strcmp(resource, "/cpu/self/ref/serial") || 360d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/ref/blocked") || 361d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/serial") || 362d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/blocked")) { 363d1d35e2fSjeremylt CeedInt l_size; 364d1d35e2fSjeremylt ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr); 3653661185eSjeremylt 366d1d35e2fSjeremylt for (CeedInt i = 0; i < num_elem*elem_size; i++) 367d1d35e2fSjeremylt if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp - 1) * comp_stride) 3683661185eSjeremylt // LCOV_EXCL_START 369e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 370e15f9bd0SJeremy L Thompson "Restriction offset %d (%d) out of range " 371d1d35e2fSjeremylt "[0, %d]", i, offsets[i], l_size); 3723661185eSjeremylt // LCOV_EXCL_STOP 3733661185eSjeremylt } 3743661185eSjeremylt 37592fe105eSJeremy L Thompson // Copy data 376d1d35e2fSjeremylt switch (copy_mode) { 37721617c04Sjeremylt case CEED_COPY_VALUES: 378d1d35e2fSjeremylt ierr = CeedMalloc(num_elem*elem_size, &impl->offsets_allocated); 379e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 380d979a051Sjeremylt memcpy(impl->offsets_allocated, offsets, 381d1d35e2fSjeremylt num_elem * elem_size * sizeof(offsets[0])); 382d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 38321617c04Sjeremylt break; 38421617c04Sjeremylt case CEED_OWN_POINTER: 385d979a051Sjeremylt impl->offsets_allocated = (CeedInt *)offsets; 386d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 38721617c04Sjeremylt break; 38821617c04Sjeremylt case CEED_USE_POINTER: 389d979a051Sjeremylt impl->offsets = offsets; 39021617c04Sjeremylt } 39192fe105eSJeremy L Thompson } 392fe2413ffSjeremylt 393e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionSetData(r, impl); CeedChkBackend(ierr); 394d1d35e2fSjeremylt CeedInt layout[3] = {1, elem_size, elem_size*num_comp}; 395e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionSetELayout(r, layout); CeedChkBackend(ierr); 396fe2413ffSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", 397e15f9bd0SJeremy L Thompson CeedElemRestrictionApply_Ref); CeedChkBackend(ierr); 398be9261b7Sjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", 399be9261b7Sjeremylt CeedElemRestrictionApplyBlock_Ref); 400e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 401bd33150aSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", 402bd33150aSjeremylt CeedElemRestrictionGetOffsets_Ref); 403e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 404fe2413ffSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", 405e15f9bd0SJeremy L Thompson CeedElemRestrictionDestroy_Ref); CeedChkBackend(ierr); 406d979a051Sjeremylt 407d1d35e2fSjeremylt // Set apply function based upon num_comp, blk_size, and comp_stride 408d979a051Sjeremylt CeedInt idx = -1; 409d1d35e2fSjeremylt if (blk_size < 10) 410d1d35e2fSjeremylt idx = 100*num_comp + 10*blk_size + (comp_stride == 1); 411d979a051Sjeremylt switch (idx) { 412d979a051Sjeremylt case 110: 413d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_110; 414d979a051Sjeremylt break; 415d979a051Sjeremylt case 111: 416d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_111; 417d979a051Sjeremylt break; 418d979a051Sjeremylt case 180: 419d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_180; 420d979a051Sjeremylt break; 421d979a051Sjeremylt case 181: 422d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_181; 423d979a051Sjeremylt break; 424d979a051Sjeremylt case 310: 425d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_310; 426d979a051Sjeremylt break; 427d979a051Sjeremylt case 311: 428d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_311; 429d979a051Sjeremylt break; 430d979a051Sjeremylt case 380: 431d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_380; 432d979a051Sjeremylt break; 433d979a051Sjeremylt case 381: 434d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_381; 435d979a051Sjeremylt break; 436bf4d1581Sjeremylt // LCOV_EXCL_START 437d979a051Sjeremylt case 510: 438d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_510; 439d979a051Sjeremylt break; 440bf4d1581Sjeremylt // LCOV_EXCL_STOP 441d979a051Sjeremylt case 511: 442d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_511; 443d979a051Sjeremylt break; 444bf4d1581Sjeremylt // LCOV_EXCL_START 445d979a051Sjeremylt case 580: 446d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_580; 447d979a051Sjeremylt break; 448bf4d1581Sjeremylt // LCOV_EXCL_STOP 449d979a051Sjeremylt case 581: 450d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_581; 451d979a051Sjeremylt break; 452d979a051Sjeremylt default: 453d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_Core; 454d979a051Sjeremylt break; 455d979a051Sjeremylt } 456d979a051Sjeremylt 457e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 45821617c04Sjeremylt } 459f10650afSjeremylt //------------------------------------------------------------------------------ 460