121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details. 421617c04Sjeremylt // 521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software 621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral 721617c04Sjeremylt // element discretizations for exascale applications. For more information and 821617c04Sjeremylt // source code availability see http://github.com/ceed. 921617c04Sjeremylt // 1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office 1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for 1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including 1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early 1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative. 1621617c04Sjeremylt 17ec3da8bcSJed Brown #include <ceed/ceed.h> 18ec3da8bcSJed Brown #include <ceed/backend.h> 193d576824SJeremy L Thompson #include <stdbool.h> 203d576824SJeremy L Thompson #include <string.h> 2121617c04Sjeremylt #include "ceed-ref.h" 2221617c04Sjeremylt 23f10650afSjeremylt //------------------------------------------------------------------------------ 24f10650afSjeremylt // Core ElemRestriction Apply Code 25f10650afSjeremylt //------------------------------------------------------------------------------ 26be9261b7Sjeremylt static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, 27d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 28d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 299c36149bSjeremylt CeedVector v, CeedRequest *request) { 3021617c04Sjeremylt int ierr; 314ce2993fSjeremylt CeedElemRestriction_Ref *impl; 32e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 3321617c04Sjeremylt const CeedScalar *uu; 3421617c04Sjeremylt CeedScalar *vv; 35d1d35e2fSjeremylt CeedInt num_elem, elem_size, v_offset; 36d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 37d1d35e2fSjeremylt ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 38d1d35e2fSjeremylt v_offset = start*blk_size*elem_size*num_comp; 3921617c04Sjeremylt 40b435c5a6Srezgarshakeri bool is_oriented; 41b435c5a6Srezgarshakeri ierr = CeedElemRestrictionIsOriented(r, &is_oriented); CeedChkBackend(ierr); 42e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChkBackend(ierr); 439c774eddSJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 449c774eddSJeremy L Thompson // Sum into for transpose mode, e-vec to l-vec 45e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr); 469c774eddSJeremy L Thompson } else { 479c774eddSJeremy L Thompson // Overwrite for notranspose mode, l-vec to e-vec 489c774eddSJeremy L Thompson ierr = CeedVectorGetArrayWrite(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr); 499c774eddSJeremy L Thompson } 507f90ec76Sjeremylt // Restriction from L-vector to E-vector 5121617c04Sjeremylt // Perform: v = r * u 52d1d35e2fSjeremylt if (t_mode == CEED_NOTRANSPOSE) { 53d979a051Sjeremylt // No offsets provided, Identity Restriction 54d979a051Sjeremylt if (!impl->offsets) { 55d1d35e2fSjeremylt bool has_backend_strides; 56d1d35e2fSjeremylt ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides); 57e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 58d1d35e2fSjeremylt if (has_backend_strides) { 59d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 607f90ec76Sjeremylt // This if branch is left separate to allow better inlining 61d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 62e1b98f6eSjeremylt CeedPragmaSIMD 63d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 641d79ecccSjeremylt CeedPragmaSIMD 65d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 667509a596Sjeremylt CeedPragmaSIMD 67d1d35e2fSjeremylt for (CeedInt j = 0; j < blk_size; j++) 68d1d35e2fSjeremylt vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset] 69d1d35e2fSjeremylt = uu[n + k*elem_size + 70d1d35e2fSjeremylt CeedIntMin(e+j, num_elem-1)*elem_size*num_comp]; 717f90ec76Sjeremylt } else { 727f90ec76Sjeremylt // User provided strides 737f90ec76Sjeremylt CeedInt strides[3]; 74e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 75d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 767f90ec76Sjeremylt CeedPragmaSIMD 77d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 787f90ec76Sjeremylt CeedPragmaSIMD 79d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 807f90ec76Sjeremylt CeedPragmaSIMD 81d1d35e2fSjeremylt for (CeedInt j = 0; j < blk_size; j++) 82d1d35e2fSjeremylt vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset] 837509a596Sjeremylt = uu[n*strides[0] + k*strides[1] + 84d1d35e2fSjeremylt CeedIntMin(e+j, num_elem-1)*strides[2]]; 857509a596Sjeremylt } 8621617c04Sjeremylt } else { 87d979a051Sjeremylt // Offsets provided, standard or blocked restriction 88d1d35e2fSjeremylt // vv has shape [elem_size, num_comp, num_elem], row-major 89d1d35e2fSjeremylt // uu has shape [nnodes, num_comp] 90d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 91e1b98f6eSjeremylt CeedPragmaSIMD 92d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 93e1b98f6eSjeremylt CeedPragmaSIMD 94d1d35e2fSjeremylt for (CeedInt i = 0; i < elem_size*blk_size; i++) 95d1d35e2fSjeremylt vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset] 96fc0567d9Srezgarshakeri = uu[impl->offsets[i+elem_size*e] + k*comp_stride] * 97000294e3Srezgarshakeri (is_oriented && impl->orient[i+elem_size*e] ? -1. : 1.); 98b435c5a6Srezgarshakeri } 9921617c04Sjeremylt } else { 1007f90ec76Sjeremylt // Restriction from E-vector to L-vector 1018d94b059Sjeremylt // Performing v += r^T * u 102d979a051Sjeremylt // No offsets provided, Identity Restriction 103d979a051Sjeremylt if (!impl->offsets) { 104d1d35e2fSjeremylt bool has_backend_strides; 105d1d35e2fSjeremylt ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides); 106e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 107d1d35e2fSjeremylt if (has_backend_strides) { 108d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 1097f90ec76Sjeremylt // This if brach is left separate to allow better inlining 110d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 111523b8ea0Sjeremylt CeedPragmaSIMD 112d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 1137509a596Sjeremylt CeedPragmaSIMD 114d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 1157f90ec76Sjeremylt CeedPragmaSIMD 116d1d35e2fSjeremylt for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++) 117d1d35e2fSjeremylt vv[n + k*elem_size + (e+j)*elem_size*num_comp] 118d1d35e2fSjeremylt += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]; 1197f90ec76Sjeremylt } else { 1207f90ec76Sjeremylt // User provided strides 1217f90ec76Sjeremylt CeedInt strides[3]; 122e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 123d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 1247f90ec76Sjeremylt CeedPragmaSIMD 125d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 1267f90ec76Sjeremylt CeedPragmaSIMD 127d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 1287f90ec76Sjeremylt CeedPragmaSIMD 129d1d35e2fSjeremylt for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++) 1307509a596Sjeremylt vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]] 131d1d35e2fSjeremylt += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]; 132523b8ea0Sjeremylt } 13321617c04Sjeremylt } else { 134d979a051Sjeremylt // Offsets provided, standard or blocked restriction 135d1d35e2fSjeremylt // uu has shape [elem_size, num_comp, num_elem] 136d1d35e2fSjeremylt // vv has shape [nnodes, num_comp] 137d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 138d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 139d1d35e2fSjeremylt for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size) 1408d94b059Sjeremylt // Iteration bound set to discard padding elements 141d1d35e2fSjeremylt for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++) 142d1d35e2fSjeremylt vv[impl->offsets[j+e*elem_size] + k*comp_stride] 143fc0567d9Srezgarshakeri += uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset] * 144000294e3Srezgarshakeri (is_oriented && impl->orient[j+e*elem_size] ? -1. : 1.); 14521617c04Sjeremylt } 146b435c5a6Srezgarshakeri } 147e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChkBackend(ierr); 148e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArray(v, &vv); CeedChkBackend(ierr); 14921617c04Sjeremylt if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 15021617c04Sjeremylt *request = NULL; 151e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 15221617c04Sjeremylt } 15321617c04Sjeremylt 154f10650afSjeremylt //------------------------------------------------------------------------------ 155f10650afSjeremylt // ElemRestriction Apply - Common Sizes 156f10650afSjeremylt //------------------------------------------------------------------------------ 157d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, 158d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 159d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 160d979a051Sjeremylt CeedVector v, CeedRequest *request) { 161d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, 162d1d35e2fSjeremylt t_mode, u, v, request); 163d979a051Sjeremylt } 164d979a051Sjeremylt 165d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, 166d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 167d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 168d979a051Sjeremylt CeedVector v, CeedRequest *request) { 169d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode, 1709c36149bSjeremylt u, v, request); 1714d2a38eeSjeremylt } 1724d2a38eeSjeremylt 173d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, 174d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 175d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 176d979a051Sjeremylt CeedVector v, CeedRequest *request) { 177d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, 178d1d35e2fSjeremylt t_mode, u, v, request); 1799c36149bSjeremylt } 1809c36149bSjeremylt 181d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, 182d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 183d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 184d979a051Sjeremylt CeedVector v, CeedRequest *request) { 185d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode, 1869c36149bSjeremylt u, v, request); 1879c36149bSjeremylt } 1889c36149bSjeremylt 189d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, 190d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 191d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 192d979a051Sjeremylt CeedVector v, CeedRequest *request) { 193d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, 194d1d35e2fSjeremylt t_mode, u, v, request); 195d979a051Sjeremylt } 196d979a051Sjeremylt 197d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, 198d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 199d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 200d979a051Sjeremylt CeedVector v, CeedRequest *request) { 201d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode, 202d979a051Sjeremylt u, v, request); 203d979a051Sjeremylt } 204d979a051Sjeremylt 205d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, 206d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 207d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 208d979a051Sjeremylt CeedVector v, CeedRequest *request) { 209d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, 210d1d35e2fSjeremylt t_mode, u, v, request); 211d979a051Sjeremylt } 212d979a051Sjeremylt 213d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, 214d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 215d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 216d979a051Sjeremylt CeedVector v, CeedRequest *request) { 217d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode, 218d979a051Sjeremylt u, v, request); 219d979a051Sjeremylt } 220d979a051Sjeremylt 221bf4d1581Sjeremylt // LCOV_EXCL_START 222d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, 223d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 224d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 225d979a051Sjeremylt CeedVector v, CeedRequest *request) { 226d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, 227d1d35e2fSjeremylt t_mode, u, v, request); 228d979a051Sjeremylt } 229bf4d1581Sjeremylt // LCOV_EXCL_STOP 230d979a051Sjeremylt 231d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, 232d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 233d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 234d979a051Sjeremylt CeedVector v, CeedRequest *request) { 235d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode, 236d979a051Sjeremylt u, v, request); 237d979a051Sjeremylt } 238d979a051Sjeremylt 239bf4d1581Sjeremylt // LCOV_EXCL_START 240d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, 241d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 242d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 243d979a051Sjeremylt CeedVector v, CeedRequest *request) { 244d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, 245d1d35e2fSjeremylt t_mode, u, v, request); 246d979a051Sjeremylt } 247bf4d1581Sjeremylt // LCOV_EXCL_STOP 248d979a051Sjeremylt 249d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, 250d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 251d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 252d979a051Sjeremylt CeedVector v, CeedRequest *request) { 253d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode, 2549c36149bSjeremylt u, v, request); 2554d2a38eeSjeremylt } 2564d2a38eeSjeremylt 257f10650afSjeremylt //------------------------------------------------------------------------------ 258f10650afSjeremylt // ElemRestriction Apply 259f10650afSjeremylt //------------------------------------------------------------------------------ 260be9261b7Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 261d1d35e2fSjeremylt CeedTransposeMode t_mode, CeedVector u, 262be9261b7Sjeremylt CeedVector v, CeedRequest *request) { 263be9261b7Sjeremylt int ierr; 264d1d35e2fSjeremylt CeedInt num_blk, blk_size, num_comp, comp_stride; 265d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr); 266d1d35e2fSjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 267d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 268d1d35e2fSjeremylt ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 2697509a596Sjeremylt CeedElemRestriction_Ref *impl; 270e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 2714d2a38eeSjeremylt 272d1d35e2fSjeremylt return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v, 273d979a051Sjeremylt request); 2749c36149bSjeremylt } 275be9261b7Sjeremylt 276f10650afSjeremylt //------------------------------------------------------------------------------ 277f10650afSjeremylt // ElemRestriction Apply Block 278f10650afSjeremylt //------------------------------------------------------------------------------ 279be9261b7Sjeremylt static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, 280d1d35e2fSjeremylt CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 281074cb416Sjeremylt CeedRequest *request) { 2824d2a38eeSjeremylt int ierr; 283d1d35e2fSjeremylt CeedInt blk_size, num_comp, comp_stride; 284d1d35e2fSjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 285d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 286d1d35e2fSjeremylt ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 2877509a596Sjeremylt CeedElemRestriction_Ref *impl; 288e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 2894d2a38eeSjeremylt 290d1d35e2fSjeremylt return impl->Apply(r, num_comp, blk_size, comp_stride, block, block+1, t_mode, 291d1d35e2fSjeremylt u, v, request); 2929c36149bSjeremylt } 293be9261b7Sjeremylt 294f10650afSjeremylt //------------------------------------------------------------------------------ 295bd33150aSjeremylt // ElemRestriction Get Offsets 296bd33150aSjeremylt //------------------------------------------------------------------------------ 297bd33150aSjeremylt static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, 298d1d35e2fSjeremylt CeedMemType mem_type, const CeedInt **offsets) { 299bd33150aSjeremylt int ierr; 300bd33150aSjeremylt CeedElemRestriction_Ref *impl; 301e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(rstr, &impl); CeedChkBackend(ierr); 302bd33150aSjeremylt Ceed ceed; 303e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChkBackend(ierr); 304bd33150aSjeremylt 305d1d35e2fSjeremylt if (mem_type != CEED_MEM_HOST) 306bd33150aSjeremylt // LCOV_EXCL_START 307e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 308bd33150aSjeremylt // LCOV_EXCL_STOP 309bd33150aSjeremylt 310bd33150aSjeremylt *offsets = impl->offsets; 311e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 312bd33150aSjeremylt } 313bd33150aSjeremylt 314bd33150aSjeremylt //------------------------------------------------------------------------------ 315f10650afSjeremylt // ElemRestriction Destroy 316f10650afSjeremylt //------------------------------------------------------------------------------ 31721617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 31821617c04Sjeremylt int ierr; 319fe2413ffSjeremylt CeedElemRestriction_Ref *impl; 320e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 32121617c04Sjeremylt 322e15f9bd0SJeremy L Thompson ierr = CeedFree(&impl->offsets_allocated); CeedChkBackend(ierr); 323e15f9bd0SJeremy L Thompson ierr = CeedFree(&impl); CeedChkBackend(ierr); 324e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 32521617c04Sjeremylt } 32621617c04Sjeremylt 327f10650afSjeremylt //------------------------------------------------------------------------------ 328f10650afSjeremylt // ElemRestriction Create 329f10650afSjeremylt //------------------------------------------------------------------------------ 330d1d35e2fSjeremylt int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, 331d979a051Sjeremylt const CeedInt *offsets, 332d979a051Sjeremylt CeedElemRestriction r) { 33321617c04Sjeremylt int ierr; 33421617c04Sjeremylt CeedElemRestriction_Ref *impl; 335d1d35e2fSjeremylt CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 336d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 337d1d35e2fSjeremylt ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 338d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr); 339d1d35e2fSjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 340d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 341d1d35e2fSjeremylt ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 3424ce2993fSjeremylt Ceed ceed; 343e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); 34421617c04Sjeremylt 345d1d35e2fSjeremylt if (mem_type != CEED_MEM_HOST) 346c042f62fSJeremy L Thompson // LCOV_EXCL_START 347e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 348c042f62fSJeremy L Thompson // LCOV_EXCL_STOP 349e15f9bd0SJeremy L Thompson ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 3503661185eSjeremylt 35192fe105eSJeremy L Thompson // Offsets data 352d1d35e2fSjeremylt bool is_strided; 353d1d35e2fSjeremylt ierr = CeedElemRestrictionIsStrided(r, &is_strided); CeedChkBackend(ierr); 354d1d35e2fSjeremylt if (!is_strided) { 35592fe105eSJeremy L Thompson // Check indices for ref or memcheck backends 356d1d35e2fSjeremylt Ceed parent_ceed = ceed, curr_ceed = NULL; 357d1d35e2fSjeremylt while (parent_ceed != curr_ceed) { 358d1d35e2fSjeremylt curr_ceed = parent_ceed; 359d1d35e2fSjeremylt ierr = CeedGetParent(curr_ceed, &parent_ceed); CeedChkBackend(ierr); 3603661185eSjeremylt } 3613661185eSjeremylt const char *resource; 362d1d35e2fSjeremylt ierr = CeedGetResource(parent_ceed, &resource); CeedChkBackend(ierr); 363d1d35e2fSjeremylt if (!strcmp(resource, "/cpu/self/ref/serial") || 364d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/ref/blocked") || 365d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/serial") || 366d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/blocked")) { 367*e79b91d9SJeremy L Thompson CeedSize l_size; 368d1d35e2fSjeremylt ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr); 3693661185eSjeremylt 370d1d35e2fSjeremylt for (CeedInt i = 0; i < num_elem*elem_size; i++) 371d1d35e2fSjeremylt if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp - 1) * comp_stride) 3723661185eSjeremylt // LCOV_EXCL_START 373e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 374e15f9bd0SJeremy L Thompson "Restriction offset %d (%d) out of range " 375d1d35e2fSjeremylt "[0, %d]", i, offsets[i], l_size); 3763661185eSjeremylt // LCOV_EXCL_STOP 3773661185eSjeremylt } 3783661185eSjeremylt 37992fe105eSJeremy L Thompson // Copy data 380d1d35e2fSjeremylt switch (copy_mode) { 38121617c04Sjeremylt case CEED_COPY_VALUES: 382d1d35e2fSjeremylt ierr = CeedMalloc(num_elem*elem_size, &impl->offsets_allocated); 383e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 384d979a051Sjeremylt memcpy(impl->offsets_allocated, offsets, 385d1d35e2fSjeremylt num_elem * elem_size * sizeof(offsets[0])); 386d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 38721617c04Sjeremylt break; 38821617c04Sjeremylt case CEED_OWN_POINTER: 389d979a051Sjeremylt impl->offsets_allocated = (CeedInt *)offsets; 390d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 39121617c04Sjeremylt break; 39221617c04Sjeremylt case CEED_USE_POINTER: 393d979a051Sjeremylt impl->offsets = offsets; 39421617c04Sjeremylt } 39592fe105eSJeremy L Thompson } 396fe2413ffSjeremylt 397e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionSetData(r, impl); CeedChkBackend(ierr); 398d1d35e2fSjeremylt CeedInt layout[3] = {1, elem_size, elem_size*num_comp}; 399e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionSetELayout(r, layout); CeedChkBackend(ierr); 400fe2413ffSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", 401e15f9bd0SJeremy L Thompson CeedElemRestrictionApply_Ref); CeedChkBackend(ierr); 402be9261b7Sjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", 403be9261b7Sjeremylt CeedElemRestrictionApplyBlock_Ref); 404e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 405bd33150aSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", 406bd33150aSjeremylt CeedElemRestrictionGetOffsets_Ref); 407e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 408fe2413ffSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", 409e15f9bd0SJeremy L Thompson CeedElemRestrictionDestroy_Ref); CeedChkBackend(ierr); 410d979a051Sjeremylt 411d1d35e2fSjeremylt // Set apply function based upon num_comp, blk_size, and comp_stride 412d979a051Sjeremylt CeedInt idx = -1; 413d1d35e2fSjeremylt if (blk_size < 10) 414d1d35e2fSjeremylt idx = 100*num_comp + 10*blk_size + (comp_stride == 1); 415d979a051Sjeremylt switch (idx) { 416d979a051Sjeremylt case 110: 417d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_110; 418d979a051Sjeremylt break; 419d979a051Sjeremylt case 111: 420d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_111; 421d979a051Sjeremylt break; 422d979a051Sjeremylt case 180: 423d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_180; 424d979a051Sjeremylt break; 425d979a051Sjeremylt case 181: 426d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_181; 427d979a051Sjeremylt break; 428d979a051Sjeremylt case 310: 429d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_310; 430d979a051Sjeremylt break; 431d979a051Sjeremylt case 311: 432d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_311; 433d979a051Sjeremylt break; 434d979a051Sjeremylt case 380: 435d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_380; 436d979a051Sjeremylt break; 437d979a051Sjeremylt case 381: 438d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_381; 439d979a051Sjeremylt break; 440bf4d1581Sjeremylt // LCOV_EXCL_START 441d979a051Sjeremylt case 510: 442d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_510; 443d979a051Sjeremylt break; 444bf4d1581Sjeremylt // LCOV_EXCL_STOP 445d979a051Sjeremylt case 511: 446d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_511; 447d979a051Sjeremylt break; 448bf4d1581Sjeremylt // LCOV_EXCL_START 449d979a051Sjeremylt case 580: 450d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_580; 451d979a051Sjeremylt break; 452bf4d1581Sjeremylt // LCOV_EXCL_STOP 453d979a051Sjeremylt case 581: 454d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_581; 455d979a051Sjeremylt break; 456d979a051Sjeremylt default: 457d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_Core; 458d979a051Sjeremylt break; 459d979a051Sjeremylt } 460d979a051Sjeremylt 461e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 46221617c04Sjeremylt } 463fc0567d9Srezgarshakeri 464fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 465fc0567d9Srezgarshakeri // ElemRestriction Create Oriented 466fc0567d9Srezgarshakeri //------------------------------------------------------------------------------ 467fc0567d9Srezgarshakeri int CeedElemRestrictionCreateOriented_Ref(CeedMemType mem_type, 468fc0567d9Srezgarshakeri CeedCopyMode copy_mode, 469fc0567d9Srezgarshakeri const CeedInt *offsets, const bool *orient, 470fc0567d9Srezgarshakeri CeedElemRestriction r) { 471fc0567d9Srezgarshakeri int ierr; 472fc0567d9Srezgarshakeri CeedElemRestriction_Ref *impl; 473fc0567d9Srezgarshakeri CeedInt num_elem, elem_size; 474fc0567d9Srezgarshakeri // Set up for normal restriction with explicit offsets. This sets up dispatch to 475fc0567d9Srezgarshakeri // CeedElemRestrictionApply_Ref_* and manages the impl->offsets array copy/allocation. 476fc0567d9Srezgarshakeri ierr = CeedElemRestrictionCreate_Ref(mem_type, copy_mode, offsets, r); 477fc0567d9Srezgarshakeri CeedChkBackend(ierr); 478fc0567d9Srezgarshakeri 479fc0567d9Srezgarshakeri ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 480fc0567d9Srezgarshakeri ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 481fc0567d9Srezgarshakeri ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 482fc0567d9Srezgarshakeri switch (copy_mode) { 483fc0567d9Srezgarshakeri case CEED_COPY_VALUES: 484fc0567d9Srezgarshakeri ierr = CeedMalloc(num_elem * elem_size, &impl->orient_allocated); 485fc0567d9Srezgarshakeri CeedChkBackend(ierr); 486fc0567d9Srezgarshakeri memcpy(impl->orient_allocated, orient, 487fc0567d9Srezgarshakeri num_elem * elem_size * sizeof(orient[0])); 488fc0567d9Srezgarshakeri impl->orient = impl->orient_allocated; 489fc0567d9Srezgarshakeri break; 490fc0567d9Srezgarshakeri case CEED_OWN_POINTER: 491fc0567d9Srezgarshakeri impl->orient_allocated = (bool *)orient; 492fc0567d9Srezgarshakeri impl->orient = impl->orient_allocated; 493fc0567d9Srezgarshakeri break; 494fc0567d9Srezgarshakeri case CEED_USE_POINTER: 495fc0567d9Srezgarshakeri impl->orient = orient; 496fc0567d9Srezgarshakeri } 497fc0567d9Srezgarshakeri return CEED_ERROR_SUCCESS; 498fc0567d9Srezgarshakeri } 499f10650afSjeremylt //------------------------------------------------------------------------------ 500