121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details. 421617c04Sjeremylt // 521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software 621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral 721617c04Sjeremylt // element discretizations for exascale applications. For more information and 821617c04Sjeremylt // source code availability see http://github.com/ceed. 921617c04Sjeremylt // 1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office 1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for 1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including 1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early 1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative. 1621617c04Sjeremylt 17ec3da8bcSJed Brown #include <ceed/ceed.h> 18ec3da8bcSJed Brown #include <ceed/backend.h> 193d576824SJeremy L Thompson #include <stdbool.h> 203d576824SJeremy L Thompson #include <string.h> 2121617c04Sjeremylt #include "ceed-ref.h" 2221617c04Sjeremylt 23f10650afSjeremylt //------------------------------------------------------------------------------ 24f10650afSjeremylt // Core ElemRestriction Apply Code 25f10650afSjeremylt //------------------------------------------------------------------------------ 26be9261b7Sjeremylt static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, 27*d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 28*d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 299c36149bSjeremylt CeedVector v, CeedRequest *request) { 3021617c04Sjeremylt int ierr; 314ce2993fSjeremylt CeedElemRestriction_Ref *impl; 32e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 3321617c04Sjeremylt const CeedScalar *uu; 3421617c04Sjeremylt CeedScalar *vv; 35*d1d35e2fSjeremylt CeedInt num_elem, elem_size, v_offset; 36*d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 37*d1d35e2fSjeremylt ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 38*d1d35e2fSjeremylt v_offset = start*blk_size*elem_size*num_comp; 3921617c04Sjeremylt 40e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChkBackend(ierr); 41e15f9bd0SJeremy L Thompson ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChkBackend(ierr); 427f90ec76Sjeremylt // Restriction from L-vector to E-vector 4321617c04Sjeremylt // Perform: v = r * u 44*d1d35e2fSjeremylt if (t_mode == CEED_NOTRANSPOSE) { 45d979a051Sjeremylt // No offsets provided, Identity Restriction 46d979a051Sjeremylt if (!impl->offsets) { 47*d1d35e2fSjeremylt bool has_backend_strides; 48*d1d35e2fSjeremylt ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides); 49e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 50*d1d35e2fSjeremylt if (has_backend_strides) { 51*d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 527f90ec76Sjeremylt // This if branch is left separate to allow better inlining 53*d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 54e1b98f6eSjeremylt CeedPragmaSIMD 55*d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 561d79ecccSjeremylt CeedPragmaSIMD 57*d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 587509a596Sjeremylt CeedPragmaSIMD 59*d1d35e2fSjeremylt for (CeedInt j = 0; j < blk_size; j++) 60*d1d35e2fSjeremylt vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset] 61*d1d35e2fSjeremylt = uu[n + k*elem_size + 62*d1d35e2fSjeremylt CeedIntMin(e+j, num_elem-1)*elem_size*num_comp]; 637f90ec76Sjeremylt } else { 647f90ec76Sjeremylt // User provided strides 657f90ec76Sjeremylt CeedInt strides[3]; 66e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 67*d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 687f90ec76Sjeremylt CeedPragmaSIMD 69*d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 707f90ec76Sjeremylt CeedPragmaSIMD 71*d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 727f90ec76Sjeremylt CeedPragmaSIMD 73*d1d35e2fSjeremylt for (CeedInt j = 0; j < blk_size; j++) 74*d1d35e2fSjeremylt vv[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset] 757509a596Sjeremylt = uu[n*strides[0] + k*strides[1] + 76*d1d35e2fSjeremylt CeedIntMin(e+j, num_elem-1)*strides[2]]; 777509a596Sjeremylt } 7821617c04Sjeremylt } else { 79d979a051Sjeremylt // Offsets provided, standard or blocked restriction 80*d1d35e2fSjeremylt // vv has shape [elem_size, num_comp, num_elem], row-major 81*d1d35e2fSjeremylt // uu has shape [nnodes, num_comp] 82*d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 83e1b98f6eSjeremylt CeedPragmaSIMD 84*d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 85e1b98f6eSjeremylt CeedPragmaSIMD 86*d1d35e2fSjeremylt for (CeedInt i = 0; i < elem_size*blk_size; i++) 87*d1d35e2fSjeremylt vv[elem_size*(k*blk_size+num_comp*e) + i - v_offset] 88*d1d35e2fSjeremylt = uu[impl->offsets[i+elem_size*e] + k*comp_stride]; 8921617c04Sjeremylt } 9021617c04Sjeremylt } else { 917f90ec76Sjeremylt // Restriction from E-vector to L-vector 928d94b059Sjeremylt // Performing v += r^T * u 93d979a051Sjeremylt // No offsets provided, Identity Restriction 94d979a051Sjeremylt if (!impl->offsets) { 95*d1d35e2fSjeremylt bool has_backend_strides; 96*d1d35e2fSjeremylt ierr = CeedElemRestrictionHasBackendStrides(r, &has_backend_strides); 97e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 98*d1d35e2fSjeremylt if (has_backend_strides) { 99*d1d35e2fSjeremylt // CPU backend strides are {1, elem_size, elem_size*num_comp} 1007f90ec76Sjeremylt // This if brach is left separate to allow better inlining 101*d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 102523b8ea0Sjeremylt CeedPragmaSIMD 103*d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 1047509a596Sjeremylt CeedPragmaSIMD 105*d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 1067f90ec76Sjeremylt CeedPragmaSIMD 107*d1d35e2fSjeremylt for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++) 108*d1d35e2fSjeremylt vv[n + k*elem_size + (e+j)*elem_size*num_comp] 109*d1d35e2fSjeremylt += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]; 1107f90ec76Sjeremylt } else { 1117f90ec76Sjeremylt // User provided strides 1127f90ec76Sjeremylt CeedInt strides[3]; 113e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChkBackend(ierr); 114*d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 1157f90ec76Sjeremylt CeedPragmaSIMD 116*d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 1177f90ec76Sjeremylt CeedPragmaSIMD 118*d1d35e2fSjeremylt for (CeedInt n = 0; n < elem_size; n++) 1197f90ec76Sjeremylt CeedPragmaSIMD 120*d1d35e2fSjeremylt for (CeedInt j = 0; j < CeedIntMin(blk_size, num_elem-e); j++) 1217509a596Sjeremylt vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]] 122*d1d35e2fSjeremylt += uu[e*elem_size*num_comp + (k*elem_size+n)*blk_size + j - v_offset]; 123523b8ea0Sjeremylt } 12421617c04Sjeremylt } else { 125d979a051Sjeremylt // Offsets provided, standard or blocked restriction 126*d1d35e2fSjeremylt // uu has shape [elem_size, num_comp, num_elem] 127*d1d35e2fSjeremylt // vv has shape [nnodes, num_comp] 128*d1d35e2fSjeremylt for (CeedInt e = start*blk_size; e < stop*blk_size; e+=blk_size) 129*d1d35e2fSjeremylt for (CeedInt k = 0; k < num_comp; k++) 130*d1d35e2fSjeremylt for (CeedInt i = 0; i < elem_size*blk_size; i+=blk_size) 1318d94b059Sjeremylt // Iteration bound set to discard padding elements 132*d1d35e2fSjeremylt for (CeedInt j = i; j < i+CeedIntMin(blk_size, num_elem-e); j++) 133*d1d35e2fSjeremylt vv[impl->offsets[j+e*elem_size] + k*comp_stride] 134*d1d35e2fSjeremylt += uu[elem_size*(k*blk_size+num_comp*e) + j - v_offset]; 13521617c04Sjeremylt } 13621617c04Sjeremylt } 137e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChkBackend(ierr); 138e15f9bd0SJeremy L Thompson ierr = CeedVectorRestoreArray(v, &vv); CeedChkBackend(ierr); 13921617c04Sjeremylt if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 14021617c04Sjeremylt *request = NULL; 141e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 14221617c04Sjeremylt } 14321617c04Sjeremylt 144f10650afSjeremylt //------------------------------------------------------------------------------ 145f10650afSjeremylt // ElemRestriction Apply - Common Sizes 146f10650afSjeremylt //------------------------------------------------------------------------------ 147d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, 148*d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 149*d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 150d979a051Sjeremylt CeedVector v, CeedRequest *request) { 151*d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 1, comp_stride, start, stop, 152*d1d35e2fSjeremylt t_mode, u, v, request); 153d979a051Sjeremylt } 154d979a051Sjeremylt 155d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, 156*d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 157*d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 158d979a051Sjeremylt CeedVector v, CeedRequest *request) { 159*d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, t_mode, 1609c36149bSjeremylt u, v, request); 1614d2a38eeSjeremylt } 1624d2a38eeSjeremylt 163d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, 164*d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 165*d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 166d979a051Sjeremylt CeedVector v, CeedRequest *request) { 167*d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 8, comp_stride, start, stop, 168*d1d35e2fSjeremylt t_mode, u, v, request); 1699c36149bSjeremylt } 1709c36149bSjeremylt 171d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, 172*d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 173*d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 174d979a051Sjeremylt CeedVector v, CeedRequest *request) { 175*d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, t_mode, 1769c36149bSjeremylt u, v, request); 1779c36149bSjeremylt } 1789c36149bSjeremylt 179d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, 180*d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 181*d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 182d979a051Sjeremylt CeedVector v, CeedRequest *request) { 183*d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 1, comp_stride, start, stop, 184*d1d35e2fSjeremylt t_mode, u, v, request); 185d979a051Sjeremylt } 186d979a051Sjeremylt 187d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, 188*d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 189*d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 190d979a051Sjeremylt CeedVector v, CeedRequest *request) { 191*d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, t_mode, 192d979a051Sjeremylt u, v, request); 193d979a051Sjeremylt } 194d979a051Sjeremylt 195d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, 196*d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 197*d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 198d979a051Sjeremylt CeedVector v, CeedRequest *request) { 199*d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 8, comp_stride, start, stop, 200*d1d35e2fSjeremylt t_mode, u, v, request); 201d979a051Sjeremylt } 202d979a051Sjeremylt 203d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, 204*d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 205*d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 206d979a051Sjeremylt CeedVector v, CeedRequest *request) { 207*d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, t_mode, 208d979a051Sjeremylt u, v, request); 209d979a051Sjeremylt } 210d979a051Sjeremylt 211bf4d1581Sjeremylt // LCOV_EXCL_START 212d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, 213*d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 214*d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 215d979a051Sjeremylt CeedVector v, CeedRequest *request) { 216*d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 1, comp_stride, start, stop, 217*d1d35e2fSjeremylt t_mode, u, v, request); 218d979a051Sjeremylt } 219bf4d1581Sjeremylt // LCOV_EXCL_STOP 220d979a051Sjeremylt 221d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, 222*d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 223*d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 224d979a051Sjeremylt CeedVector v, CeedRequest *request) { 225*d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, t_mode, 226d979a051Sjeremylt u, v, request); 227d979a051Sjeremylt } 228d979a051Sjeremylt 229bf4d1581Sjeremylt // LCOV_EXCL_START 230d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, 231*d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 232*d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 233d979a051Sjeremylt CeedVector v, CeedRequest *request) { 234*d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 8, comp_stride, start, stop, 235*d1d35e2fSjeremylt t_mode, u, v, request); 236d979a051Sjeremylt } 237bf4d1581Sjeremylt // LCOV_EXCL_STOP 238d979a051Sjeremylt 239d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, 240*d1d35e2fSjeremylt const CeedInt num_comp, const CeedInt blk_size, const CeedInt comp_stride, 241*d1d35e2fSjeremylt CeedInt start, CeedInt stop, CeedTransposeMode t_mode, CeedVector u, 242d979a051Sjeremylt CeedVector v, CeedRequest *request) { 243*d1d35e2fSjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, t_mode, 2449c36149bSjeremylt u, v, request); 2454d2a38eeSjeremylt } 2464d2a38eeSjeremylt 247f10650afSjeremylt //------------------------------------------------------------------------------ 248f10650afSjeremylt // ElemRestriction Apply 249f10650afSjeremylt //------------------------------------------------------------------------------ 250be9261b7Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 251*d1d35e2fSjeremylt CeedTransposeMode t_mode, CeedVector u, 252be9261b7Sjeremylt CeedVector v, CeedRequest *request) { 253be9261b7Sjeremylt int ierr; 254*d1d35e2fSjeremylt CeedInt num_blk, blk_size, num_comp, comp_stride; 255*d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr); 256*d1d35e2fSjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 257*d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 258*d1d35e2fSjeremylt ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 2597509a596Sjeremylt CeedElemRestriction_Ref *impl; 260e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 2614d2a38eeSjeremylt 262*d1d35e2fSjeremylt return impl->Apply(r, num_comp, blk_size, comp_stride, 0, num_blk, t_mode, u, v, 263d979a051Sjeremylt request); 2649c36149bSjeremylt } 265be9261b7Sjeremylt 266f10650afSjeremylt //------------------------------------------------------------------------------ 267f10650afSjeremylt // ElemRestriction Apply Block 268f10650afSjeremylt //------------------------------------------------------------------------------ 269be9261b7Sjeremylt static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, 270*d1d35e2fSjeremylt CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector v, 271074cb416Sjeremylt CeedRequest *request) { 2724d2a38eeSjeremylt int ierr; 273*d1d35e2fSjeremylt CeedInt blk_size, num_comp, comp_stride; 274*d1d35e2fSjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 275*d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 276*d1d35e2fSjeremylt ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 2777509a596Sjeremylt CeedElemRestriction_Ref *impl; 278e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 2794d2a38eeSjeremylt 280*d1d35e2fSjeremylt return impl->Apply(r, num_comp, blk_size, comp_stride, block, block+1, t_mode, 281*d1d35e2fSjeremylt u, v, request); 2829c36149bSjeremylt } 283be9261b7Sjeremylt 284f10650afSjeremylt //------------------------------------------------------------------------------ 285bd33150aSjeremylt // ElemRestriction Get Offsets 286bd33150aSjeremylt //------------------------------------------------------------------------------ 287bd33150aSjeremylt static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, 288*d1d35e2fSjeremylt CeedMemType mem_type, const CeedInt **offsets) { 289bd33150aSjeremylt int ierr; 290bd33150aSjeremylt CeedElemRestriction_Ref *impl; 291e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(rstr, &impl); CeedChkBackend(ierr); 292bd33150aSjeremylt Ceed ceed; 293e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChkBackend(ierr); 294bd33150aSjeremylt 295*d1d35e2fSjeremylt if (mem_type != CEED_MEM_HOST) 296bd33150aSjeremylt // LCOV_EXCL_START 297e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Can only provide to HOST memory"); 298bd33150aSjeremylt // LCOV_EXCL_STOP 299bd33150aSjeremylt 300bd33150aSjeremylt *offsets = impl->offsets; 301e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 302bd33150aSjeremylt } 303bd33150aSjeremylt 304bd33150aSjeremylt //------------------------------------------------------------------------------ 305f10650afSjeremylt // ElemRestriction Destroy 306f10650afSjeremylt //------------------------------------------------------------------------------ 30721617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 30821617c04Sjeremylt int ierr; 309fe2413ffSjeremylt CeedElemRestriction_Ref *impl; 310e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChkBackend(ierr); 31121617c04Sjeremylt 312e15f9bd0SJeremy L Thompson ierr = CeedFree(&impl->offsets_allocated); CeedChkBackend(ierr); 313e15f9bd0SJeremy L Thompson ierr = CeedFree(&impl); CeedChkBackend(ierr); 314e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 31521617c04Sjeremylt } 31621617c04Sjeremylt 317f10650afSjeremylt //------------------------------------------------------------------------------ 318f10650afSjeremylt // ElemRestriction Create 319f10650afSjeremylt //------------------------------------------------------------------------------ 320*d1d35e2fSjeremylt int CeedElemRestrictionCreate_Ref(CeedMemType mem_type, CeedCopyMode copy_mode, 321d979a051Sjeremylt const CeedInt *offsets, 322d979a051Sjeremylt CeedElemRestriction r) { 32321617c04Sjeremylt int ierr; 32421617c04Sjeremylt CeedElemRestriction_Ref *impl; 325*d1d35e2fSjeremylt CeedInt num_elem, elem_size, num_blk, blk_size, num_comp, comp_stride; 326*d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumElements(r, &num_elem); CeedChkBackend(ierr); 327*d1d35e2fSjeremylt ierr = CeedElemRestrictionGetElementSize(r, &elem_size); CeedChkBackend(ierr); 328*d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumBlocks(r, &num_blk); CeedChkBackend(ierr); 329*d1d35e2fSjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blk_size); CeedChkBackend(ierr); 330*d1d35e2fSjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &num_comp); CeedChkBackend(ierr); 331*d1d35e2fSjeremylt ierr = CeedElemRestrictionGetCompStride(r, &comp_stride); CeedChkBackend(ierr); 3324ce2993fSjeremylt Ceed ceed; 333e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChkBackend(ierr); 33421617c04Sjeremylt 335*d1d35e2fSjeremylt if (mem_type != CEED_MEM_HOST) 336c042f62fSJeremy L Thompson // LCOV_EXCL_START 337e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Only MemType = HOST supported"); 338c042f62fSJeremy L Thompson // LCOV_EXCL_STOP 339e15f9bd0SJeremy L Thompson ierr = CeedCalloc(1, &impl); CeedChkBackend(ierr); 3403661185eSjeremylt 34192fe105eSJeremy L Thompson // Offsets data 342*d1d35e2fSjeremylt bool is_strided; 343*d1d35e2fSjeremylt ierr = CeedElemRestrictionIsStrided(r, &is_strided); CeedChkBackend(ierr); 344*d1d35e2fSjeremylt if (!is_strided) { 34592fe105eSJeremy L Thompson // Check indices for ref or memcheck backends 346*d1d35e2fSjeremylt Ceed parent_ceed = ceed, curr_ceed = NULL; 347*d1d35e2fSjeremylt while (parent_ceed != curr_ceed) { 348*d1d35e2fSjeremylt curr_ceed = parent_ceed; 349*d1d35e2fSjeremylt ierr = CeedGetParent(curr_ceed, &parent_ceed); CeedChkBackend(ierr); 3503661185eSjeremylt } 3513661185eSjeremylt const char *resource; 352*d1d35e2fSjeremylt ierr = CeedGetResource(parent_ceed, &resource); CeedChkBackend(ierr); 353*d1d35e2fSjeremylt if (!strcmp(resource, "/cpu/self/ref/serial") || 354*d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/ref/blocked") || 355*d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/serial") || 356*d1d35e2fSjeremylt !strcmp(resource, "/cpu/self/memcheck/blocked")) { 357*d1d35e2fSjeremylt CeedInt l_size; 358*d1d35e2fSjeremylt ierr = CeedElemRestrictionGetLVectorSize(r, &l_size); CeedChkBackend(ierr); 3593661185eSjeremylt 360*d1d35e2fSjeremylt for (CeedInt i = 0; i < num_elem*elem_size; i++) 361*d1d35e2fSjeremylt if (offsets[i] < 0 || l_size <= offsets[i] + (num_comp - 1) * comp_stride) 3623661185eSjeremylt // LCOV_EXCL_START 363e15f9bd0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 364e15f9bd0SJeremy L Thompson "Restriction offset %d (%d) out of range " 365*d1d35e2fSjeremylt "[0, %d]", i, offsets[i], l_size); 3663661185eSjeremylt // LCOV_EXCL_STOP 3673661185eSjeremylt } 3683661185eSjeremylt 36992fe105eSJeremy L Thompson // Copy data 370*d1d35e2fSjeremylt switch (copy_mode) { 37121617c04Sjeremylt case CEED_COPY_VALUES: 372*d1d35e2fSjeremylt ierr = CeedMalloc(num_elem*elem_size, &impl->offsets_allocated); 373e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 374d979a051Sjeremylt memcpy(impl->offsets_allocated, offsets, 375*d1d35e2fSjeremylt num_elem * elem_size * sizeof(offsets[0])); 376d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 37721617c04Sjeremylt break; 37821617c04Sjeremylt case CEED_OWN_POINTER: 379d979a051Sjeremylt impl->offsets_allocated = (CeedInt *)offsets; 380d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 38121617c04Sjeremylt break; 38221617c04Sjeremylt case CEED_USE_POINTER: 383d979a051Sjeremylt impl->offsets = offsets; 38421617c04Sjeremylt } 38592fe105eSJeremy L Thompson } 386fe2413ffSjeremylt 387e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionSetData(r, impl); CeedChkBackend(ierr); 388*d1d35e2fSjeremylt CeedInt layout[3] = {1, elem_size, elem_size*num_comp}; 389e15f9bd0SJeremy L Thompson ierr = CeedElemRestrictionSetELayout(r, layout); CeedChkBackend(ierr); 390fe2413ffSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", 391e15f9bd0SJeremy L Thompson CeedElemRestrictionApply_Ref); CeedChkBackend(ierr); 392be9261b7Sjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", 393be9261b7Sjeremylt CeedElemRestrictionApplyBlock_Ref); 394e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 395bd33150aSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", 396bd33150aSjeremylt CeedElemRestrictionGetOffsets_Ref); 397e15f9bd0SJeremy L Thompson CeedChkBackend(ierr); 398fe2413ffSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", 399e15f9bd0SJeremy L Thompson CeedElemRestrictionDestroy_Ref); CeedChkBackend(ierr); 400d979a051Sjeremylt 401*d1d35e2fSjeremylt // Set apply function based upon num_comp, blk_size, and comp_stride 402d979a051Sjeremylt CeedInt idx = -1; 403*d1d35e2fSjeremylt if (blk_size < 10) 404*d1d35e2fSjeremylt idx = 100*num_comp + 10*blk_size + (comp_stride == 1); 405d979a051Sjeremylt switch (idx) { 406d979a051Sjeremylt case 110: 407d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_110; 408d979a051Sjeremylt break; 409d979a051Sjeremylt case 111: 410d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_111; 411d979a051Sjeremylt break; 412d979a051Sjeremylt case 180: 413d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_180; 414d979a051Sjeremylt break; 415d979a051Sjeremylt case 181: 416d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_181; 417d979a051Sjeremylt break; 418d979a051Sjeremylt case 310: 419d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_310; 420d979a051Sjeremylt break; 421d979a051Sjeremylt case 311: 422d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_311; 423d979a051Sjeremylt break; 424d979a051Sjeremylt case 380: 425d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_380; 426d979a051Sjeremylt break; 427d979a051Sjeremylt case 381: 428d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_381; 429d979a051Sjeremylt break; 430bf4d1581Sjeremylt // LCOV_EXCL_START 431d979a051Sjeremylt case 510: 432d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_510; 433d979a051Sjeremylt break; 434bf4d1581Sjeremylt // LCOV_EXCL_STOP 435d979a051Sjeremylt case 511: 436d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_511; 437d979a051Sjeremylt break; 438bf4d1581Sjeremylt // LCOV_EXCL_START 439d979a051Sjeremylt case 580: 440d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_580; 441d979a051Sjeremylt break; 442bf4d1581Sjeremylt // LCOV_EXCL_STOP 443d979a051Sjeremylt case 581: 444d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_581; 445d979a051Sjeremylt break; 446d979a051Sjeremylt default: 447d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_Core; 448d979a051Sjeremylt break; 449d979a051Sjeremylt } 450d979a051Sjeremylt 451e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS; 45221617c04Sjeremylt } 453f10650afSjeremylt //------------------------------------------------------------------------------ 454