121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details. 421617c04Sjeremylt // 521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software 621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral 721617c04Sjeremylt // element discretizations for exascale applications. For more information and 821617c04Sjeremylt // source code availability see http://github.com/ceed. 921617c04Sjeremylt // 1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office 1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for 1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including 1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early 1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative. 1621617c04Sjeremylt 17*3d576824SJeremy L Thompson #include <ceed.h> 18*3d576824SJeremy L Thompson #include <ceed-backend.h> 19*3d576824SJeremy L Thompson #include <stdbool.h> 20*3d576824SJeremy L Thompson #include <string.h> 2121617c04Sjeremylt #include "ceed-ref.h" 2221617c04Sjeremylt 23f10650afSjeremylt //------------------------------------------------------------------------------ 24f10650afSjeremylt // Core ElemRestriction Apply Code 25f10650afSjeremylt //------------------------------------------------------------------------------ 26be9261b7Sjeremylt static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r, 27d979a051Sjeremylt const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 28d979a051Sjeremylt CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 299c36149bSjeremylt CeedVector v, CeedRequest *request) { 3021617c04Sjeremylt int ierr; 314ce2993fSjeremylt CeedElemRestriction_Ref *impl; 32777ff853SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChk(ierr); 3321617c04Sjeremylt const CeedScalar *uu; 3421617c04Sjeremylt CeedScalar *vv; 35d979a051Sjeremylt CeedInt nelem, elemsize, voffset; 364ce2993fSjeremylt ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 374ce2993fSjeremylt ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 38be9261b7Sjeremylt voffset = start*blksize*elemsize*ncomp; 3921617c04Sjeremylt 4021617c04Sjeremylt ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 4121617c04Sjeremylt ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 427f90ec76Sjeremylt // Restriction from L-vector to E-vector 4321617c04Sjeremylt // Perform: v = r * u 448d94b059Sjeremylt if (tmode == CEED_NOTRANSPOSE) { 45d979a051Sjeremylt // No offsets provided, Identity Restriction 46d979a051Sjeremylt if (!impl->offsets) { 47523b8ea0Sjeremylt bool backendstrides; 48fd364f38SJeremy L Thompson ierr = CeedElemRestrictionHasBackendStrides(r, &backendstrides); 49523b8ea0Sjeremylt CeedChk(ierr); 50523b8ea0Sjeremylt if (backendstrides) { 517f90ec76Sjeremylt // CPU backend strides are {1, elemsize, elemsize*ncomp} 527f90ec76Sjeremylt // This if branch is left separate to allow better inlining 53be9261b7Sjeremylt for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 54e1b98f6eSjeremylt CeedPragmaSIMD 557509a596Sjeremylt for (CeedInt k = 0; k < ncomp; k++) 561d79ecccSjeremylt CeedPragmaSIMD 577509a596Sjeremylt for (CeedInt n = 0; n < elemsize; n++) 587509a596Sjeremylt CeedPragmaSIMD 597f90ec76Sjeremylt for (CeedInt j = 0; j < blksize; j++) 607f90ec76Sjeremylt vv[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset] 617f90ec76Sjeremylt = uu[n + k*elemsize + 627f90ec76Sjeremylt CeedIntMin(e+j, nelem-1)*elemsize*ncomp]; 637f90ec76Sjeremylt } else { 647f90ec76Sjeremylt // User provided strides 657f90ec76Sjeremylt CeedInt strides[3]; 667f90ec76Sjeremylt ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr); 677f90ec76Sjeremylt for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 687f90ec76Sjeremylt CeedPragmaSIMD 697f90ec76Sjeremylt for (CeedInt k = 0; k < ncomp; k++) 707f90ec76Sjeremylt CeedPragmaSIMD 717f90ec76Sjeremylt for (CeedInt n = 0; n < elemsize; n++) 727f90ec76Sjeremylt CeedPragmaSIMD 737f90ec76Sjeremylt for (CeedInt j = 0; j < blksize; j++) 747509a596Sjeremylt vv[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset] 757509a596Sjeremylt = uu[n*strides[0] + k*strides[1] + 767509a596Sjeremylt CeedIntMin(e+j, nelem-1)*strides[2]]; 777509a596Sjeremylt } 7821617c04Sjeremylt } else { 79d979a051Sjeremylt // Offsets provided, standard or blocked restriction 80ecf6354eSJed Brown // vv has shape [elemsize, ncomp, nelem], row-major 818795c945Sjeremylt // uu has shape [nnodes, ncomp] 82be9261b7Sjeremylt for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 83e1b98f6eSjeremylt CeedPragmaSIMD 84d979a051Sjeremylt for (CeedInt k = 0; k < ncomp; k++) 85e1b98f6eSjeremylt CeedPragmaSIMD 8606cdd269SJed Brown for (CeedInt i = 0; i < elemsize*blksize; i++) 87d979a051Sjeremylt vv[elemsize*(k*blksize+ncomp*e) + i - voffset] 88d979a051Sjeremylt = uu[impl->offsets[i+elemsize*e] + k*compstride]; 8921617c04Sjeremylt } 9021617c04Sjeremylt } else { 917f90ec76Sjeremylt // Restriction from E-vector to L-vector 928d94b059Sjeremylt // Performing v += r^T * u 93d979a051Sjeremylt // No offsets provided, Identity Restriction 94d979a051Sjeremylt if (!impl->offsets) { 95523b8ea0Sjeremylt bool backendstrides; 96fd364f38SJeremy L Thompson ierr = CeedElemRestrictionHasBackendStrides(r, &backendstrides); 97523b8ea0Sjeremylt CeedChk(ierr); 98523b8ea0Sjeremylt if (backendstrides) { 997f90ec76Sjeremylt // CPU backend strides are {1, elemsize, elemsize*ncomp} 1007f90ec76Sjeremylt // This if brach is left separate to allow better inlining 101523b8ea0Sjeremylt for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 102523b8ea0Sjeremylt CeedPragmaSIMD 1037509a596Sjeremylt for (CeedInt k = 0; k < ncomp; k++) 1047509a596Sjeremylt CeedPragmaSIMD 1057509a596Sjeremylt for (CeedInt n = 0; n < elemsize; n++) 1067f90ec76Sjeremylt CeedPragmaSIMD 1077f90ec76Sjeremylt for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++) 1087f90ec76Sjeremylt vv[n + k*elemsize + (e+j)*elemsize*ncomp] 1097f90ec76Sjeremylt += uu[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset]; 1107f90ec76Sjeremylt } else { 1117f90ec76Sjeremylt // User provided strides 1127f90ec76Sjeremylt CeedInt strides[3]; 1137f90ec76Sjeremylt ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr); 1147f90ec76Sjeremylt for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 1157f90ec76Sjeremylt CeedPragmaSIMD 1167f90ec76Sjeremylt for (CeedInt k = 0; k < ncomp; k++) 1177f90ec76Sjeremylt CeedPragmaSIMD 1187f90ec76Sjeremylt for (CeedInt n = 0; n < elemsize; n++) 1197f90ec76Sjeremylt CeedPragmaSIMD 1207f90ec76Sjeremylt for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++) 1217509a596Sjeremylt vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]] 1227509a596Sjeremylt += uu[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset]; 123523b8ea0Sjeremylt } 12421617c04Sjeremylt } else { 125d979a051Sjeremylt // Offsets provided, standard or blocked restriction 126ecf6354eSJed Brown // uu has shape [elemsize, ncomp, nelem] 1278795c945Sjeremylt // vv has shape [nnodes, ncomp] 128e1b98f6eSjeremylt for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize) 129d979a051Sjeremylt for (CeedInt k = 0; k < ncomp; k++) 13006cdd269SJed Brown for (CeedInt i = 0; i < elemsize*blksize; i+=blksize) 1318d94b059Sjeremylt // Iteration bound set to discard padding elements 1324ce2993fSjeremylt for (CeedInt j = i; j < i+CeedIntMin(blksize, nelem-e); j++) 133d979a051Sjeremylt vv[impl->offsets[j+e*elemsize] + k*compstride] 134d979a051Sjeremylt += uu[elemsize*(k*blksize+ncomp*e) + j - voffset]; 13521617c04Sjeremylt } 13621617c04Sjeremylt } 13721617c04Sjeremylt ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 13821617c04Sjeremylt ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 13921617c04Sjeremylt if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 14021617c04Sjeremylt *request = NULL; 14121617c04Sjeremylt return 0; 14221617c04Sjeremylt } 14321617c04Sjeremylt 144f10650afSjeremylt //------------------------------------------------------------------------------ 145f10650afSjeremylt // ElemRestriction Apply - Common Sizes 146f10650afSjeremylt //------------------------------------------------------------------------------ 147d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_110(CeedElemRestriction r, 148d979a051Sjeremylt const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 149d979a051Sjeremylt CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 150d979a051Sjeremylt CeedVector v, CeedRequest *request) { 151d979a051Sjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 1, compstride, start, stop, 152d979a051Sjeremylt tmode, u, v, request); 153d979a051Sjeremylt } 154d979a051Sjeremylt 155d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_111(CeedElemRestriction r, 156d979a051Sjeremylt const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 157d979a051Sjeremylt CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 158d979a051Sjeremylt CeedVector v, CeedRequest *request) { 159d979a051Sjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 1, 1, start, stop, tmode, 1609c36149bSjeremylt u, v, request); 1614d2a38eeSjeremylt } 1624d2a38eeSjeremylt 163d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_180(CeedElemRestriction r, 164d979a051Sjeremylt const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 165d979a051Sjeremylt CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 166d979a051Sjeremylt CeedVector v, CeedRequest *request) { 167d979a051Sjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 8, compstride, start, stop, 168d979a051Sjeremylt tmode, u, v, request); 1699c36149bSjeremylt } 1709c36149bSjeremylt 171d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_181(CeedElemRestriction r, 172d979a051Sjeremylt const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 173d979a051Sjeremylt CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 174d979a051Sjeremylt CeedVector v, CeedRequest *request) { 175d979a051Sjeremylt return CeedElemRestrictionApply_Ref_Core(r, 1, 8, 1, start, stop, tmode, 1769c36149bSjeremylt u, v, request); 1779c36149bSjeremylt } 1789c36149bSjeremylt 179d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_310(CeedElemRestriction r, 180d979a051Sjeremylt const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 181d979a051Sjeremylt CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 182d979a051Sjeremylt CeedVector v, CeedRequest *request) { 183d979a051Sjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 1, compstride, start, stop, 184d979a051Sjeremylt tmode, u, v, request); 185d979a051Sjeremylt } 186d979a051Sjeremylt 187d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_311(CeedElemRestriction r, 188d979a051Sjeremylt const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 189d979a051Sjeremylt CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 190d979a051Sjeremylt CeedVector v, CeedRequest *request) { 191d979a051Sjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 1, 1, start, stop, tmode, 192d979a051Sjeremylt u, v, request); 193d979a051Sjeremylt } 194d979a051Sjeremylt 195d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_380(CeedElemRestriction r, 196d979a051Sjeremylt const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 197d979a051Sjeremylt CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 198d979a051Sjeremylt CeedVector v, CeedRequest *request) { 199d979a051Sjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 8, compstride, start, stop, 200d979a051Sjeremylt tmode, u, v, request); 201d979a051Sjeremylt } 202d979a051Sjeremylt 203d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_381(CeedElemRestriction r, 204d979a051Sjeremylt const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 205d979a051Sjeremylt CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 206d979a051Sjeremylt CeedVector v, CeedRequest *request) { 207d979a051Sjeremylt return CeedElemRestrictionApply_Ref_Core(r, 3, 8, 1, start, stop, tmode, 208d979a051Sjeremylt u, v, request); 209d979a051Sjeremylt } 210d979a051Sjeremylt 211bf4d1581Sjeremylt // LCOV_EXCL_START 212d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_510(CeedElemRestriction r, 213d979a051Sjeremylt const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 214d979a051Sjeremylt CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 215d979a051Sjeremylt CeedVector v, CeedRequest *request) { 216d979a051Sjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 1, compstride, start, stop, 217d979a051Sjeremylt tmode, u, v, request); 218d979a051Sjeremylt } 219bf4d1581Sjeremylt // LCOV_EXCL_STOP 220d979a051Sjeremylt 221d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_511(CeedElemRestriction r, 222d979a051Sjeremylt const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 223d979a051Sjeremylt CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 224d979a051Sjeremylt CeedVector v, CeedRequest *request) { 225d979a051Sjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 1, 1, start, stop, tmode, 226d979a051Sjeremylt u, v, request); 227d979a051Sjeremylt } 228d979a051Sjeremylt 229bf4d1581Sjeremylt // LCOV_EXCL_START 230d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_580(CeedElemRestriction r, 231d979a051Sjeremylt const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 232d979a051Sjeremylt CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 233d979a051Sjeremylt CeedVector v, CeedRequest *request) { 234d979a051Sjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 8, compstride, start, stop, 235d979a051Sjeremylt tmode, u, v, request); 236d979a051Sjeremylt } 237bf4d1581Sjeremylt // LCOV_EXCL_STOP 238d979a051Sjeremylt 239d979a051Sjeremylt static int CeedElemRestrictionApply_Ref_581(CeedElemRestriction r, 240d979a051Sjeremylt const CeedInt ncomp, const CeedInt blksize, const CeedInt compstride, 241d979a051Sjeremylt CeedInt start, CeedInt stop, CeedTransposeMode tmode, CeedVector u, 242d979a051Sjeremylt CeedVector v, CeedRequest *request) { 243d979a051Sjeremylt return CeedElemRestrictionApply_Ref_Core(r, 5, 8, 1, start, stop, tmode, 2449c36149bSjeremylt u, v, request); 2454d2a38eeSjeremylt } 2464d2a38eeSjeremylt 247f10650afSjeremylt //------------------------------------------------------------------------------ 248f10650afSjeremylt // ElemRestriction Apply 249f10650afSjeremylt //------------------------------------------------------------------------------ 250be9261b7Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 251074cb416Sjeremylt CeedTransposeMode tmode, CeedVector u, 252be9261b7Sjeremylt CeedVector v, CeedRequest *request) { 253be9261b7Sjeremylt int ierr; 254d979a051Sjeremylt CeedInt numblk, blksize, ncomp, compstride; 2554d2a38eeSjeremylt ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr); 2564d2a38eeSjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 257d979a051Sjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 258d979a051Sjeremylt ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr); 2597509a596Sjeremylt CeedElemRestriction_Ref *impl; 260777ff853SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChk(ierr); 2614d2a38eeSjeremylt 262d979a051Sjeremylt return impl->Apply(r, ncomp, blksize, compstride, 0, numblk, tmode, u, v, 263d979a051Sjeremylt request); 2649c36149bSjeremylt } 265be9261b7Sjeremylt 266f10650afSjeremylt //------------------------------------------------------------------------------ 267f10650afSjeremylt // ElemRestriction Apply Block 268f10650afSjeremylt //------------------------------------------------------------------------------ 269be9261b7Sjeremylt static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r, 270074cb416Sjeremylt CeedInt block, CeedTransposeMode tmode, CeedVector u, CeedVector v, 271074cb416Sjeremylt CeedRequest *request) { 2724d2a38eeSjeremylt int ierr; 273d979a051Sjeremylt CeedInt blksize, ncomp, compstride; 2744d2a38eeSjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 275d979a051Sjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 276d979a051Sjeremylt ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr); 2777509a596Sjeremylt CeedElemRestriction_Ref *impl; 278777ff853SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChk(ierr); 2794d2a38eeSjeremylt 280d979a051Sjeremylt return impl->Apply(r, ncomp, blksize, compstride, block, block+1, tmode, u, v, 281d979a051Sjeremylt request); 2829c36149bSjeremylt } 283be9261b7Sjeremylt 284f10650afSjeremylt //------------------------------------------------------------------------------ 285bd33150aSjeremylt // ElemRestriction Get Offsets 286bd33150aSjeremylt //------------------------------------------------------------------------------ 287bd33150aSjeremylt static int CeedElemRestrictionGetOffsets_Ref(CeedElemRestriction rstr, 288bd33150aSjeremylt CeedMemType mtype, const CeedInt **offsets) { 289bd33150aSjeremylt int ierr; 290bd33150aSjeremylt CeedElemRestriction_Ref *impl; 291777ff853SJeremy L Thompson ierr = CeedElemRestrictionGetData(rstr, &impl); CeedChk(ierr); 292bd33150aSjeremylt Ceed ceed; 293bd33150aSjeremylt ierr = CeedElemRestrictionGetCeed(rstr, &ceed); CeedChk(ierr); 294bd33150aSjeremylt 295bd33150aSjeremylt if (mtype != CEED_MEM_HOST) 296bd33150aSjeremylt // LCOV_EXCL_START 297bd33150aSjeremylt return CeedError(ceed, 1, "Can only provide to HOST memory"); 298bd33150aSjeremylt // LCOV_EXCL_STOP 299bd33150aSjeremylt 300bd33150aSjeremylt *offsets = impl->offsets; 301bd33150aSjeremylt return 0; 302bd33150aSjeremylt } 303bd33150aSjeremylt 304bd33150aSjeremylt //------------------------------------------------------------------------------ 305f10650afSjeremylt // ElemRestriction Destroy 306f10650afSjeremylt //------------------------------------------------------------------------------ 30721617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 30821617c04Sjeremylt int ierr; 309fe2413ffSjeremylt CeedElemRestriction_Ref *impl; 310777ff853SJeremy L Thompson ierr = CeedElemRestrictionGetData(r, &impl); CeedChk(ierr); 31121617c04Sjeremylt 312d979a051Sjeremylt ierr = CeedFree(&impl->offsets_allocated); CeedChk(ierr); 313fe2413ffSjeremylt ierr = CeedFree(&impl); CeedChk(ierr); 31421617c04Sjeremylt return 0; 31521617c04Sjeremylt } 31621617c04Sjeremylt 317f10650afSjeremylt //------------------------------------------------------------------------------ 318f10650afSjeremylt // ElemRestriction Create 319f10650afSjeremylt //------------------------------------------------------------------------------ 320667bc5fcSjeremylt int CeedElemRestrictionCreate_Ref(CeedMemType mtype, CeedCopyMode cmode, 321d979a051Sjeremylt const CeedInt *offsets, 322d979a051Sjeremylt CeedElemRestriction r) { 32321617c04Sjeremylt int ierr; 32421617c04Sjeremylt CeedElemRestriction_Ref *impl; 325d979a051Sjeremylt CeedInt nelem, elemsize, numblk, blksize, ncomp, compstride; 3264ce2993fSjeremylt ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr); 3274ce2993fSjeremylt ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr); 328d979a051Sjeremylt ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr); 329d979a051Sjeremylt ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr); 330d979a051Sjeremylt ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr); 331d979a051Sjeremylt ierr = CeedElemRestrictionGetCompStride(r, &compstride); CeedChk(ierr); 3324ce2993fSjeremylt Ceed ceed; 3334ce2993fSjeremylt ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr); 33421617c04Sjeremylt 33521617c04Sjeremylt if (mtype != CEED_MEM_HOST) 336c042f62fSJeremy L Thompson // LCOV_EXCL_START 3374ce2993fSjeremylt return CeedError(ceed, 1, "Only MemType = HOST supported"); 338c042f62fSJeremy L Thompson // LCOV_EXCL_STOP 33921617c04Sjeremylt ierr = CeedCalloc(1, &impl); CeedChk(ierr); 3403661185eSjeremylt 34192fe105eSJeremy L Thompson // Offsets data 3427a528612SJeremy L Thompson bool isStrided; 343fd364f38SJeremy L Thompson ierr = CeedElemRestrictionIsStrided(r, &isStrided); CeedChk(ierr); 3447a528612SJeremy L Thompson if (!isStrided) { 34592fe105eSJeremy L Thompson // Check indices for ref or memcheck backends 3463661185eSjeremylt Ceed parentCeed = ceed, currCeed = NULL; 3473661185eSjeremylt while (parentCeed != currCeed) { 3483661185eSjeremylt currCeed = parentCeed; 3493661185eSjeremylt ierr = CeedGetParent(currCeed, &parentCeed); CeedChk(ierr); 3503661185eSjeremylt } 3513661185eSjeremylt const char *resource; 3523661185eSjeremylt ierr = CeedGetResource(parentCeed, &resource); CeedChk(ierr); 3533661185eSjeremylt if (!strcmp(resource, "/cpu/self/ref/serial") 3543661185eSjeremylt || !strcmp(resource, "/cpu/self/ref/blocked") 3553661185eSjeremylt || !strcmp(resource, "/cpu/self/memcheck/serial") 3563661185eSjeremylt || !strcmp(resource, "/cpu/self/memcheck/blocked")) { 3573661185eSjeremylt CeedInt lsize; 3583661185eSjeremylt ierr = CeedElemRestrictionGetLVectorSize(r, &lsize); CeedChk(ierr); 3593661185eSjeremylt 3603661185eSjeremylt for (CeedInt i = 0; i < nelem*elemsize; i++) 361d3ea8f26SJeremy L Thompson if (offsets[i] < 0 || lsize <= offsets[i] + (ncomp - 1) * compstride) 3623661185eSjeremylt // LCOV_EXCL_START 3633661185eSjeremylt return CeedError(ceed, 1, "Restriction offset %d (%d) out of range " 3643661185eSjeremylt "[0, %d]", i, offsets[i], lsize); 3653661185eSjeremylt // LCOV_EXCL_STOP 3663661185eSjeremylt } 3673661185eSjeremylt 36892fe105eSJeremy L Thompson // Copy data 36921617c04Sjeremylt switch (cmode) { 37021617c04Sjeremylt case CEED_COPY_VALUES: 371d979a051Sjeremylt ierr = CeedMalloc(nelem*elemsize, &impl->offsets_allocated); 37221617c04Sjeremylt CeedChk(ierr); 373d979a051Sjeremylt memcpy(impl->offsets_allocated, offsets, 374d979a051Sjeremylt nelem * elemsize * sizeof(offsets[0])); 375d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 37621617c04Sjeremylt break; 37721617c04Sjeremylt case CEED_OWN_POINTER: 378d979a051Sjeremylt impl->offsets_allocated = (CeedInt *)offsets; 379d979a051Sjeremylt impl->offsets = impl->offsets_allocated; 38021617c04Sjeremylt break; 38121617c04Sjeremylt case CEED_USE_POINTER: 382d979a051Sjeremylt impl->offsets = offsets; 38321617c04Sjeremylt } 38492fe105eSJeremy L Thompson } 385fe2413ffSjeremylt 386777ff853SJeremy L Thompson ierr = CeedElemRestrictionSetData(r, impl); CeedChk(ierr); 38749fd234cSJeremy L Thompson CeedInt layout[3] = {1, elemsize, elemsize*ncomp}; 38849fd234cSJeremy L Thompson ierr = CeedElemRestrictionSetELayout(r, layout); CeedChk(ierr); 389fe2413ffSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply", 390fe2413ffSjeremylt CeedElemRestrictionApply_Ref); CeedChk(ierr); 391be9261b7Sjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock", 392be9261b7Sjeremylt CeedElemRestrictionApplyBlock_Ref); 393be9261b7Sjeremylt CeedChk(ierr); 394bd33150aSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "GetOffsets", 395bd33150aSjeremylt CeedElemRestrictionGetOffsets_Ref); 396bd33150aSjeremylt CeedChk(ierr); 397fe2413ffSjeremylt ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy", 398fe2413ffSjeremylt CeedElemRestrictionDestroy_Ref); CeedChk(ierr); 399d979a051Sjeremylt 400d979a051Sjeremylt // Set apply function based upon ncomp, blksize, and compstride 401d979a051Sjeremylt CeedInt idx = -1; 402d979a051Sjeremylt if (blksize < 10) 403d979a051Sjeremylt idx = 100*ncomp + 10*blksize + (compstride == 1); 404d979a051Sjeremylt switch (idx) { 405d979a051Sjeremylt case 110: 406d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_110; 407d979a051Sjeremylt break; 408d979a051Sjeremylt case 111: 409d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_111; 410d979a051Sjeremylt break; 411d979a051Sjeremylt case 180: 412d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_180; 413d979a051Sjeremylt break; 414d979a051Sjeremylt case 181: 415d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_181; 416d979a051Sjeremylt break; 417d979a051Sjeremylt case 310: 418d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_310; 419d979a051Sjeremylt break; 420d979a051Sjeremylt case 311: 421d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_311; 422d979a051Sjeremylt break; 423d979a051Sjeremylt case 380: 424d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_380; 425d979a051Sjeremylt break; 426d979a051Sjeremylt case 381: 427d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_381; 428d979a051Sjeremylt break; 429bf4d1581Sjeremylt // LCOV_EXCL_START 430d979a051Sjeremylt case 510: 431d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_510; 432d979a051Sjeremylt break; 433bf4d1581Sjeremylt // LCOV_EXCL_STOP 434d979a051Sjeremylt case 511: 435d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_511; 436d979a051Sjeremylt break; 437bf4d1581Sjeremylt // LCOV_EXCL_START 438d979a051Sjeremylt case 580: 439d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_580; 440d979a051Sjeremylt break; 441bf4d1581Sjeremylt // LCOV_EXCL_STOP 442d979a051Sjeremylt case 581: 443d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_581; 444d979a051Sjeremylt break; 445d979a051Sjeremylt default: 446d979a051Sjeremylt impl->Apply = CeedElemRestrictionApply_Ref_Core; 447d979a051Sjeremylt break; 448d979a051Sjeremylt } 449d979a051Sjeremylt 45021617c04Sjeremylt return 0; 45121617c04Sjeremylt } 452f10650afSjeremylt //------------------------------------------------------------------------------ 453