121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details. 421617c04Sjeremylt // 521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software 621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral 721617c04Sjeremylt // element discretizations for exascale applications. For more information and 821617c04Sjeremylt // source code availability see http://github.com/ceed. 921617c04Sjeremylt // 1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office 1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for 1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including 1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early 1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative. 1621617c04Sjeremylt 1721617c04Sjeremylt #include <ceed-impl.h> 1821617c04Sjeremylt #include <string.h> 1921617c04Sjeremylt #include "ceed-ref.h" 2021617c04Sjeremylt 2121617c04Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r, 226ddacda3Sjeremylt CeedTransposeMode tmode, 2321617c04Sjeremylt CeedTransposeMode lmode, CeedVector u, 2421617c04Sjeremylt CeedVector v, CeedRequest *request) { 2521617c04Sjeremylt CeedElemRestriction_Ref *impl = r->data; 2621617c04Sjeremylt int ierr; 2721617c04Sjeremylt const CeedScalar *uu; 2821617c04Sjeremylt CeedScalar *vv; 29*4e35ef05Sjeremylt CeedInt nblk = r->nblk, blksize = r->blksize, elemsize = r->elemsize, 30*4e35ef05Sjeremylt esize = nblk*blksize*elemsize, ncomp=r->ncomp; 3121617c04Sjeremylt 3221617c04Sjeremylt ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr); 3321617c04Sjeremylt ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr); 3421617c04Sjeremylt if (tmode == CEED_NOTRANSPOSE) { 3521617c04Sjeremylt // Perform: v = r * u 36e17b31afSThilina Rathnayake if (!impl->indices) { 37*4e35ef05Sjeremylt for (CeedInt i = 0; i<nblk - 1; i++) { 38*4e35ef05Sjeremylt CeedInt shift = i*blksize*ncomp*elemsize; 39*4e35ef05Sjeremylt for (CeedInt j = 0; j<blksize; j++) { 40*4e35ef05Sjeremylt for (CeedInt k = 0; k<ncomp*elemsize; k++) { 41*4e35ef05Sjeremylt vv[shift + k*blksize + j] = uu[shift + j*ncomp*elemsize + k]; 42*4e35ef05Sjeremylt } 43*4e35ef05Sjeremylt } 44*4e35ef05Sjeremylt } 45*4e35ef05Sjeremylt CeedInt shift = (nblk - 1)*blksize*ncomp*elemsize; 46*4e35ef05Sjeremylt CeedInt nlastelems = r->nelem % nblk; 47*4e35ef05Sjeremylt if (nlastelems == 0) nlastelems = blksize; 48*4e35ef05Sjeremylt for (CeedInt j = 0; j<blksize; j++) { 49*4e35ef05Sjeremylt for (CeedInt k = 0; k<ncomp*elemsize; k++) { 50*4e35ef05Sjeremylt if (j < nlastelems) { 51*4e35ef05Sjeremylt vv[shift + k*blksize + j] = uu[shift + j*ncomp*elemsize + k]; 52*4e35ef05Sjeremylt } else { 53*4e35ef05Sjeremylt vv[shift + k*blksize + j] = uu[shift + (nlastelems - 1)*ncomp*elemsize + k]; 54*4e35ef05Sjeremylt } 55*4e35ef05Sjeremylt } 56*4e35ef05Sjeremylt } 57e17b31afSThilina Rathnayake } else if (ncomp == 1) { 5821617c04Sjeremylt for (CeedInt i = 0; i<esize; i++) vv[i] = uu[impl->indices[i]]; 5921617c04Sjeremylt } else { 6021617c04Sjeremylt // vv is (elemsize x ncomp x nelem), column-major 6121617c04Sjeremylt if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major 62*4e35ef05Sjeremylt for (CeedInt e = 0; e < nblk*blksize; e++) 6321617c04Sjeremylt for (CeedInt d = 0; d < ncomp; d++) 6421617c04Sjeremylt for (CeedInt i = 0; i<r->elemsize; i++) { 6521617c04Sjeremylt vv[i+r->elemsize*(d+ncomp*e)] = 6621617c04Sjeremylt uu[impl->indices[i+r->elemsize*e]+r->ndof*d]; 6721617c04Sjeremylt } 6821617c04Sjeremylt } else { // u is (ncomp x ndof), column-major 69*4e35ef05Sjeremylt for (CeedInt e = 0; e < r->nblk*blksize; e++) { 70*4e35ef05Sjeremylt for (CeedInt d = 0; d < ncomp; d++) { 7121617c04Sjeremylt for (CeedInt i = 0; i<r->elemsize; i++) { 7221617c04Sjeremylt vv[i+r->elemsize*(d+ncomp*e)] = 7321617c04Sjeremylt uu[d+ncomp*impl->indices[i+r->elemsize*e]]; 7421617c04Sjeremylt } 7521617c04Sjeremylt } 7621617c04Sjeremylt } 77*4e35ef05Sjeremylt } 78*4e35ef05Sjeremylt } 7921617c04Sjeremylt } else { 8021617c04Sjeremylt // Note: in transpose mode, we perform: v += r^t * u 81*4e35ef05Sjeremylt esize = (nblk - 1)*blksize*elemsize; 82e17b31afSThilina Rathnayake if (!impl->indices) { 83*4e35ef05Sjeremylt for (CeedInt i=0; i<nblk - 1; i++) { 84*4e35ef05Sjeremylt CeedInt shift = i*blksize*ncomp*elemsize; 85*4e35ef05Sjeremylt for (CeedInt j = 0; j<blksize; j++) { 86*4e35ef05Sjeremylt for (CeedInt k = 0; k<ncomp*elemsize; k++) { 87*4e35ef05Sjeremylt vv[shift + j*ncomp*elemsize + k] = uu[shift + k*blksize + j]; 88*4e35ef05Sjeremylt } 89*4e35ef05Sjeremylt } 90*4e35ef05Sjeremylt } 91*4e35ef05Sjeremylt CeedInt shift = (nblk - 1)*blksize*ncomp*elemsize; 92*4e35ef05Sjeremylt CeedInt nlastelems = r->nelem % nblk; 93*4e35ef05Sjeremylt if (nlastelems == 0) nlastelems = blksize; 94*4e35ef05Sjeremylt for (CeedInt j = 0; j<blksize; j++) { 95*4e35ef05Sjeremylt for (CeedInt k = 0; k<ncomp*elemsize; k++) { 96*4e35ef05Sjeremylt if (j < nlastelems) { 97*4e35ef05Sjeremylt vv[shift + j*ncomp*elemsize + k] = uu[shift + k*blksize + j]; 98*4e35ef05Sjeremylt } 99*4e35ef05Sjeremylt } 100*4e35ef05Sjeremylt } 101e17b31afSThilina Rathnayake } else if (ncomp == 1) { 10221617c04Sjeremylt for (CeedInt i = 0; i<esize; i++) vv[impl->indices[i]] += uu[i]; 103*4e35ef05Sjeremylt CeedInt nlastelems = r->nelem % blksize; 104*4e35ef05Sjeremylt CeedInt shift = (nblk - 1)*blksize*elemsize; 105*4e35ef05Sjeremylt if (nlastelems == 0) nlastelems = blksize; 106*4e35ef05Sjeremylt for (CeedInt i = 0; i<blksize*elemsize; i++) { 107*4e35ef05Sjeremylt if ((i % blksize) < nlastelems) { 108*4e35ef05Sjeremylt vv[impl->indices[shift + i]] += uu[shift + i]; 109*4e35ef05Sjeremylt } 110*4e35ef05Sjeremylt } 11121617c04Sjeremylt } else { 11221617c04Sjeremylt // u is (elemsize x ncomp x nelem) 11321617c04Sjeremylt if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major 114*4e35ef05Sjeremylt for (CeedInt e = 0; e < blksize * (nblk - 1); e++) { 115*4e35ef05Sjeremylt for (CeedInt d = 0; d < ncomp; d++) { 116*4e35ef05Sjeremylt for (CeedInt i = 0; i<elemsize; i++) { 117*4e35ef05Sjeremylt vv[impl->indices[i+elemsize*e]+r->ndof*d] += 118*4e35ef05Sjeremylt uu[i+elemsize*(d+e*ncomp)]; 119*4e35ef05Sjeremylt } 120*4e35ef05Sjeremylt } 121*4e35ef05Sjeremylt } 122*4e35ef05Sjeremylt CeedInt shift = (nblk - 1)*blksize*elemsize; 123*4e35ef05Sjeremylt CeedInt nlastelems = r->nelem % blksize; 124*4e35ef05Sjeremylt if (nlastelems == 0) nlastelems = blksize; 125*4e35ef05Sjeremylt for (CeedInt e = 0; e < nlastelems; e++) { 126*4e35ef05Sjeremylt for (CeedInt d = 0; d < ncomp; d++) { 127*4e35ef05Sjeremylt for (CeedInt i = 0; i<elemsize; i++) { 128*4e35ef05Sjeremylt vv[impl->indices[i+elemsize*(e+shift)]+r->ndof*d] += 129*4e35ef05Sjeremylt uu[i+elemsize*(d+(e+shift)*ncomp)]; 130*4e35ef05Sjeremylt } 131*4e35ef05Sjeremylt } 13221617c04Sjeremylt } 13321617c04Sjeremylt } else { // vv is (ncomp x ndof), column-major 134*4e35ef05Sjeremylt for (CeedInt e = 0; e < blksize * (nblk - 1); e++) { 135*4e35ef05Sjeremylt for (CeedInt d = 0; d < ncomp; d++) { 136*4e35ef05Sjeremylt for (CeedInt i = 0; i<elemsize; i++) { 137*4e35ef05Sjeremylt vv[d+ncomp*impl->indices[i+elemsize*e]] += 13821617c04Sjeremylt uu[i+r->elemsize*(d+e*ncomp)]; 13921617c04Sjeremylt } 14021617c04Sjeremylt } 14121617c04Sjeremylt } 142*4e35ef05Sjeremylt CeedInt shift = (nblk - 1)*blksize*elemsize; 143*4e35ef05Sjeremylt CeedInt nlastelems = r->nelem % blksize; 144*4e35ef05Sjeremylt for (CeedInt e = 0; e < nlastelems; e++) { 145*4e35ef05Sjeremylt for (CeedInt d = 0; d < ncomp; d++) { 146*4e35ef05Sjeremylt for (CeedInt i = 0; i<elemsize; i++) { 147*4e35ef05Sjeremylt vv[d+ncomp*impl->indices[i+elemsize*(e+shift)]] += 148*4e35ef05Sjeremylt uu[i+r->elemsize*(d+(e+shift)*ncomp)]; 149*4e35ef05Sjeremylt } 150*4e35ef05Sjeremylt } 151*4e35ef05Sjeremylt } 152*4e35ef05Sjeremylt } 153*4e35ef05Sjeremylt } 15421617c04Sjeremylt } 15521617c04Sjeremylt ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr); 15621617c04Sjeremylt ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr); 15721617c04Sjeremylt if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED) 15821617c04Sjeremylt *request = NULL; 15921617c04Sjeremylt return 0; 16021617c04Sjeremylt } 16121617c04Sjeremylt 16221617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) { 16321617c04Sjeremylt CeedElemRestriction_Ref *impl = r->data; 16421617c04Sjeremylt int ierr; 16521617c04Sjeremylt 16621617c04Sjeremylt ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr); 16721617c04Sjeremylt ierr = CeedFree(&r->data); CeedChk(ierr); 16821617c04Sjeremylt return 0; 16921617c04Sjeremylt } 17021617c04Sjeremylt 17121617c04Sjeremylt int CeedElemRestrictionCreate_Ref(CeedElemRestriction r, 17221617c04Sjeremylt CeedMemType mtype, 17321617c04Sjeremylt CeedCopyMode cmode, const CeedInt *indices) { 17421617c04Sjeremylt int ierr; 17521617c04Sjeremylt CeedElemRestriction_Ref *impl; 17621617c04Sjeremylt 17721617c04Sjeremylt if (mtype != CEED_MEM_HOST) 17821617c04Sjeremylt return CeedError(r->ceed, 1, "Only MemType = HOST supported"); 17921617c04Sjeremylt ierr = CeedCalloc(1,&impl); CeedChk(ierr); 18021617c04Sjeremylt switch (cmode) { 18121617c04Sjeremylt case CEED_COPY_VALUES: 18221617c04Sjeremylt ierr = CeedMalloc(r->nelem*r->elemsize, &impl->indices_allocated); 18321617c04Sjeremylt CeedChk(ierr); 18421617c04Sjeremylt memcpy(impl->indices_allocated, indices, 18521617c04Sjeremylt r->nelem * r->elemsize * sizeof(indices[0])); 18621617c04Sjeremylt impl->indices = impl->indices_allocated; 18721617c04Sjeremylt break; 18821617c04Sjeremylt case CEED_OWN_POINTER: 18921617c04Sjeremylt impl->indices_allocated = (CeedInt *)indices; 19021617c04Sjeremylt impl->indices = impl->indices_allocated; 19121617c04Sjeremylt break; 19221617c04Sjeremylt case CEED_USE_POINTER: 19321617c04Sjeremylt impl->indices = indices; 19421617c04Sjeremylt } 19521617c04Sjeremylt r->data = impl; 19621617c04Sjeremylt r->Apply = CeedElemRestrictionApply_Ref; 19721617c04Sjeremylt r->Destroy = CeedElemRestrictionDestroy_Ref; 19821617c04Sjeremylt return 0; 19921617c04Sjeremylt } 200