xref: /libCEED/backends/ref/ceed-ref-restriction.c (revision a92b91d64f3394bca5626f06bd5cf25837fe6001)
121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
421617c04Sjeremylt //
521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
721617c04Sjeremylt // element discretizations for exascale applications. For more information and
821617c04Sjeremylt // source code availability see http://github.com/ceed.
921617c04Sjeremylt //
1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early
1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
1621617c04Sjeremylt 
1721617c04Sjeremylt #include <ceed-impl.h>
1821617c04Sjeremylt #include <string.h>
1921617c04Sjeremylt #include "ceed-ref.h"
2021617c04Sjeremylt 
2121617c04Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
226ddacda3Sjeremylt                                         CeedTransposeMode tmode,
2321617c04Sjeremylt                                         CeedTransposeMode lmode, CeedVector u,
2421617c04Sjeremylt                                         CeedVector v, CeedRequest *request) {
2521617c04Sjeremylt   CeedElemRestriction_Ref *impl = r->data;
2621617c04Sjeremylt   int ierr;
2721617c04Sjeremylt   const CeedScalar *uu;
2821617c04Sjeremylt   CeedScalar *vv;
294e35ef05Sjeremylt   CeedInt nblk = r->nblk, blksize = r->blksize, elemsize = r->elemsize,
304e35ef05Sjeremylt           esize = nblk*blksize*elemsize, ncomp=r->ncomp;
3121617c04Sjeremylt 
3221617c04Sjeremylt   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
3321617c04Sjeremylt   ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
3421617c04Sjeremylt   if (tmode == CEED_NOTRANSPOSE) {
3521617c04Sjeremylt     // Perform: v = r * u
36e17b31afSThilina Rathnayake     if (!impl->indices) {
37*a92b91d6Sjeremylt       for (CeedInt shift=0; shift<nblk*blksize*ncomp*elemsize;
38*a92b91d6Sjeremylt            shift+=blksize*ncomp*elemsize) {
394e35ef05Sjeremylt         for (CeedInt j = 0; j<blksize; j++) {
40*a92b91d6Sjeremylt           CeedInt maxj = (shift<(nblk-1)*blksize*ncomp*elemsize)?blksize-1:(r->nelem%nblk)-1;
41*a92b91d6Sjeremylt           if (maxj == -1) maxj = blksize-1;
424e35ef05Sjeremylt           for (CeedInt k = 0; k<ncomp*elemsize; k++) {
43*a92b91d6Sjeremylt             vv[shift + k*blksize + j] = uu[shift + (j<maxj?j:maxj)*ncomp*elemsize + k];
444e35ef05Sjeremylt           }
454e35ef05Sjeremylt         }
464e35ef05Sjeremylt       }
47e17b31afSThilina Rathnayake     } else if (ncomp == 1) {
4821617c04Sjeremylt       for (CeedInt i = 0; i<esize; i++) vv[i] = uu[impl->indices[i]];
4921617c04Sjeremylt     } else {
5021617c04Sjeremylt       // vv is (elemsize x ncomp x nelem), column-major
5121617c04Sjeremylt       if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major
524e35ef05Sjeremylt         for (CeedInt e = 0; e < nblk*blksize; e++)
5321617c04Sjeremylt           for (CeedInt d = 0; d < ncomp; d++)
5421617c04Sjeremylt             for (CeedInt i = 0; i<r->elemsize; i++) {
5521617c04Sjeremylt               vv[i+r->elemsize*(d+ncomp*e)] =
5621617c04Sjeremylt                 uu[impl->indices[i+r->elemsize*e]+r->ndof*d];
5721617c04Sjeremylt             }
5821617c04Sjeremylt       } else { // u is (ncomp x ndof), column-major
594e35ef05Sjeremylt         for (CeedInt e = 0; e < r->nblk*blksize; e++) {
604e35ef05Sjeremylt           for (CeedInt d = 0; d < ncomp; d++) {
6121617c04Sjeremylt             for (CeedInt i = 0; i<r->elemsize; i++) {
6221617c04Sjeremylt               vv[i+r->elemsize*(d+ncomp*e)] =
6321617c04Sjeremylt                 uu[d+ncomp*impl->indices[i+r->elemsize*e]];
6421617c04Sjeremylt             }
6521617c04Sjeremylt           }
6621617c04Sjeremylt         }
674e35ef05Sjeremylt       }
684e35ef05Sjeremylt     }
6921617c04Sjeremylt   } else {
7021617c04Sjeremylt     // Note: in transpose mode, we perform: v += r^t * u
714e35ef05Sjeremylt     esize = (nblk - 1)*blksize*elemsize;
72e17b31afSThilina Rathnayake     if (!impl->indices) {
73*a92b91d6Sjeremylt       for (CeedInt shift=0; shift<nblk*blksize*ncomp*elemsize;
74*a92b91d6Sjeremylt            shift+=blksize*ncomp*elemsize) {
75*a92b91d6Sjeremylt         CeedInt maxj = (shift<(nblk-1)*blksize*ncomp*elemsize)?blksize:r->nelem%nblk;
76*a92b91d6Sjeremylt         if (maxj == 0) maxj = blksize;
77*a92b91d6Sjeremylt         for (CeedInt j = 0; j<maxj; j++) {
784e35ef05Sjeremylt           for (CeedInt k = 0; k<ncomp*elemsize; k++) {
794e35ef05Sjeremylt             vv[shift + j*ncomp*elemsize + k] = uu[shift + k*blksize + j];
804e35ef05Sjeremylt           }
814e35ef05Sjeremylt         }
824e35ef05Sjeremylt       }
83e17b31afSThilina Rathnayake     } else if (ncomp == 1) {
8421617c04Sjeremylt       for (CeedInt i = 0; i<esize; i++) vv[impl->indices[i]] += uu[i];
854e35ef05Sjeremylt       CeedInt nlastelems = r->nelem % blksize;
864e35ef05Sjeremylt       CeedInt shift = (nblk - 1)*blksize*elemsize;
874e35ef05Sjeremylt       if (nlastelems == 0) nlastelems = blksize;
884e35ef05Sjeremylt       for (CeedInt i = 0; i<blksize*elemsize; i++) {
894e35ef05Sjeremylt         if ((i % blksize) < nlastelems) {
904e35ef05Sjeremylt           vv[impl->indices[shift + i]] += uu[shift + i];
914e35ef05Sjeremylt         }
924e35ef05Sjeremylt       }
9321617c04Sjeremylt     } else {
9421617c04Sjeremylt       // u is (elemsize x ncomp x nelem)
9521617c04Sjeremylt       if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major
964e35ef05Sjeremylt         for (CeedInt e = 0; e < blksize * (nblk - 1); e++) {
974e35ef05Sjeremylt           for (CeedInt d = 0; d < ncomp; d++) {
984e35ef05Sjeremylt             for (CeedInt i = 0; i<elemsize; i++) {
994e35ef05Sjeremylt               vv[impl->indices[i+elemsize*e]+r->ndof*d] +=
1004e35ef05Sjeremylt                 uu[i+elemsize*(d+e*ncomp)];
1014e35ef05Sjeremylt             }
1024e35ef05Sjeremylt           }
1034e35ef05Sjeremylt         }
1044e35ef05Sjeremylt         CeedInt shift = (nblk - 1)*blksize*elemsize;
1054e35ef05Sjeremylt         CeedInt nlastelems = r->nelem % blksize;
1064e35ef05Sjeremylt         if (nlastelems == 0) nlastelems = blksize;
1074e35ef05Sjeremylt         for (CeedInt e = 0; e < nlastelems; e++) {
1084e35ef05Sjeremylt           for (CeedInt d = 0; d < ncomp; d++) {
1094e35ef05Sjeremylt             for (CeedInt i = 0; i<elemsize; i++) {
1104e35ef05Sjeremylt               vv[impl->indices[i+elemsize*(e+shift)]+r->ndof*d] +=
1114e35ef05Sjeremylt                 uu[i+elemsize*(d+(e+shift)*ncomp)];
1124e35ef05Sjeremylt             }
1134e35ef05Sjeremylt           }
11421617c04Sjeremylt         }
11521617c04Sjeremylt       } else { // vv is (ncomp x ndof), column-major
1164e35ef05Sjeremylt         for (CeedInt e = 0; e < blksize * (nblk - 1); e++) {
1174e35ef05Sjeremylt           for (CeedInt d = 0; d < ncomp; d++) {
1184e35ef05Sjeremylt             for (CeedInt i = 0; i<elemsize; i++) {
1194e35ef05Sjeremylt               vv[d+ncomp*impl->indices[i+elemsize*e]] +=
12021617c04Sjeremylt                 uu[i+r->elemsize*(d+e*ncomp)];
12121617c04Sjeremylt             }
12221617c04Sjeremylt           }
12321617c04Sjeremylt         }
1244e35ef05Sjeremylt         CeedInt shift = (nblk - 1)*blksize*elemsize;
1254e35ef05Sjeremylt         CeedInt nlastelems = r->nelem % blksize;
1264e35ef05Sjeremylt         for (CeedInt e = 0; e < nlastelems; e++) {
1274e35ef05Sjeremylt           for (CeedInt d = 0; d < ncomp; d++) {
1284e35ef05Sjeremylt             for (CeedInt i = 0; i<elemsize; i++) {
1294e35ef05Sjeremylt               vv[d+ncomp*impl->indices[i+elemsize*(e+shift)]] +=
1304e35ef05Sjeremylt                 uu[i+r->elemsize*(d+(e+shift)*ncomp)];
1314e35ef05Sjeremylt             }
1324e35ef05Sjeremylt           }
1334e35ef05Sjeremylt         }
1344e35ef05Sjeremylt       }
1354e35ef05Sjeremylt     }
13621617c04Sjeremylt   }
13721617c04Sjeremylt   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
13821617c04Sjeremylt   ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
13921617c04Sjeremylt   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
14021617c04Sjeremylt     *request = NULL;
14121617c04Sjeremylt   return 0;
14221617c04Sjeremylt }
14321617c04Sjeremylt 
14421617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
14521617c04Sjeremylt   CeedElemRestriction_Ref *impl = r->data;
14621617c04Sjeremylt   int ierr;
14721617c04Sjeremylt 
14821617c04Sjeremylt   ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr);
14921617c04Sjeremylt   ierr = CeedFree(&r->data); CeedChk(ierr);
15021617c04Sjeremylt   return 0;
15121617c04Sjeremylt }
15221617c04Sjeremylt 
15321617c04Sjeremylt int CeedElemRestrictionCreate_Ref(CeedElemRestriction r,
15421617c04Sjeremylt                                   CeedMemType mtype,
15521617c04Sjeremylt                                   CeedCopyMode cmode, const CeedInt *indices) {
15621617c04Sjeremylt   int ierr;
15721617c04Sjeremylt   CeedElemRestriction_Ref *impl;
15821617c04Sjeremylt 
15921617c04Sjeremylt   if (mtype != CEED_MEM_HOST)
16021617c04Sjeremylt     return CeedError(r->ceed, 1, "Only MemType = HOST supported");
16121617c04Sjeremylt   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
16221617c04Sjeremylt   switch (cmode) {
16321617c04Sjeremylt   case CEED_COPY_VALUES:
16421617c04Sjeremylt     ierr = CeedMalloc(r->nelem*r->elemsize, &impl->indices_allocated);
16521617c04Sjeremylt     CeedChk(ierr);
16621617c04Sjeremylt     memcpy(impl->indices_allocated, indices,
16721617c04Sjeremylt            r->nelem * r->elemsize * sizeof(indices[0]));
16821617c04Sjeremylt     impl->indices = impl->indices_allocated;
16921617c04Sjeremylt     break;
17021617c04Sjeremylt   case CEED_OWN_POINTER:
17121617c04Sjeremylt     impl->indices_allocated = (CeedInt *)indices;
17221617c04Sjeremylt     impl->indices = impl->indices_allocated;
17321617c04Sjeremylt     break;
17421617c04Sjeremylt   case CEED_USE_POINTER:
17521617c04Sjeremylt     impl->indices = indices;
17621617c04Sjeremylt   }
17721617c04Sjeremylt   r->data = impl;
17821617c04Sjeremylt   r->Apply = CeedElemRestrictionApply_Ref;
17921617c04Sjeremylt   r->Destroy = CeedElemRestrictionDestroy_Ref;
18021617c04Sjeremylt   return 0;
18121617c04Sjeremylt }
182