xref: /libCEED/rust/libceed-sys/c-src/backends/ref/ceed-ref-restriction.c (revision 170d5e7106681a99f11609ba4d34f66fd5013a1d)
121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
421617c04Sjeremylt //
521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
721617c04Sjeremylt // element discretizations for exascale applications. For more information and
821617c04Sjeremylt // source code availability see http://github.com/ceed.
921617c04Sjeremylt //
1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early
1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
1621617c04Sjeremylt 
1721617c04Sjeremylt #include <ceed-impl.h>
1821617c04Sjeremylt #include <string.h>
1921617c04Sjeremylt #include "ceed-ref.h"
2021617c04Sjeremylt 
2121617c04Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
226ddacda3Sjeremylt                                         CeedTransposeMode tmode,
2321617c04Sjeremylt                                         CeedTransposeMode lmode, CeedVector u,
2421617c04Sjeremylt                                         CeedVector v, CeedRequest *request) {
2521617c04Sjeremylt   CeedElemRestriction_Ref *impl = r->data;
2621617c04Sjeremylt   int ierr;
2721617c04Sjeremylt   const CeedScalar *uu;
2821617c04Sjeremylt   CeedScalar *vv;
294e35ef05Sjeremylt   CeedInt nblk = r->nblk, blksize = r->blksize, elemsize = r->elemsize,
304e35ef05Sjeremylt           esize = nblk*blksize*elemsize, ncomp=r->ncomp;
3121617c04Sjeremylt 
3221617c04Sjeremylt   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
3321617c04Sjeremylt   ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
3421617c04Sjeremylt   if (tmode == CEED_NOTRANSPOSE) {
3521617c04Sjeremylt     // Perform: v = r * u
36e17b31afSThilina Rathnayake     if (!impl->indices) {
37*170d5e71Sjeremylt       for (CeedInt e = 0; e < nblk*blksize; e+=blksize)
38*170d5e71Sjeremylt         for (CeedInt j = 0; j < blksize; j++)
39*170d5e71Sjeremylt           for (CeedInt k = 0; k < ncomp*elemsize; k++)
405f2ab5aeSjeremylt             vv[e*elemsize*ncomp + k*blksize + j] =
415f2ab5aeSjeremylt               uu[CeedIntMin(e+j,r->nelem-1)*ncomp*elemsize + k];
42e17b31afSThilina Rathnayake     } else if (ncomp == 1) {
4321617c04Sjeremylt       for (CeedInt i = 0; i<esize; i++) vv[i] = uu[impl->indices[i]];
4421617c04Sjeremylt     } else {
4521617c04Sjeremylt       // vv is (elemsize x ncomp x nelem), column-major
4621617c04Sjeremylt       if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major
474e35ef05Sjeremylt         for (CeedInt e = 0; e < nblk*blksize; e++)
4821617c04Sjeremylt           for (CeedInt d = 0; d < ncomp; d++)
49*170d5e71Sjeremylt             for (CeedInt i = 0; i < r->elemsize; i++)
5021617c04Sjeremylt               vv[i+r->elemsize*(d+ncomp*e)] =
5121617c04Sjeremylt                 uu[impl->indices[i+r->elemsize*e]+r->ndof*d];
5221617c04Sjeremylt       } else { // u is (ncomp x ndof), column-major
53*170d5e71Sjeremylt         for (CeedInt e = 0; e < r->nblk*blksize; e++)
54*170d5e71Sjeremylt           for (CeedInt d = 0; d < ncomp; d++)
55*170d5e71Sjeremylt             for (CeedInt i = 0; i < r->elemsize; i++)
5621617c04Sjeremylt               vv[i+r->elemsize*(d+ncomp*e)] =
5721617c04Sjeremylt                 uu[d+ncomp*impl->indices[i+r->elemsize*e]];
5821617c04Sjeremylt       }
5921617c04Sjeremylt     }
6021617c04Sjeremylt   } else {
6121617c04Sjeremylt     // Note: in transpose mode, we perform: v += r^t * u
624e35ef05Sjeremylt     esize = (nblk - 1)*blksize*elemsize;
63e17b31afSThilina Rathnayake     if (!impl->indices) {
645f2ab5aeSjeremylt       for (CeedInt e = 0; e < nblk*blksize; e+=blksize) {
65*170d5e71Sjeremylt         CeedInt maxj = ((e<(nblk-1)*blksize)
66*170d5e71Sjeremylt                         ||!(r->nelem%blksize))?blksize:r->nelem%blksize;
67*170d5e71Sjeremylt         for (CeedInt j = 0; j < maxj; j++)
68*170d5e71Sjeremylt           for (CeedInt k = 0; k < ncomp*elemsize; k++)
695f2ab5aeSjeremylt             vv[(e + j)*ncomp*elemsize + k] += uu[e*ncomp*elemsize + k*blksize + j];
704e35ef05Sjeremylt       }
71e17b31afSThilina Rathnayake     } else if (ncomp == 1) {
7221617c04Sjeremylt       for (CeedInt i = 0; i < esize; i++) vv[impl->indices[i]] += uu[i];
735f2ab5aeSjeremylt       CeedInt nlastelems = (r->nelem%blksize)?r->nelem%blksize:blksize;
744e35ef05Sjeremylt       CeedInt shift = (nblk - 1)*blksize*elemsize;
75*170d5e71Sjeremylt       for (CeedInt i = 0; i < blksize*elemsize; i++)
76*170d5e71Sjeremylt         if ((i % blksize) < nlastelems)
774e35ef05Sjeremylt           vv[impl->indices[shift + i]] += uu[shift + i];
7821617c04Sjeremylt     } else {
7921617c04Sjeremylt       // u is (elemsize x ncomp x nelem)
8021617c04Sjeremylt       if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major
81*170d5e71Sjeremylt         for (CeedInt e = 0; e < r->nelem; e++)
82*170d5e71Sjeremylt           for (CeedInt d = 0; d < ncomp; d++)
83*170d5e71Sjeremylt             for (CeedInt i = 0; i < elemsize; i++)
844e35ef05Sjeremylt               vv[impl->indices[i+elemsize*e]+r->ndof*d] +=
854e35ef05Sjeremylt                 uu[i+elemsize*(d+e*ncomp)];
8621617c04Sjeremylt       } else { // vv is (ncomp x ndof), column-major
87*170d5e71Sjeremylt         for (CeedInt e = 0; e < r->nelem; e++)
88*170d5e71Sjeremylt           for (CeedInt d = 0; d < ncomp; d++)
89*170d5e71Sjeremylt             for (CeedInt i = 0; i < elemsize; i++)
904e35ef05Sjeremylt               vv[d+ncomp*impl->indices[i+elemsize*e]] +=
9121617c04Sjeremylt                 uu[i+r->elemsize*(d+e*ncomp)];
9221617c04Sjeremylt       }
9321617c04Sjeremylt     }
9421617c04Sjeremylt   }
9521617c04Sjeremylt   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
9621617c04Sjeremylt   ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
9721617c04Sjeremylt   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
9821617c04Sjeremylt     *request = NULL;
9921617c04Sjeremylt   return 0;
10021617c04Sjeremylt }
10121617c04Sjeremylt 
10221617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
10321617c04Sjeremylt   CeedElemRestriction_Ref *impl = r->data;
10421617c04Sjeremylt   int ierr;
10521617c04Sjeremylt 
10621617c04Sjeremylt   ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr);
10721617c04Sjeremylt   ierr = CeedFree(&r->data); CeedChk(ierr);
10821617c04Sjeremylt   return 0;
10921617c04Sjeremylt }
11021617c04Sjeremylt 
11121617c04Sjeremylt int CeedElemRestrictionCreate_Ref(CeedElemRestriction r,
11221617c04Sjeremylt                                   CeedMemType mtype,
11321617c04Sjeremylt                                   CeedCopyMode cmode, const CeedInt *indices) {
11421617c04Sjeremylt   int ierr;
11521617c04Sjeremylt   CeedElemRestriction_Ref *impl;
11621617c04Sjeremylt 
11721617c04Sjeremylt   if (mtype != CEED_MEM_HOST)
11821617c04Sjeremylt     return CeedError(r->ceed, 1, "Only MemType = HOST supported");
11921617c04Sjeremylt   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
12021617c04Sjeremylt   switch (cmode) {
12121617c04Sjeremylt   case CEED_COPY_VALUES:
12221617c04Sjeremylt     ierr = CeedMalloc(r->nelem*r->elemsize, &impl->indices_allocated);
12321617c04Sjeremylt     CeedChk(ierr);
12421617c04Sjeremylt     memcpy(impl->indices_allocated, indices,
12521617c04Sjeremylt            r->nelem * r->elemsize * sizeof(indices[0]));
12621617c04Sjeremylt     impl->indices = impl->indices_allocated;
12721617c04Sjeremylt     break;
12821617c04Sjeremylt   case CEED_OWN_POINTER:
12921617c04Sjeremylt     impl->indices_allocated = (CeedInt *)indices;
13021617c04Sjeremylt     impl->indices = impl->indices_allocated;
13121617c04Sjeremylt     break;
13221617c04Sjeremylt   case CEED_USE_POINTER:
13321617c04Sjeremylt     impl->indices = indices;
13421617c04Sjeremylt   }
13521617c04Sjeremylt   r->data = impl;
13621617c04Sjeremylt   r->Apply = CeedElemRestrictionApply_Ref;
13721617c04Sjeremylt   r->Destroy = CeedElemRestrictionDestroy_Ref;
13821617c04Sjeremylt   return 0;
13921617c04Sjeremylt }
140