xref: /libCEED/rust/libceed-sys/c-src/backends/ref/ceed-ref-restriction.c (revision 4e35ef053a4ca6d42bd4fa6764313dea5d12ef4c)
121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
421617c04Sjeremylt //
521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
721617c04Sjeremylt // element discretizations for exascale applications. For more information and
821617c04Sjeremylt // source code availability see http://github.com/ceed.
921617c04Sjeremylt //
1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early
1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
1621617c04Sjeremylt 
1721617c04Sjeremylt #include <ceed-impl.h>
1821617c04Sjeremylt #include <string.h>
1921617c04Sjeremylt #include "ceed-ref.h"
2021617c04Sjeremylt 
2121617c04Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
226ddacda3Sjeremylt                                         CeedTransposeMode tmode,
2321617c04Sjeremylt                                         CeedTransposeMode lmode, CeedVector u,
2421617c04Sjeremylt                                         CeedVector v, CeedRequest *request) {
2521617c04Sjeremylt   CeedElemRestriction_Ref *impl = r->data;
2621617c04Sjeremylt   int ierr;
2721617c04Sjeremylt   const CeedScalar *uu;
2821617c04Sjeremylt   CeedScalar *vv;
29*4e35ef05Sjeremylt   CeedInt nblk = r->nblk, blksize = r->blksize, elemsize = r->elemsize,
30*4e35ef05Sjeremylt            esize = nblk*blksize*elemsize, ncomp=r->ncomp;
3121617c04Sjeremylt 
3221617c04Sjeremylt   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
3321617c04Sjeremylt   ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
3421617c04Sjeremylt   if (tmode == CEED_NOTRANSPOSE) {
3521617c04Sjeremylt     // Perform: v = r * u
36e17b31afSThilina Rathnayake     if (!impl->indices) {
37*4e35ef05Sjeremylt       for (CeedInt i = 0; i<nblk - 1; i++) {
38*4e35ef05Sjeremylt         CeedInt shift = i*blksize*ncomp*elemsize;
39*4e35ef05Sjeremylt         for (CeedInt j = 0; j<blksize; j++) {
40*4e35ef05Sjeremylt           for (CeedInt k = 0; k<ncomp*elemsize; k++) {
41*4e35ef05Sjeremylt             vv[shift + k*blksize + j] = uu[shift + j*ncomp*elemsize + k];
42*4e35ef05Sjeremylt           }
43*4e35ef05Sjeremylt         }
44*4e35ef05Sjeremylt       }
45*4e35ef05Sjeremylt       CeedInt shift = (nblk - 1)*blksize*ncomp*elemsize;
46*4e35ef05Sjeremylt       CeedInt nlastelems = r->nelem % nblk;
47*4e35ef05Sjeremylt       if (nlastelems == 0) nlastelems = blksize;
48*4e35ef05Sjeremylt       for (CeedInt j = 0; j<blksize; j++) {
49*4e35ef05Sjeremylt         for (CeedInt k = 0; k<ncomp*elemsize; k++) {
50*4e35ef05Sjeremylt           if (j < nlastelems) {
51*4e35ef05Sjeremylt             vv[shift + k*blksize + j] = uu[shift + j*ncomp*elemsize + k];
52*4e35ef05Sjeremylt           } else {
53*4e35ef05Sjeremylt             vv[shift + k*blksize + j] = uu[shift + (nlastelems - 1)*ncomp*elemsize + k];
54*4e35ef05Sjeremylt           }
55*4e35ef05Sjeremylt         }
56*4e35ef05Sjeremylt       }
57e17b31afSThilina Rathnayake     } else if (ncomp == 1) {
5821617c04Sjeremylt       for (CeedInt i = 0; i<esize; i++) vv[i] = uu[impl->indices[i]];
5921617c04Sjeremylt     } else {
6021617c04Sjeremylt       // vv is (elemsize x ncomp x nelem), column-major
6121617c04Sjeremylt       if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major
62*4e35ef05Sjeremylt         for (CeedInt e = 0; e < nblk*blksize; e++)
6321617c04Sjeremylt           for (CeedInt d = 0; d < ncomp; d++)
6421617c04Sjeremylt             for (CeedInt i = 0; i<r->elemsize; i++) {
6521617c04Sjeremylt               vv[i+r->elemsize*(d+ncomp*e)] =
6621617c04Sjeremylt                 uu[impl->indices[i+r->elemsize*e]+r->ndof*d];
6721617c04Sjeremylt             }
6821617c04Sjeremylt       } else { // u is (ncomp x ndof), column-major
69*4e35ef05Sjeremylt         for (CeedInt e = 0; e < r->nblk*blksize; e++) {
70*4e35ef05Sjeremylt           for (CeedInt d = 0; d < ncomp; d++) {
7121617c04Sjeremylt             for (CeedInt i = 0; i<r->elemsize; i++) {
7221617c04Sjeremylt               vv[i+r->elemsize*(d+ncomp*e)] =
7321617c04Sjeremylt                 uu[d+ncomp*impl->indices[i+r->elemsize*e]];
7421617c04Sjeremylt             }
7521617c04Sjeremylt           }
7621617c04Sjeremylt         }
77*4e35ef05Sjeremylt       }
78*4e35ef05Sjeremylt     }
7921617c04Sjeremylt   } else {
8021617c04Sjeremylt     // Note: in transpose mode, we perform: v += r^t * u
81*4e35ef05Sjeremylt     esize = (nblk - 1)*blksize*elemsize;
82e17b31afSThilina Rathnayake     if (!impl->indices) {
83*4e35ef05Sjeremylt       for (CeedInt i=0; i<nblk - 1; i++) {
84*4e35ef05Sjeremylt         CeedInt shift = i*blksize*ncomp*elemsize;
85*4e35ef05Sjeremylt         for (CeedInt j = 0; j<blksize; j++) {
86*4e35ef05Sjeremylt           for (CeedInt k = 0; k<ncomp*elemsize; k++) {
87*4e35ef05Sjeremylt             vv[shift + j*ncomp*elemsize + k] = uu[shift + k*blksize + j];
88*4e35ef05Sjeremylt           }
89*4e35ef05Sjeremylt         }
90*4e35ef05Sjeremylt       }
91*4e35ef05Sjeremylt       CeedInt shift = (nblk - 1)*blksize*ncomp*elemsize;
92*4e35ef05Sjeremylt       CeedInt nlastelems = r->nelem % nblk;
93*4e35ef05Sjeremylt       if (nlastelems == 0) nlastelems = blksize;
94*4e35ef05Sjeremylt       for (CeedInt j = 0; j<blksize; j++) {
95*4e35ef05Sjeremylt         for (CeedInt k = 0; k<ncomp*elemsize; k++) {
96*4e35ef05Sjeremylt           if (j < nlastelems) {
97*4e35ef05Sjeremylt             vv[shift + j*ncomp*elemsize + k] = uu[shift + k*blksize + j];
98*4e35ef05Sjeremylt           }
99*4e35ef05Sjeremylt         }
100*4e35ef05Sjeremylt       }
101e17b31afSThilina Rathnayake     } else if (ncomp == 1) {
10221617c04Sjeremylt       for (CeedInt i = 0; i<esize; i++) vv[impl->indices[i]] += uu[i];
103*4e35ef05Sjeremylt       CeedInt nlastelems = r->nelem % blksize;
104*4e35ef05Sjeremylt       CeedInt shift = (nblk - 1)*blksize*elemsize;
105*4e35ef05Sjeremylt       if (nlastelems == 0) nlastelems = blksize;
106*4e35ef05Sjeremylt       for (CeedInt i = 0; i<blksize*elemsize; i++) {
107*4e35ef05Sjeremylt         if ((i % blksize) < nlastelems) {
108*4e35ef05Sjeremylt           vv[impl->indices[shift + i]] += uu[shift + i];
109*4e35ef05Sjeremylt         }
110*4e35ef05Sjeremylt       }
11121617c04Sjeremylt     } else {
11221617c04Sjeremylt       // u is (elemsize x ncomp x nelem)
11321617c04Sjeremylt       if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major
114*4e35ef05Sjeremylt         for (CeedInt e = 0; e < blksize * (nblk - 1); e++) {
115*4e35ef05Sjeremylt           for (CeedInt d = 0; d < ncomp; d++) {
116*4e35ef05Sjeremylt             for (CeedInt i = 0; i<elemsize; i++) {
117*4e35ef05Sjeremylt               vv[impl->indices[i+elemsize*e]+r->ndof*d] +=
118*4e35ef05Sjeremylt                 uu[i+elemsize*(d+e*ncomp)];
119*4e35ef05Sjeremylt             }
120*4e35ef05Sjeremylt           }
121*4e35ef05Sjeremylt         }
122*4e35ef05Sjeremylt       CeedInt shift = (nblk - 1)*blksize*elemsize;
123*4e35ef05Sjeremylt       CeedInt nlastelems = r->nelem % blksize;
124*4e35ef05Sjeremylt       if (nlastelems == 0) nlastelems = blksize;
125*4e35ef05Sjeremylt         for (CeedInt e = 0; e < nlastelems; e++) {
126*4e35ef05Sjeremylt           for (CeedInt d = 0; d < ncomp; d++) {
127*4e35ef05Sjeremylt             for (CeedInt i = 0; i<elemsize; i++) {
128*4e35ef05Sjeremylt               vv[impl->indices[i+elemsize*(e+shift)]+r->ndof*d] +=
129*4e35ef05Sjeremylt                 uu[i+elemsize*(d+(e+shift)*ncomp)];
130*4e35ef05Sjeremylt             }
131*4e35ef05Sjeremylt           }
13221617c04Sjeremylt         }
13321617c04Sjeremylt       } else { // vv is (ncomp x ndof), column-major
134*4e35ef05Sjeremylt         for (CeedInt e = 0; e < blksize * (nblk - 1); e++) {
135*4e35ef05Sjeremylt           for (CeedInt d = 0; d < ncomp; d++) {
136*4e35ef05Sjeremylt             for (CeedInt i = 0; i<elemsize; i++) {
137*4e35ef05Sjeremylt               vv[d+ncomp*impl->indices[i+elemsize*e]] +=
13821617c04Sjeremylt                 uu[i+r->elemsize*(d+e*ncomp)];
13921617c04Sjeremylt             }
14021617c04Sjeremylt           }
14121617c04Sjeremylt         }
142*4e35ef05Sjeremylt         CeedInt shift = (nblk - 1)*blksize*elemsize;
143*4e35ef05Sjeremylt         CeedInt nlastelems = r->nelem % blksize;
144*4e35ef05Sjeremylt         for (CeedInt e = 0; e < nlastelems; e++) {
145*4e35ef05Sjeremylt           for (CeedInt d = 0; d < ncomp; d++) {
146*4e35ef05Sjeremylt             for (CeedInt i = 0; i<elemsize; i++) {
147*4e35ef05Sjeremylt               vv[d+ncomp*impl->indices[i+elemsize*(e+shift)]] +=
148*4e35ef05Sjeremylt                 uu[i+r->elemsize*(d+(e+shift)*ncomp)];
149*4e35ef05Sjeremylt             }
150*4e35ef05Sjeremylt           }
151*4e35ef05Sjeremylt         }
152*4e35ef05Sjeremylt       }
153*4e35ef05Sjeremylt     }
15421617c04Sjeremylt   }
15521617c04Sjeremylt   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
15621617c04Sjeremylt   ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
15721617c04Sjeremylt   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
15821617c04Sjeremylt     *request = NULL;
15921617c04Sjeremylt   return 0;
16021617c04Sjeremylt }
16121617c04Sjeremylt 
16221617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
16321617c04Sjeremylt   CeedElemRestriction_Ref *impl = r->data;
16421617c04Sjeremylt   int ierr;
16521617c04Sjeremylt 
16621617c04Sjeremylt   ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr);
16721617c04Sjeremylt   ierr = CeedFree(&r->data); CeedChk(ierr);
16821617c04Sjeremylt   return 0;
16921617c04Sjeremylt }
17021617c04Sjeremylt 
17121617c04Sjeremylt int CeedElemRestrictionCreate_Ref(CeedElemRestriction r,
17221617c04Sjeremylt                                   CeedMemType mtype,
17321617c04Sjeremylt                                   CeedCopyMode cmode, const CeedInt *indices) {
17421617c04Sjeremylt   int ierr;
17521617c04Sjeremylt   CeedElemRestriction_Ref *impl;
17621617c04Sjeremylt 
17721617c04Sjeremylt   if (mtype != CEED_MEM_HOST)
17821617c04Sjeremylt     return CeedError(r->ceed, 1, "Only MemType = HOST supported");
17921617c04Sjeremylt   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
18021617c04Sjeremylt   switch (cmode) {
18121617c04Sjeremylt   case CEED_COPY_VALUES:
18221617c04Sjeremylt     ierr = CeedMalloc(r->nelem*r->elemsize, &impl->indices_allocated);
18321617c04Sjeremylt     CeedChk(ierr);
18421617c04Sjeremylt     memcpy(impl->indices_allocated, indices,
18521617c04Sjeremylt            r->nelem * r->elemsize * sizeof(indices[0]));
18621617c04Sjeremylt     impl->indices = impl->indices_allocated;
18721617c04Sjeremylt     break;
18821617c04Sjeremylt   case CEED_OWN_POINTER:
18921617c04Sjeremylt     impl->indices_allocated = (CeedInt *)indices;
19021617c04Sjeremylt     impl->indices = impl->indices_allocated;
19121617c04Sjeremylt     break;
19221617c04Sjeremylt   case CEED_USE_POINTER:
19321617c04Sjeremylt     impl->indices = indices;
19421617c04Sjeremylt   }
19521617c04Sjeremylt   r->data = impl;
19621617c04Sjeremylt   r->Apply = CeedElemRestrictionApply_Ref;
19721617c04Sjeremylt   r->Destroy = CeedElemRestrictionDestroy_Ref;
19821617c04Sjeremylt   return 0;
19921617c04Sjeremylt }
200