xref: /libCEED/backends/ref/ceed-ref-restriction.c (revision 7509a596beda7c1d002d2274a17375b09541fdb6)
121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
421617c04Sjeremylt //
521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
721617c04Sjeremylt // element discretizations for exascale applications. For more information and
821617c04Sjeremylt // source code availability see http://github.com/ceed.
921617c04Sjeremylt //
1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early
1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
1621617c04Sjeremylt 
1721617c04Sjeremylt #include "ceed-ref.h"
1821617c04Sjeremylt 
19f10650afSjeremylt //------------------------------------------------------------------------------
20f10650afSjeremylt // Core ElemRestriction Apply Code
21f10650afSjeremylt //------------------------------------------------------------------------------
22be9261b7Sjeremylt static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r,
239c36149bSjeremylt     const CeedInt blksize, const CeedInt ncomp, CeedInt start, CeedInt stop,
2461dbc9d2Sjeremylt     CeedTransposeMode tmode, CeedInterlaceMode imode, CeedVector u,
259c36149bSjeremylt     CeedVector v, CeedRequest *request) {
2621617c04Sjeremylt   int ierr;
274ce2993fSjeremylt   CeedElemRestriction_Ref *impl;
28*7509a596Sjeremylt   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);
2921617c04Sjeremylt   const CeedScalar *uu;
3021617c04Sjeremylt   CeedScalar *vv;
319c36149bSjeremylt   CeedInt nelem, elemsize, nnodes, voffset;
324ce2993fSjeremylt   ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
334ce2993fSjeremylt   ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
348795c945Sjeremylt   ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
35be9261b7Sjeremylt   voffset = start*blksize*elemsize*ncomp;
3621617c04Sjeremylt 
3721617c04Sjeremylt   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
3821617c04Sjeremylt   ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
398d94b059Sjeremylt   // Restriction from lvector to evector
4021617c04Sjeremylt   // Perform: v = r * u
418d94b059Sjeremylt   if (tmode == CEED_NOTRANSPOSE) {
428c91a0c9SJeremy L Thompson     // No indices provided, Identity Restriction
43e17b31afSThilina Rathnayake     if (!impl->indices) {
44*7509a596Sjeremylt       CeedInt strides[3];
45*7509a596Sjeremylt       ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr);
46be9261b7Sjeremylt       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
47e1b98f6eSjeremylt         CeedPragmaSIMD
48*7509a596Sjeremylt         for (CeedInt k = 0; k < ncomp; k++)
491d79ecccSjeremylt           CeedPragmaSIMD
50*7509a596Sjeremylt           for (CeedInt n = 0; n < elemsize; n++)
51*7509a596Sjeremylt             CeedPragmaSIMD
52*7509a596Sjeremylt             for (CeedInt j = 0; j < blksize; j++) {
53*7509a596Sjeremylt               vv[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset]
54*7509a596Sjeremylt                 = uu[n*strides[0] + k*strides[1] +
55*7509a596Sjeremylt                                   CeedIntMin(e+j, nelem-1)*strides[2]];
56*7509a596Sjeremylt             }
5721617c04Sjeremylt     } else {
588c91a0c9SJeremy L Thompson       // Indices provided, standard or blocked restriction
59ecf6354eSJed Brown       // vv has shape [elemsize, ncomp, nelem], row-major
608795c945Sjeremylt       // uu has shape [nnodes, ncomp]
61be9261b7Sjeremylt       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
62e1b98f6eSjeremylt         CeedPragmaSIMD
6321617c04Sjeremylt         for (CeedInt d = 0; d < ncomp; d++)
64e1b98f6eSjeremylt           CeedPragmaSIMD
6506cdd269SJed Brown           for (CeedInt i = 0; i < elemsize*blksize; i++)
661d79ecccSjeremylt             vv[elemsize*(d*blksize+ncomp*e) + i - voffset]
6761dbc9d2Sjeremylt               = uu[imode == CEED_NONINTERLACED
688795c945Sjeremylt                          ? impl->indices[i+elemsize*e]+nnodes*d
6906cdd269SJed Brown                          : d+ncomp*impl->indices[i+elemsize*e]];
7021617c04Sjeremylt     }
7121617c04Sjeremylt   } else {
728d94b059Sjeremylt     // Restriction from evector to lvector
738d94b059Sjeremylt     // Performing v += r^T * u
748c91a0c9SJeremy L Thompson     // No indices provided, Identity Restriction
75e17b31afSThilina Rathnayake     if (!impl->indices) {
76*7509a596Sjeremylt       CeedInt strides[3];
77*7509a596Sjeremylt       ierr = CeedElemRestrictionGetStrides(r, &strides); CeedChk(ierr);
78be9261b7Sjeremylt       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
794ce2993fSjeremylt         for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++)
801d79ecccSjeremylt           CeedPragmaSIMD
81*7509a596Sjeremylt           for (CeedInt k = 0; k < ncomp; k++)
82*7509a596Sjeremylt             CeedPragmaSIMD
83*7509a596Sjeremylt             for (CeedInt n = 0; n < elemsize; n++)
84*7509a596Sjeremylt               vv[n*strides[0] + k*strides[1] + (e+j)*strides[2]]
85*7509a596Sjeremylt               += uu[e*elemsize*ncomp + (k*elemsize+n)*blksize + j - voffset];
8621617c04Sjeremylt     } else {
878c91a0c9SJeremy L Thompson       // Indices provided, standard or blocked restriction
88ecf6354eSJed Brown       // uu has shape [elemsize, ncomp, nelem]
898795c945Sjeremylt       // vv has shape [nnodes, ncomp]
90e1b98f6eSjeremylt       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
91170d5e71Sjeremylt         for (CeedInt d = 0; d < ncomp; d++)
9206cdd269SJed Brown           for (CeedInt i = 0; i < elemsize*blksize; i+=blksize)
938d94b059Sjeremylt             // Iteration bound set to discard padding elements
944ce2993fSjeremylt             for (CeedInt j = i; j < i+CeedIntMin(blksize, nelem-e); j++)
9561dbc9d2Sjeremylt               vv[imode == CEED_NONINTERLACED
968795c945Sjeremylt                        ? impl->indices[j+e*elemsize]+nnodes*d
9706cdd269SJed Brown                        : d+ncomp*impl->indices[j+e*elemsize]]
98e1b98f6eSjeremylt               += uu[elemsize*(d*blksize+ncomp*e) + j - voffset];
9921617c04Sjeremylt     }
10021617c04Sjeremylt   }
10121617c04Sjeremylt   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
10221617c04Sjeremylt   ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
10321617c04Sjeremylt   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
10421617c04Sjeremylt     *request = NULL;
10521617c04Sjeremylt   return 0;
10621617c04Sjeremylt }
10721617c04Sjeremylt 
108f10650afSjeremylt //------------------------------------------------------------------------------
109f10650afSjeremylt // ElemRestriction Apply - Common Sizes
110f10650afSjeremylt //------------------------------------------------------------------------------
1111d79ecccSjeremylt static int CeedElemRestrictionApply_Ref_11(CeedElemRestriction r,
1124d2a38eeSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode tmode,
11361dbc9d2Sjeremylt     CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) {
11461dbc9d2Sjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, start, stop, tmode, imode,
1159c36149bSjeremylt          u, v, request);
1164d2a38eeSjeremylt }
1174d2a38eeSjeremylt 
1181d79ecccSjeremylt static int CeedElemRestrictionApply_Ref_18(CeedElemRestriction r,
1194d2a38eeSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode tmode,
12061dbc9d2Sjeremylt     CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) {
12161dbc9d2Sjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 8, 1, start, stop, tmode, imode,
1229c36149bSjeremylt          u, v, request);
1239c36149bSjeremylt 
1249c36149bSjeremylt }
1259c36149bSjeremylt 
1261d79ecccSjeremylt static int CeedElemRestrictionApply_Ref_31(CeedElemRestriction r,
1279c36149bSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode tmode,
12861dbc9d2Sjeremylt     CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) {
12961dbc9d2Sjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 3, start, stop, tmode, imode,
1309c36149bSjeremylt          u, v, request);
1319c36149bSjeremylt }
1329c36149bSjeremylt 
1331d79ecccSjeremylt static int CeedElemRestrictionApply_Ref_38(CeedElemRestriction r,
1349c36149bSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode tmode,
13561dbc9d2Sjeremylt     CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) {
13661dbc9d2Sjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 8, 3, start, stop, tmode, imode,
1379c36149bSjeremylt          u, v, request);
1384d2a38eeSjeremylt }
1394d2a38eeSjeremylt 
140f10650afSjeremylt //------------------------------------------------------------------------------
141f10650afSjeremylt // ElemRestriction Apply
142f10650afSjeremylt //------------------------------------------------------------------------------
143be9261b7Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
144074cb416Sjeremylt                                         CeedTransposeMode tmode, CeedVector u,
145be9261b7Sjeremylt                                         CeedVector v, CeedRequest *request) {
146be9261b7Sjeremylt   int ierr;
1479c36149bSjeremylt   CeedInt numblk, ncomp, blksize;
148*7509a596Sjeremylt   CeedInterlaceMode imode = CEED_NONINTERLACED;
1494d2a38eeSjeremylt   ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr);
1509c36149bSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
1514d2a38eeSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr);
152*7509a596Sjeremylt   CeedElemRestriction_Ref *impl;
153*7509a596Sjeremylt   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);
154*7509a596Sjeremylt   if (impl->indices)
15561dbc9d2Sjeremylt     ierr = CeedElemRestrictionGetIMode(r, &imode); CeedChk(ierr);
1564d2a38eeSjeremylt 
1579c36149bSjeremylt   CeedInt idx = -1;
1589c36149bSjeremylt   if (blksize < 10)
1599c36149bSjeremylt     idx = 10*ncomp + blksize;
1609c36149bSjeremylt   switch (idx) {
1619c36149bSjeremylt   case 11:
16261dbc9d2Sjeremylt     return CeedElemRestrictionApply_Ref_11(r, 0, numblk, tmode, imode,
1631d79ecccSjeremylt                                            u, v, request);
1649c36149bSjeremylt     break;
1659c36149bSjeremylt   case 18:
16661dbc9d2Sjeremylt     return CeedElemRestrictionApply_Ref_18(r, 0, numblk, tmode, imode,
1671d79ecccSjeremylt                                            u, v, request);
1689c36149bSjeremylt     break;
1699c36149bSjeremylt   case 31:
17061dbc9d2Sjeremylt     return CeedElemRestrictionApply_Ref_31(r, 0, numblk, tmode, imode,
1711d79ecccSjeremylt                                            u, v, request);
1729c36149bSjeremylt     break;
1739c36149bSjeremylt   case 38:
17461dbc9d2Sjeremylt     return CeedElemRestrictionApply_Ref_38(r, 0, numblk, tmode, imode,
1751d79ecccSjeremylt                                            u, v, request);
1769c36149bSjeremylt     break;
1779c36149bSjeremylt   default:
1784d2a38eeSjeremylt     // LCOV_EXCL_START
1799c36149bSjeremylt     return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, 0, numblk,
18061dbc9d2Sjeremylt            tmode, imode, u, v, request);
1814d2a38eeSjeremylt     // LCOV_EXCL_STOP
182be9261b7Sjeremylt   }
1839c36149bSjeremylt }
184be9261b7Sjeremylt 
185f10650afSjeremylt //------------------------------------------------------------------------------
186f10650afSjeremylt // ElemRestriction Apply Block
187f10650afSjeremylt //------------------------------------------------------------------------------
188be9261b7Sjeremylt static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r,
189074cb416Sjeremylt     CeedInt block, CeedTransposeMode tmode, CeedVector u, CeedVector v,
190074cb416Sjeremylt     CeedRequest *request) {
1914d2a38eeSjeremylt   int ierr;
1929c36149bSjeremylt   CeedInt ncomp, blksize;
193*7509a596Sjeremylt   CeedInterlaceMode imode = CEED_NONINTERLACED;
1949c36149bSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
1954d2a38eeSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr);
196*7509a596Sjeremylt   CeedElemRestriction_Ref *impl;
197*7509a596Sjeremylt   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);
198*7509a596Sjeremylt   if (impl->indices)
19961dbc9d2Sjeremylt     ierr = CeedElemRestrictionGetIMode(r, &imode); CeedChk(ierr);
2004d2a38eeSjeremylt 
2019c36149bSjeremylt   CeedInt idx = -1;
2029c36149bSjeremylt   if (blksize < 10)
2039c36149bSjeremylt     idx = 10*ncomp + blksize;
2049c36149bSjeremylt   switch (idx) {
2059c36149bSjeremylt   case 11:
20661dbc9d2Sjeremylt     return CeedElemRestrictionApply_Ref_11(r, block, block+1, tmode, imode,
2074d2a38eeSjeremylt                                            u, v, request);
2089c36149bSjeremylt     break;
2099c36149bSjeremylt   case 18:
21061dbc9d2Sjeremylt     return CeedElemRestrictionApply_Ref_18(r, block, block+1, tmode, imode,
2114d2a38eeSjeremylt                                            u, v, request);
2129c36149bSjeremylt     break;
2139c36149bSjeremylt   case 31:
21461dbc9d2Sjeremylt     return CeedElemRestrictionApply_Ref_31(r, block, block+1, tmode, imode,
2159c36149bSjeremylt                                            u, v, request);
2169c36149bSjeremylt     break;
2179c36149bSjeremylt   case 38:
21861dbc9d2Sjeremylt     return CeedElemRestrictionApply_Ref_38(r, block, block+1, tmode, imode,
2199c36149bSjeremylt                                            u, v, request);
2209c36149bSjeremylt     break;
2219c36149bSjeremylt   default:
2224d2a38eeSjeremylt     // LCOV_EXCL_START
2239c36149bSjeremylt     return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, block, block+1,
22461dbc9d2Sjeremylt            tmode, imode, u, v, request);
2254d2a38eeSjeremylt     // LCOV_EXCL_STOP
226be9261b7Sjeremylt   }
2279c36149bSjeremylt }
228be9261b7Sjeremylt 
229f10650afSjeremylt //------------------------------------------------------------------------------
230f10650afSjeremylt // ElemRestriction Destroy
231f10650afSjeremylt //------------------------------------------------------------------------------
23221617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
23321617c04Sjeremylt   int ierr;
234fe2413ffSjeremylt   CeedElemRestriction_Ref *impl;
235fe2413ffSjeremylt   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);
23621617c04Sjeremylt 
23721617c04Sjeremylt   ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr);
238fe2413ffSjeremylt   ierr = CeedFree(&impl); CeedChk(ierr);
23921617c04Sjeremylt   return 0;
24021617c04Sjeremylt }
24121617c04Sjeremylt 
242f10650afSjeremylt //------------------------------------------------------------------------------
243f10650afSjeremylt // ElemRestriction Create
244f10650afSjeremylt //------------------------------------------------------------------------------
245667bc5fcSjeremylt int CeedElemRestrictionCreate_Ref(CeedMemType mtype, CeedCopyMode cmode,
246667bc5fcSjeremylt                                   const CeedInt *indices, CeedElemRestriction r) {
24721617c04Sjeremylt   int ierr;
24821617c04Sjeremylt   CeedElemRestriction_Ref *impl;
2494ce2993fSjeremylt   CeedInt elemsize, nelem;
2504ce2993fSjeremylt   ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
2514ce2993fSjeremylt   ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
2524ce2993fSjeremylt   Ceed ceed;
2534ce2993fSjeremylt   ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
25421617c04Sjeremylt 
25521617c04Sjeremylt   if (mtype != CEED_MEM_HOST)
256c042f62fSJeremy L Thompson     // LCOV_EXCL_START
2574ce2993fSjeremylt     return CeedError(ceed, 1, "Only MemType = HOST supported");
258c042f62fSJeremy L Thompson   // LCOV_EXCL_STOP
25921617c04Sjeremylt   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
26021617c04Sjeremylt   switch (cmode) {
26121617c04Sjeremylt   case CEED_COPY_VALUES:
2624ce2993fSjeremylt     ierr = CeedMalloc(nelem*elemsize, &impl->indices_allocated);
26321617c04Sjeremylt     CeedChk(ierr);
26421617c04Sjeremylt     memcpy(impl->indices_allocated, indices,
2654ce2993fSjeremylt            nelem * elemsize * sizeof(indices[0]));
26621617c04Sjeremylt     impl->indices = impl->indices_allocated;
26721617c04Sjeremylt     break;
26821617c04Sjeremylt   case CEED_OWN_POINTER:
26921617c04Sjeremylt     impl->indices_allocated = (CeedInt *)indices;
27021617c04Sjeremylt     impl->indices = impl->indices_allocated;
27121617c04Sjeremylt     break;
27221617c04Sjeremylt   case CEED_USE_POINTER:
27321617c04Sjeremylt     impl->indices = indices;
27421617c04Sjeremylt   }
275fe2413ffSjeremylt 
276fe2413ffSjeremylt   ierr = CeedElemRestrictionSetData(r, (void *)&impl); CeedChk(ierr);
277fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply",
278fe2413ffSjeremylt                                 CeedElemRestrictionApply_Ref); CeedChk(ierr);
279be9261b7Sjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock",
280be9261b7Sjeremylt                                 CeedElemRestrictionApplyBlock_Ref);
281be9261b7Sjeremylt   CeedChk(ierr);
282fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy",
283fe2413ffSjeremylt                                 CeedElemRestrictionDestroy_Ref); CeedChk(ierr);
28421617c04Sjeremylt   return 0;
28521617c04Sjeremylt }
286f10650afSjeremylt //------------------------------------------------------------------------------
287