xref: /libCEED/backends/ref/ceed-ref-restriction.c (revision f10650af6497af6d0949b45f2eee824400fc9b71)
121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
421617c04Sjeremylt //
521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
721617c04Sjeremylt // element discretizations for exascale applications. For more information and
821617c04Sjeremylt // source code availability see http://github.com/ceed.
921617c04Sjeremylt //
1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early
1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
1621617c04Sjeremylt 
1721617c04Sjeremylt #include "ceed-ref.h"
1821617c04Sjeremylt 
19*f10650afSjeremylt //------------------------------------------------------------------------------
20*f10650afSjeremylt // Core ElemRestriction Apply Code
21*f10650afSjeremylt //------------------------------------------------------------------------------
22be9261b7Sjeremylt static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r,
239c36149bSjeremylt     const CeedInt blksize, const CeedInt ncomp, CeedInt start, CeedInt stop,
249c36149bSjeremylt     CeedTransposeMode tmode, CeedTransposeMode lmode, CeedVector u,
259c36149bSjeremylt     CeedVector v, CeedRequest *request) {
2621617c04Sjeremylt   int ierr;
274ce2993fSjeremylt   CeedElemRestriction_Ref *impl;
284ce2993fSjeremylt   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);;
2921617c04Sjeremylt   const CeedScalar *uu;
3021617c04Sjeremylt   CeedScalar *vv;
319c36149bSjeremylt   CeedInt nelem, elemsize, nnodes, voffset;
324ce2993fSjeremylt   ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
334ce2993fSjeremylt   ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
348795c945Sjeremylt   ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
35be9261b7Sjeremylt   voffset = start*blksize*elemsize*ncomp;
3621617c04Sjeremylt 
3721617c04Sjeremylt   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
3821617c04Sjeremylt   ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
398d94b059Sjeremylt   // Restriction from lvector to evector
4021617c04Sjeremylt   // Perform: v = r * u
418d94b059Sjeremylt   if (tmode == CEED_NOTRANSPOSE) {
428c91a0c9SJeremy L Thompson     // No indices provided, Identity Restriction
43e17b31afSThilina Rathnayake     if (!impl->indices) {
44be9261b7Sjeremylt       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
45e1b98f6eSjeremylt         CeedPragmaSIMD
46170d5e71Sjeremylt         for (CeedInt k = 0; k < ncomp*elemsize; k++)
471d79ecccSjeremylt           CeedPragmaSIMD
481d79ecccSjeremylt           for (CeedInt j = 0; j < blksize; j++)
49be9261b7Sjeremylt             vv[e*elemsize*ncomp + k*blksize + j - voffset]
504ce2993fSjeremylt               = uu[CeedIntMin(e+j, nelem-1)*ncomp*elemsize + k];
5121617c04Sjeremylt     } else {
528c91a0c9SJeremy L Thompson       // Indices provided, standard or blocked restriction
53ecf6354eSJed Brown       // vv has shape [elemsize, ncomp, nelem], row-major
548795c945Sjeremylt       // uu has shape [nnodes, ncomp]
55be9261b7Sjeremylt       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
56e1b98f6eSjeremylt         CeedPragmaSIMD
5721617c04Sjeremylt         for (CeedInt d = 0; d < ncomp; d++)
58e1b98f6eSjeremylt           CeedPragmaSIMD
5906cdd269SJed Brown           for (CeedInt i = 0; i < elemsize*blksize; i++)
601d79ecccSjeremylt             vv[elemsize*(d*blksize+ncomp*e) + i - voffset]
6106cdd269SJed Brown               = uu[lmode == CEED_NOTRANSPOSE
628795c945Sjeremylt                          ? impl->indices[i+elemsize*e]+nnodes*d
6306cdd269SJed Brown                          : d+ncomp*impl->indices[i+elemsize*e]];
6421617c04Sjeremylt     }
6521617c04Sjeremylt   } else {
668d94b059Sjeremylt     // Restriction from evector to lvector
678d94b059Sjeremylt     // Performing v += r^T * u
688c91a0c9SJeremy L Thompson     // No indices provided, Identity Restriction
69e17b31afSThilina Rathnayake     if (!impl->indices) {
70be9261b7Sjeremylt       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
714ce2993fSjeremylt         for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++)
721d79ecccSjeremylt           CeedPragmaSIMD
73170d5e71Sjeremylt           for (CeedInt k = 0; k < ncomp*elemsize; k++)
74be9261b7Sjeremylt             vv[(e+j)*ncomp*elemsize + k]
75be9261b7Sjeremylt             += uu[e*elemsize*ncomp + k*blksize + j - voffset];
7621617c04Sjeremylt     } else {
778c91a0c9SJeremy L Thompson       // Indices provided, standard or blocked restriction
78ecf6354eSJed Brown       // uu has shape [elemsize, ncomp, nelem]
798795c945Sjeremylt       // vv has shape [nnodes, ncomp]
80e1b98f6eSjeremylt       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
81170d5e71Sjeremylt         for (CeedInt d = 0; d < ncomp; d++)
8206cdd269SJed Brown           for (CeedInt i = 0; i < elemsize*blksize; i+=blksize)
838d94b059Sjeremylt             // Iteration bound set to discard padding elements
844ce2993fSjeremylt             for (CeedInt j = i; j < i+CeedIntMin(blksize, nelem-e); j++)
8506cdd269SJed Brown               vv[lmode == CEED_NOTRANSPOSE
868795c945Sjeremylt                        ? impl->indices[j+e*elemsize]+nnodes*d
8706cdd269SJed Brown                        : d+ncomp*impl->indices[j+e*elemsize]]
88e1b98f6eSjeremylt               += uu[elemsize*(d*blksize+ncomp*e) + j - voffset];
8921617c04Sjeremylt     }
9021617c04Sjeremylt   }
9121617c04Sjeremylt   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
9221617c04Sjeremylt   ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
9321617c04Sjeremylt   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
9421617c04Sjeremylt     *request = NULL;
9521617c04Sjeremylt   return 0;
9621617c04Sjeremylt }
9721617c04Sjeremylt 
98*f10650afSjeremylt //------------------------------------------------------------------------------
99*f10650afSjeremylt // ElemRestriction Apply - Common Sizes
100*f10650afSjeremylt //------------------------------------------------------------------------------
1011d79ecccSjeremylt static int CeedElemRestrictionApply_Ref_11(CeedElemRestriction r,
1024d2a38eeSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode tmode,
1034d2a38eeSjeremylt     CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) {
1049c36149bSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, start, stop, tmode, lmode,
1059c36149bSjeremylt          u, v, request);
1064d2a38eeSjeremylt }
1074d2a38eeSjeremylt 
1081d79ecccSjeremylt static int CeedElemRestrictionApply_Ref_18(CeedElemRestriction r,
1094d2a38eeSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode tmode,
1104d2a38eeSjeremylt     CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) {
1119c36149bSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 8, 1, start, stop, tmode, lmode,
1129c36149bSjeremylt          u, v, request);
1139c36149bSjeremylt 
1149c36149bSjeremylt }
1159c36149bSjeremylt 
1161d79ecccSjeremylt static int CeedElemRestrictionApply_Ref_31(CeedElemRestriction r,
1179c36149bSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode tmode,
1189c36149bSjeremylt     CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) {
1199c36149bSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 3, start, stop, tmode, lmode,
1209c36149bSjeremylt          u, v, request);
1219c36149bSjeremylt }
1229c36149bSjeremylt 
1231d79ecccSjeremylt static int CeedElemRestrictionApply_Ref_38(CeedElemRestriction r,
1249c36149bSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode tmode,
1259c36149bSjeremylt     CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) {
1269c36149bSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 8, 3, start, stop, tmode, lmode,
1279c36149bSjeremylt          u, v, request);
1284d2a38eeSjeremylt }
1294d2a38eeSjeremylt 
130*f10650afSjeremylt //------------------------------------------------------------------------------
131*f10650afSjeremylt // ElemRestriction Apply
132*f10650afSjeremylt //------------------------------------------------------------------------------
133be9261b7Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
134f90c8643Sjeremylt                                         CeedTransposeMode tmode,
135f90c8643Sjeremylt                                         CeedTransposeMode lmode, CeedVector u,
136be9261b7Sjeremylt                                         CeedVector v, CeedRequest *request) {
137be9261b7Sjeremylt   int ierr;
1389c36149bSjeremylt   CeedInt numblk, ncomp, blksize;
1394d2a38eeSjeremylt   ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr);
1409c36149bSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
1414d2a38eeSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr);
1424d2a38eeSjeremylt 
1439c36149bSjeremylt   CeedInt idx = -1;
1449c36149bSjeremylt   if (blksize < 10)
1459c36149bSjeremylt     idx = 10*ncomp + blksize;
1469c36149bSjeremylt   switch (idx) {
1479c36149bSjeremylt   case 11:
1481d79ecccSjeremylt     return CeedElemRestrictionApply_Ref_11(r, 0, numblk, tmode, lmode,
1491d79ecccSjeremylt                                            u, v, request);
1509c36149bSjeremylt     break;
1519c36149bSjeremylt   case 18:
1521d79ecccSjeremylt     return CeedElemRestrictionApply_Ref_18(r, 0, numblk, tmode, lmode,
1531d79ecccSjeremylt                                            u, v, request);
1549c36149bSjeremylt     break;
1559c36149bSjeremylt   case 31:
1561d79ecccSjeremylt     return CeedElemRestrictionApply_Ref_31(r, 0, numblk, tmode, lmode,
1571d79ecccSjeremylt                                            u, v, request);
1589c36149bSjeremylt     break;
1599c36149bSjeremylt   case 38:
1601d79ecccSjeremylt     return CeedElemRestrictionApply_Ref_38(r, 0, numblk, tmode, lmode,
1611d79ecccSjeremylt                                            u, v, request);
1629c36149bSjeremylt     break;
1639c36149bSjeremylt   default:
1644d2a38eeSjeremylt     // LCOV_EXCL_START
1659c36149bSjeremylt     return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, 0, numblk,
1669c36149bSjeremylt            tmode, lmode, u, v, request);
1674d2a38eeSjeremylt     // LCOV_EXCL_STOP
168be9261b7Sjeremylt   }
1699c36149bSjeremylt }
170be9261b7Sjeremylt 
171*f10650afSjeremylt //------------------------------------------------------------------------------
172*f10650afSjeremylt // ElemRestriction Apply Block
173*f10650afSjeremylt //------------------------------------------------------------------------------
174be9261b7Sjeremylt static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r,
175be9261b7Sjeremylt     CeedInt block, CeedTransposeMode tmode, CeedTransposeMode lmode,
176be9261b7Sjeremylt     CeedVector u, CeedVector v, CeedRequest *request) {
1774d2a38eeSjeremylt   int ierr;
1789c36149bSjeremylt   CeedInt ncomp, blksize;
1799c36149bSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
1804d2a38eeSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr);
1814d2a38eeSjeremylt 
1829c36149bSjeremylt   CeedInt idx = -1;
1839c36149bSjeremylt   if (blksize < 10)
1849c36149bSjeremylt     idx = 10*ncomp + blksize;
1859c36149bSjeremylt   switch (idx) {
1869c36149bSjeremylt   case 11:
1871d79ecccSjeremylt     return CeedElemRestrictionApply_Ref_11(r, block, block+1, tmode, lmode,
1884d2a38eeSjeremylt                                            u, v, request);
1899c36149bSjeremylt     break;
1909c36149bSjeremylt   case 18:
1911d79ecccSjeremylt     return CeedElemRestrictionApply_Ref_18(r, block, block+1, tmode, lmode,
1924d2a38eeSjeremylt                                            u, v, request);
1939c36149bSjeremylt     break;
1949c36149bSjeremylt   case 31:
1951d79ecccSjeremylt     return CeedElemRestrictionApply_Ref_31(r, block, block+1, tmode, lmode,
1969c36149bSjeremylt                                            u, v, request);
1979c36149bSjeremylt     break;
1989c36149bSjeremylt   case 38:
1991d79ecccSjeremylt     return CeedElemRestrictionApply_Ref_38(r, block, block+1, tmode, lmode,
2009c36149bSjeremylt                                            u, v, request);
2019c36149bSjeremylt     break;
2029c36149bSjeremylt   default:
2034d2a38eeSjeremylt     // LCOV_EXCL_START
2049c36149bSjeremylt     return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, block, block+1,
2059c36149bSjeremylt            tmode, lmode, u, v, request);
2064d2a38eeSjeremylt     // LCOV_EXCL_STOP
207be9261b7Sjeremylt   }
2089c36149bSjeremylt }
209be9261b7Sjeremylt 
210*f10650afSjeremylt //------------------------------------------------------------------------------
211*f10650afSjeremylt // ElemRestriction Destroy
212*f10650afSjeremylt //------------------------------------------------------------------------------
21321617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
21421617c04Sjeremylt   int ierr;
215fe2413ffSjeremylt   CeedElemRestriction_Ref *impl;
216fe2413ffSjeremylt   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);
21721617c04Sjeremylt 
21821617c04Sjeremylt   ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr);
219fe2413ffSjeremylt   ierr = CeedFree(&impl); CeedChk(ierr);
22021617c04Sjeremylt   return 0;
22121617c04Sjeremylt }
22221617c04Sjeremylt 
223*f10650afSjeremylt //------------------------------------------------------------------------------
224*f10650afSjeremylt // ElemRestriction Create
225*f10650afSjeremylt //------------------------------------------------------------------------------
226667bc5fcSjeremylt int CeedElemRestrictionCreate_Ref(CeedMemType mtype, CeedCopyMode cmode,
227667bc5fcSjeremylt                                   const CeedInt *indices, CeedElemRestriction r) {
22821617c04Sjeremylt   int ierr;
22921617c04Sjeremylt   CeedElemRestriction_Ref *impl;
2304ce2993fSjeremylt   CeedInt elemsize, nelem;
2314ce2993fSjeremylt   ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
2324ce2993fSjeremylt   ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
2334ce2993fSjeremylt   Ceed ceed;
2344ce2993fSjeremylt   ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
23521617c04Sjeremylt 
23621617c04Sjeremylt   if (mtype != CEED_MEM_HOST)
237c042f62fSJeremy L Thompson     // LCOV_EXCL_START
2384ce2993fSjeremylt     return CeedError(ceed, 1, "Only MemType = HOST supported");
239c042f62fSJeremy L Thompson   // LCOV_EXCL_STOP
24021617c04Sjeremylt   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
24121617c04Sjeremylt   switch (cmode) {
24221617c04Sjeremylt   case CEED_COPY_VALUES:
2434ce2993fSjeremylt     ierr = CeedMalloc(nelem*elemsize, &impl->indices_allocated);
24421617c04Sjeremylt     CeedChk(ierr);
24521617c04Sjeremylt     memcpy(impl->indices_allocated, indices,
2464ce2993fSjeremylt            nelem * elemsize * sizeof(indices[0]));
24721617c04Sjeremylt     impl->indices = impl->indices_allocated;
24821617c04Sjeremylt     break;
24921617c04Sjeremylt   case CEED_OWN_POINTER:
25021617c04Sjeremylt     impl->indices_allocated = (CeedInt *)indices;
25121617c04Sjeremylt     impl->indices = impl->indices_allocated;
25221617c04Sjeremylt     break;
25321617c04Sjeremylt   case CEED_USE_POINTER:
25421617c04Sjeremylt     impl->indices = indices;
25521617c04Sjeremylt   }
256fe2413ffSjeremylt 
257fe2413ffSjeremylt   ierr = CeedElemRestrictionSetData(r, (void *)&impl); CeedChk(ierr);
258fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply",
259fe2413ffSjeremylt                                 CeedElemRestrictionApply_Ref); CeedChk(ierr);
260be9261b7Sjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock",
261be9261b7Sjeremylt                                 CeedElemRestrictionApplyBlock_Ref);
262be9261b7Sjeremylt   CeedChk(ierr);
263fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy",
264fe2413ffSjeremylt                                 CeedElemRestrictionDestroy_Ref); CeedChk(ierr);
26521617c04Sjeremylt   return 0;
26621617c04Sjeremylt }
267*f10650afSjeremylt //------------------------------------------------------------------------------
268