xref: /libCEED/rust/libceed-sys/c-src/backends/ref/ceed-ref-restriction.c (revision 61dbc9d25335c1a7855bdb901ed4d560d51eee1f)
121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
421617c04Sjeremylt //
521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
721617c04Sjeremylt // element discretizations for exascale applications. For more information and
821617c04Sjeremylt // source code availability see http://github.com/ceed.
921617c04Sjeremylt //
1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early
1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
1621617c04Sjeremylt 
1721617c04Sjeremylt #include "ceed-ref.h"
1821617c04Sjeremylt 
19f10650afSjeremylt //------------------------------------------------------------------------------
20f10650afSjeremylt // Core ElemRestriction Apply Code
21f10650afSjeremylt //------------------------------------------------------------------------------
22be9261b7Sjeremylt static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r,
239c36149bSjeremylt     const CeedInt blksize, const CeedInt ncomp, CeedInt start, CeedInt stop,
24*61dbc9d2Sjeremylt     CeedTransposeMode tmode, CeedInterlaceMode imode, CeedVector u,
259c36149bSjeremylt     CeedVector v, CeedRequest *request) {
2621617c04Sjeremylt   int ierr;
274ce2993fSjeremylt   CeedElemRestriction_Ref *impl;
284ce2993fSjeremylt   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);;
2921617c04Sjeremylt   const CeedScalar *uu;
3021617c04Sjeremylt   CeedScalar *vv;
319c36149bSjeremylt   CeedInt nelem, elemsize, nnodes, voffset;
324ce2993fSjeremylt   ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
334ce2993fSjeremylt   ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
348795c945Sjeremylt   ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
35be9261b7Sjeremylt   voffset = start*blksize*elemsize*ncomp;
3621617c04Sjeremylt 
3721617c04Sjeremylt   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
3821617c04Sjeremylt   ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
398d94b059Sjeremylt   // Restriction from lvector to evector
4021617c04Sjeremylt   // Perform: v = r * u
418d94b059Sjeremylt   if (tmode == CEED_NOTRANSPOSE) {
428c91a0c9SJeremy L Thompson     // No indices provided, Identity Restriction
43e17b31afSThilina Rathnayake     if (!impl->indices) {
44be9261b7Sjeremylt       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
45e1b98f6eSjeremylt         CeedPragmaSIMD
46170d5e71Sjeremylt         for (CeedInt k = 0; k < ncomp*elemsize; k++)
471d79ecccSjeremylt           CeedPragmaSIMD
481d79ecccSjeremylt           for (CeedInt j = 0; j < blksize; j++)
49be9261b7Sjeremylt             vv[e*elemsize*ncomp + k*blksize + j - voffset]
504ce2993fSjeremylt               = uu[CeedIntMin(e+j, nelem-1)*ncomp*elemsize + k];
5121617c04Sjeremylt     } else {
528c91a0c9SJeremy L Thompson       // Indices provided, standard or blocked restriction
53ecf6354eSJed Brown       // vv has shape [elemsize, ncomp, nelem], row-major
548795c945Sjeremylt       // uu has shape [nnodes, ncomp]
55be9261b7Sjeremylt       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
56e1b98f6eSjeremylt         CeedPragmaSIMD
5721617c04Sjeremylt         for (CeedInt d = 0; d < ncomp; d++)
58e1b98f6eSjeremylt           CeedPragmaSIMD
5906cdd269SJed Brown           for (CeedInt i = 0; i < elemsize*blksize; i++)
601d79ecccSjeremylt             vv[elemsize*(d*blksize+ncomp*e) + i - voffset]
61*61dbc9d2Sjeremylt               = uu[imode == CEED_NONINTERLACED
628795c945Sjeremylt                          ? impl->indices[i+elemsize*e]+nnodes*d
6306cdd269SJed Brown                          : d+ncomp*impl->indices[i+elemsize*e]];
6421617c04Sjeremylt     }
6521617c04Sjeremylt   } else {
668d94b059Sjeremylt     // Restriction from evector to lvector
678d94b059Sjeremylt     // Performing v += r^T * u
688c91a0c9SJeremy L Thompson     // No indices provided, Identity Restriction
69e17b31afSThilina Rathnayake     if (!impl->indices) {
70be9261b7Sjeremylt       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
714ce2993fSjeremylt         for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++)
721d79ecccSjeremylt           CeedPragmaSIMD
73170d5e71Sjeremylt           for (CeedInt k = 0; k < ncomp*elemsize; k++)
74be9261b7Sjeremylt             vv[(e+j)*ncomp*elemsize + k]
75be9261b7Sjeremylt             += uu[e*elemsize*ncomp + k*blksize + j - voffset];
7621617c04Sjeremylt     } else {
778c91a0c9SJeremy L Thompson       // Indices provided, standard or blocked restriction
78ecf6354eSJed Brown       // uu has shape [elemsize, ncomp, nelem]
798795c945Sjeremylt       // vv has shape [nnodes, ncomp]
80e1b98f6eSjeremylt       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
81170d5e71Sjeremylt         for (CeedInt d = 0; d < ncomp; d++)
8206cdd269SJed Brown           for (CeedInt i = 0; i < elemsize*blksize; i+=blksize)
838d94b059Sjeremylt             // Iteration bound set to discard padding elements
844ce2993fSjeremylt             for (CeedInt j = i; j < i+CeedIntMin(blksize, nelem-e); j++)
85*61dbc9d2Sjeremylt               vv[imode == CEED_NONINTERLACED
868795c945Sjeremylt                        ? impl->indices[j+e*elemsize]+nnodes*d
8706cdd269SJed Brown                        : d+ncomp*impl->indices[j+e*elemsize]]
88e1b98f6eSjeremylt               += uu[elemsize*(d*blksize+ncomp*e) + j - voffset];
8921617c04Sjeremylt     }
9021617c04Sjeremylt   }
9121617c04Sjeremylt   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
9221617c04Sjeremylt   ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
9321617c04Sjeremylt   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
9421617c04Sjeremylt     *request = NULL;
9521617c04Sjeremylt   return 0;
9621617c04Sjeremylt }
9721617c04Sjeremylt 
98f10650afSjeremylt //------------------------------------------------------------------------------
99f10650afSjeremylt // ElemRestriction Apply - Common Sizes
100f10650afSjeremylt //------------------------------------------------------------------------------
1011d79ecccSjeremylt static int CeedElemRestrictionApply_Ref_11(CeedElemRestriction r,
1024d2a38eeSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode tmode,
103*61dbc9d2Sjeremylt     CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) {
104*61dbc9d2Sjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, start, stop, tmode, imode,
1059c36149bSjeremylt          u, v, request);
1064d2a38eeSjeremylt }
1074d2a38eeSjeremylt 
1081d79ecccSjeremylt static int CeedElemRestrictionApply_Ref_18(CeedElemRestriction r,
1094d2a38eeSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode tmode,
110*61dbc9d2Sjeremylt     CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) {
111*61dbc9d2Sjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 8, 1, start, stop, tmode, imode,
1129c36149bSjeremylt          u, v, request);
1139c36149bSjeremylt 
1149c36149bSjeremylt }
1159c36149bSjeremylt 
1161d79ecccSjeremylt static int CeedElemRestrictionApply_Ref_31(CeedElemRestriction r,
1179c36149bSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode tmode,
118*61dbc9d2Sjeremylt     CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) {
119*61dbc9d2Sjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 3, start, stop, tmode, imode,
1209c36149bSjeremylt          u, v, request);
1219c36149bSjeremylt }
1229c36149bSjeremylt 
1231d79ecccSjeremylt static int CeedElemRestrictionApply_Ref_38(CeedElemRestriction r,
1249c36149bSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode tmode,
125*61dbc9d2Sjeremylt     CeedInterlaceMode imode, CeedVector u, CeedVector v, CeedRequest *request) {
126*61dbc9d2Sjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 8, 3, start, stop, tmode, imode,
1279c36149bSjeremylt          u, v, request);
1284d2a38eeSjeremylt }
1294d2a38eeSjeremylt 
130f10650afSjeremylt //------------------------------------------------------------------------------
131f10650afSjeremylt // ElemRestriction Apply
132f10650afSjeremylt //------------------------------------------------------------------------------
133be9261b7Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
134074cb416Sjeremylt                                         CeedTransposeMode tmode, CeedVector u,
135be9261b7Sjeremylt                                         CeedVector v, CeedRequest *request) {
136be9261b7Sjeremylt   int ierr;
1379c36149bSjeremylt   CeedInt numblk, ncomp, blksize;
138*61dbc9d2Sjeremylt   CeedInterlaceMode imode;
1394d2a38eeSjeremylt   ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr);
1409c36149bSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
1414d2a38eeSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr);
142*61dbc9d2Sjeremylt   ierr = CeedElemRestrictionGetIMode(r, &imode); CeedChk(ierr);
1434d2a38eeSjeremylt 
1449c36149bSjeremylt   CeedInt idx = -1;
1459c36149bSjeremylt   if (blksize < 10)
1469c36149bSjeremylt     idx = 10*ncomp + blksize;
1479c36149bSjeremylt   switch (idx) {
1489c36149bSjeremylt   case 11:
149*61dbc9d2Sjeremylt     return CeedElemRestrictionApply_Ref_11(r, 0, numblk, tmode, imode,
1501d79ecccSjeremylt                                            u, v, request);
1519c36149bSjeremylt     break;
1529c36149bSjeremylt   case 18:
153*61dbc9d2Sjeremylt     return CeedElemRestrictionApply_Ref_18(r, 0, numblk, tmode, imode,
1541d79ecccSjeremylt                                            u, v, request);
1559c36149bSjeremylt     break;
1569c36149bSjeremylt   case 31:
157*61dbc9d2Sjeremylt     return CeedElemRestrictionApply_Ref_31(r, 0, numblk, tmode, imode,
1581d79ecccSjeremylt                                            u, v, request);
1599c36149bSjeremylt     break;
1609c36149bSjeremylt   case 38:
161*61dbc9d2Sjeremylt     return CeedElemRestrictionApply_Ref_38(r, 0, numblk, tmode, imode,
1621d79ecccSjeremylt                                            u, v, request);
1639c36149bSjeremylt     break;
1649c36149bSjeremylt   default:
1654d2a38eeSjeremylt     // LCOV_EXCL_START
1669c36149bSjeremylt     return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, 0, numblk,
167*61dbc9d2Sjeremylt            tmode, imode, u, v, request);
1684d2a38eeSjeremylt     // LCOV_EXCL_STOP
169be9261b7Sjeremylt   }
1709c36149bSjeremylt }
171be9261b7Sjeremylt 
172f10650afSjeremylt //------------------------------------------------------------------------------
173f10650afSjeremylt // ElemRestriction Apply Block
174f10650afSjeremylt //------------------------------------------------------------------------------
175be9261b7Sjeremylt static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r,
176074cb416Sjeremylt     CeedInt block, CeedTransposeMode tmode, CeedVector u, CeedVector v,
177074cb416Sjeremylt     CeedRequest *request) {
1784d2a38eeSjeremylt   int ierr;
1799c36149bSjeremylt   CeedInt ncomp, blksize;
180*61dbc9d2Sjeremylt   CeedInterlaceMode imode;
1819c36149bSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
1824d2a38eeSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr);
183*61dbc9d2Sjeremylt   ierr = CeedElemRestrictionGetIMode(r, &imode); CeedChk(ierr);
1844d2a38eeSjeremylt 
1859c36149bSjeremylt   CeedInt idx = -1;
1869c36149bSjeremylt   if (blksize < 10)
1879c36149bSjeremylt     idx = 10*ncomp + blksize;
1889c36149bSjeremylt   switch (idx) {
1899c36149bSjeremylt   case 11:
190*61dbc9d2Sjeremylt     return CeedElemRestrictionApply_Ref_11(r, block, block+1, tmode, imode,
1914d2a38eeSjeremylt                                            u, v, request);
1929c36149bSjeremylt     break;
1939c36149bSjeremylt   case 18:
194*61dbc9d2Sjeremylt     return CeedElemRestrictionApply_Ref_18(r, block, block+1, tmode, imode,
1954d2a38eeSjeremylt                                            u, v, request);
1969c36149bSjeremylt     break;
1979c36149bSjeremylt   case 31:
198*61dbc9d2Sjeremylt     return CeedElemRestrictionApply_Ref_31(r, block, block+1, tmode, imode,
1999c36149bSjeremylt                                            u, v, request);
2009c36149bSjeremylt     break;
2019c36149bSjeremylt   case 38:
202*61dbc9d2Sjeremylt     return CeedElemRestrictionApply_Ref_38(r, block, block+1, tmode, imode,
2039c36149bSjeremylt                                            u, v, request);
2049c36149bSjeremylt     break;
2059c36149bSjeremylt   default:
2064d2a38eeSjeremylt     // LCOV_EXCL_START
2079c36149bSjeremylt     return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, block, block+1,
208*61dbc9d2Sjeremylt            tmode, imode, u, v, request);
2094d2a38eeSjeremylt     // LCOV_EXCL_STOP
210be9261b7Sjeremylt   }
2119c36149bSjeremylt }
212be9261b7Sjeremylt 
213f10650afSjeremylt //------------------------------------------------------------------------------
214f10650afSjeremylt // ElemRestriction Destroy
215f10650afSjeremylt //------------------------------------------------------------------------------
21621617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
21721617c04Sjeremylt   int ierr;
218fe2413ffSjeremylt   CeedElemRestriction_Ref *impl;
219fe2413ffSjeremylt   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);
22021617c04Sjeremylt 
22121617c04Sjeremylt   ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr);
222fe2413ffSjeremylt   ierr = CeedFree(&impl); CeedChk(ierr);
22321617c04Sjeremylt   return 0;
22421617c04Sjeremylt }
22521617c04Sjeremylt 
226f10650afSjeremylt //------------------------------------------------------------------------------
227f10650afSjeremylt // ElemRestriction Create
228f10650afSjeremylt //------------------------------------------------------------------------------
229667bc5fcSjeremylt int CeedElemRestrictionCreate_Ref(CeedMemType mtype, CeedCopyMode cmode,
230667bc5fcSjeremylt                                   const CeedInt *indices, CeedElemRestriction r) {
23121617c04Sjeremylt   int ierr;
23221617c04Sjeremylt   CeedElemRestriction_Ref *impl;
2334ce2993fSjeremylt   CeedInt elemsize, nelem;
2344ce2993fSjeremylt   ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
2354ce2993fSjeremylt   ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
2364ce2993fSjeremylt   Ceed ceed;
2374ce2993fSjeremylt   ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
23821617c04Sjeremylt 
23921617c04Sjeremylt   if (mtype != CEED_MEM_HOST)
240c042f62fSJeremy L Thompson     // LCOV_EXCL_START
2414ce2993fSjeremylt     return CeedError(ceed, 1, "Only MemType = HOST supported");
242c042f62fSJeremy L Thompson   // LCOV_EXCL_STOP
24321617c04Sjeremylt   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
24421617c04Sjeremylt   switch (cmode) {
24521617c04Sjeremylt   case CEED_COPY_VALUES:
2464ce2993fSjeremylt     ierr = CeedMalloc(nelem*elemsize, &impl->indices_allocated);
24721617c04Sjeremylt     CeedChk(ierr);
24821617c04Sjeremylt     memcpy(impl->indices_allocated, indices,
2494ce2993fSjeremylt            nelem * elemsize * sizeof(indices[0]));
25021617c04Sjeremylt     impl->indices = impl->indices_allocated;
25121617c04Sjeremylt     break;
25221617c04Sjeremylt   case CEED_OWN_POINTER:
25321617c04Sjeremylt     impl->indices_allocated = (CeedInt *)indices;
25421617c04Sjeremylt     impl->indices = impl->indices_allocated;
25521617c04Sjeremylt     break;
25621617c04Sjeremylt   case CEED_USE_POINTER:
25721617c04Sjeremylt     impl->indices = indices;
25821617c04Sjeremylt   }
259fe2413ffSjeremylt 
260fe2413ffSjeremylt   ierr = CeedElemRestrictionSetData(r, (void *)&impl); CeedChk(ierr);
261fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply",
262fe2413ffSjeremylt                                 CeedElemRestrictionApply_Ref); CeedChk(ierr);
263be9261b7Sjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock",
264be9261b7Sjeremylt                                 CeedElemRestrictionApplyBlock_Ref);
265be9261b7Sjeremylt   CeedChk(ierr);
266fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy",
267fe2413ffSjeremylt                                 CeedElemRestrictionDestroy_Ref); CeedChk(ierr);
26821617c04Sjeremylt   return 0;
26921617c04Sjeremylt }
270f10650afSjeremylt //------------------------------------------------------------------------------
271