xref: /libCEED/backends/ref/ceed-ref-restriction.c (revision 1d79ecccc2cb6288d141cadc52b33d3947b3b884)
121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
421617c04Sjeremylt //
521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
721617c04Sjeremylt // element discretizations for exascale applications. For more information and
821617c04Sjeremylt // source code availability see http://github.com/ceed.
921617c04Sjeremylt //
1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early
1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
1621617c04Sjeremylt 
1721617c04Sjeremylt #include "ceed-ref.h"
1821617c04Sjeremylt 
19be9261b7Sjeremylt static inline int CeedElemRestrictionApply_Ref_Core(CeedElemRestriction r,
209c36149bSjeremylt     const CeedInt blksize, const CeedInt ncomp, CeedInt start, CeedInt stop,
219c36149bSjeremylt     CeedTransposeMode tmode, CeedTransposeMode lmode, CeedVector u,
229c36149bSjeremylt     CeedVector v, CeedRequest *request) {
2321617c04Sjeremylt   int ierr;
244ce2993fSjeremylt   CeedElemRestriction_Ref *impl;
254ce2993fSjeremylt   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);;
2621617c04Sjeremylt   const CeedScalar *uu;
2721617c04Sjeremylt   CeedScalar *vv;
289c36149bSjeremylt   CeedInt nelem, elemsize, nnodes, voffset;
294ce2993fSjeremylt   ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
304ce2993fSjeremylt   ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
318795c945Sjeremylt   ierr = CeedElemRestrictionGetNumNodes(r, &nnodes); CeedChk(ierr);
32be9261b7Sjeremylt   voffset = start*blksize*elemsize*ncomp;
3321617c04Sjeremylt 
3421617c04Sjeremylt   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
3521617c04Sjeremylt   ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
368d94b059Sjeremylt   // Restriction from lvector to evector
3721617c04Sjeremylt   // Perform: v = r * u
388d94b059Sjeremylt   if (tmode == CEED_NOTRANSPOSE) {
398c91a0c9SJeremy L Thompson     // No indices provided, Identity Restriction
40e17b31afSThilina Rathnayake     if (!impl->indices) {
41be9261b7Sjeremylt       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
42e1b98f6eSjeremylt         CeedPragmaSIMD
43170d5e71Sjeremylt         for (CeedInt k = 0; k < ncomp*elemsize; k++)
44*1d79ecccSjeremylt           CeedPragmaSIMD
45*1d79ecccSjeremylt           for (CeedInt j = 0; j < blksize; j++)
46be9261b7Sjeremylt             vv[e*elemsize*ncomp + k*blksize + j - voffset]
474ce2993fSjeremylt               = uu[CeedIntMin(e+j, nelem-1)*ncomp*elemsize + k];
4821617c04Sjeremylt     } else {
498c91a0c9SJeremy L Thompson       // Indices provided, standard or blocked restriction
50ecf6354eSJed Brown       // vv has shape [elemsize, ncomp, nelem], row-major
518795c945Sjeremylt       // uu has shape [nnodes, ncomp]
52be9261b7Sjeremylt       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
53e1b98f6eSjeremylt         CeedPragmaSIMD
5421617c04Sjeremylt         for (CeedInt d = 0; d < ncomp; d++)
55e1b98f6eSjeremylt           CeedPragmaSIMD
5606cdd269SJed Brown           for (CeedInt i = 0; i < elemsize*blksize; i++)
57*1d79ecccSjeremylt             vv[elemsize*(d*blksize+ncomp*e) + i - voffset]
5806cdd269SJed Brown               = uu[lmode == CEED_NOTRANSPOSE
598795c945Sjeremylt                          ? impl->indices[i+elemsize*e]+nnodes*d
6006cdd269SJed Brown                          : d+ncomp*impl->indices[i+elemsize*e]];
6121617c04Sjeremylt     }
6221617c04Sjeremylt   } else {
638d94b059Sjeremylt     // Restriction from evector to lvector
648d94b059Sjeremylt     // Performing v += r^T * u
658c91a0c9SJeremy L Thompson     // No indices provided, Identity Restriction
66e17b31afSThilina Rathnayake     if (!impl->indices) {
67be9261b7Sjeremylt       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
684ce2993fSjeremylt         for (CeedInt j = 0; j < CeedIntMin(blksize, nelem-e); j++)
69*1d79ecccSjeremylt           CeedPragmaSIMD
70170d5e71Sjeremylt           for (CeedInt k = 0; k < ncomp*elemsize; k++)
71be9261b7Sjeremylt             vv[(e+j)*ncomp*elemsize + k]
72be9261b7Sjeremylt             += uu[e*elemsize*ncomp + k*blksize + j - voffset];
7321617c04Sjeremylt     } else {
748c91a0c9SJeremy L Thompson       // Indices provided, standard or blocked restriction
75ecf6354eSJed Brown       // uu has shape [elemsize, ncomp, nelem]
768795c945Sjeremylt       // vv has shape [nnodes, ncomp]
77e1b98f6eSjeremylt       for (CeedInt e = start*blksize; e < stop*blksize; e+=blksize)
78170d5e71Sjeremylt         for (CeedInt d = 0; d < ncomp; d++)
7906cdd269SJed Brown           for (CeedInt i = 0; i < elemsize*blksize; i+=blksize)
808d94b059Sjeremylt             // Iteration bound set to discard padding elements
814ce2993fSjeremylt             for (CeedInt j = i; j < i+CeedIntMin(blksize, nelem-e); j++)
8206cdd269SJed Brown               vv[lmode == CEED_NOTRANSPOSE
838795c945Sjeremylt                        ? impl->indices[j+e*elemsize]+nnodes*d
8406cdd269SJed Brown                        : d+ncomp*impl->indices[j+e*elemsize]]
85e1b98f6eSjeremylt               += uu[elemsize*(d*blksize+ncomp*e) + j - voffset];
8621617c04Sjeremylt     }
8721617c04Sjeremylt   }
8821617c04Sjeremylt   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
8921617c04Sjeremylt   ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
9021617c04Sjeremylt   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
9121617c04Sjeremylt     *request = NULL;
9221617c04Sjeremylt   return 0;
9321617c04Sjeremylt }
9421617c04Sjeremylt 
95*1d79ecccSjeremylt static int CeedElemRestrictionApply_Ref_11(CeedElemRestriction r,
964d2a38eeSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode tmode,
974d2a38eeSjeremylt     CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) {
989c36149bSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 1, start, stop, tmode, lmode,
999c36149bSjeremylt          u, v, request);
1004d2a38eeSjeremylt }
1014d2a38eeSjeremylt 
102*1d79ecccSjeremylt static int CeedElemRestrictionApply_Ref_18(CeedElemRestriction r,
1034d2a38eeSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode tmode,
1044d2a38eeSjeremylt     CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) {
1059c36149bSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 8, 1, start, stop, tmode, lmode,
1069c36149bSjeremylt          u, v, request);
1079c36149bSjeremylt 
1089c36149bSjeremylt }
1099c36149bSjeremylt 
110*1d79ecccSjeremylt static int CeedElemRestrictionApply_Ref_31(CeedElemRestriction r,
1119c36149bSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode tmode,
1129c36149bSjeremylt     CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) {
1139c36149bSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 1, 3, start, stop, tmode, lmode,
1149c36149bSjeremylt          u, v, request);
1159c36149bSjeremylt }
1169c36149bSjeremylt 
117*1d79ecccSjeremylt static int CeedElemRestrictionApply_Ref_38(CeedElemRestriction r,
1189c36149bSjeremylt     CeedInt start, CeedInt stop, CeedTransposeMode tmode,
1199c36149bSjeremylt     CeedTransposeMode lmode, CeedVector u, CeedVector v, CeedRequest *request) {
1209c36149bSjeremylt   return CeedElemRestrictionApply_Ref_Core(r, 8, 3, start, stop, tmode, lmode,
1219c36149bSjeremylt          u, v, request);
1224d2a38eeSjeremylt }
1234d2a38eeSjeremylt 
124be9261b7Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
125f90c8643Sjeremylt                                         CeedTransposeMode tmode,
126f90c8643Sjeremylt                                         CeedTransposeMode lmode, CeedVector u,
127be9261b7Sjeremylt                                         CeedVector v, CeedRequest *request) {
128be9261b7Sjeremylt   int ierr;
1299c36149bSjeremylt   CeedInt numblk, ncomp, blksize;
1304d2a38eeSjeremylt   ierr = CeedElemRestrictionGetNumBlocks(r, &numblk); CeedChk(ierr);
1319c36149bSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
1324d2a38eeSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr);
1334d2a38eeSjeremylt 
1349c36149bSjeremylt   CeedInt idx = -1;
1359c36149bSjeremylt   if (blksize < 10)
1369c36149bSjeremylt     idx = 10*ncomp + blksize;
1379c36149bSjeremylt   switch (idx) {
1389c36149bSjeremylt   case 11:
139*1d79ecccSjeremylt     return CeedElemRestrictionApply_Ref_11(r, 0, numblk, tmode, lmode,
140*1d79ecccSjeremylt                                            u, v, request);
1419c36149bSjeremylt     break;
1429c36149bSjeremylt   case 18:
143*1d79ecccSjeremylt     return CeedElemRestrictionApply_Ref_18(r, 0, numblk, tmode, lmode,
144*1d79ecccSjeremylt                                            u, v, request);
1459c36149bSjeremylt     break;
1469c36149bSjeremylt   case 31:
147*1d79ecccSjeremylt     return CeedElemRestrictionApply_Ref_31(r, 0, numblk, tmode, lmode,
148*1d79ecccSjeremylt                                            u, v, request);
1499c36149bSjeremylt     break;
1509c36149bSjeremylt   case 38:
151*1d79ecccSjeremylt     return CeedElemRestrictionApply_Ref_38(r, 0, numblk, tmode, lmode,
152*1d79ecccSjeremylt                                            u, v, request);
1539c36149bSjeremylt     break;
1549c36149bSjeremylt   default:
1554d2a38eeSjeremylt     // LCOV_EXCL_START
1569c36149bSjeremylt     return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, 0, numblk,
1579c36149bSjeremylt            tmode, lmode, u, v, request);
1584d2a38eeSjeremylt     // LCOV_EXCL_STOP
159be9261b7Sjeremylt   }
1609c36149bSjeremylt }
161be9261b7Sjeremylt 
162be9261b7Sjeremylt static int CeedElemRestrictionApplyBlock_Ref(CeedElemRestriction r,
163be9261b7Sjeremylt     CeedInt block, CeedTransposeMode tmode, CeedTransposeMode lmode,
164be9261b7Sjeremylt     CeedVector u, CeedVector v, CeedRequest *request) {
1654d2a38eeSjeremylt   int ierr;
1669c36149bSjeremylt   CeedInt ncomp, blksize;
1679c36149bSjeremylt   ierr = CeedElemRestrictionGetNumComponents(r, &ncomp); CeedChk(ierr);
1684d2a38eeSjeremylt   ierr = CeedElemRestrictionGetBlockSize(r, &blksize); CeedChk(ierr);
1694d2a38eeSjeremylt 
1709c36149bSjeremylt   CeedInt idx = -1;
1719c36149bSjeremylt   if (blksize < 10)
1729c36149bSjeremylt     idx = 10*ncomp + blksize;
1739c36149bSjeremylt   switch (idx) {
1749c36149bSjeremylt   case 11:
175*1d79ecccSjeremylt     return CeedElemRestrictionApply_Ref_11(r, block, block+1, tmode, lmode,
1764d2a38eeSjeremylt                                            u, v, request);
1779c36149bSjeremylt     break;
1789c36149bSjeremylt   case 18:
179*1d79ecccSjeremylt     return CeedElemRestrictionApply_Ref_18(r, block, block+1, tmode, lmode,
1804d2a38eeSjeremylt                                            u, v, request);
1819c36149bSjeremylt     break;
1829c36149bSjeremylt   case 31:
183*1d79ecccSjeremylt     return CeedElemRestrictionApply_Ref_31(r, block, block+1, tmode, lmode,
1849c36149bSjeremylt                                            u, v, request);
1859c36149bSjeremylt     break;
1869c36149bSjeremylt   case 38:
187*1d79ecccSjeremylt     return CeedElemRestrictionApply_Ref_38(r, block, block+1, tmode, lmode,
1889c36149bSjeremylt                                            u, v, request);
1899c36149bSjeremylt     break;
1909c36149bSjeremylt   default:
1914d2a38eeSjeremylt     // LCOV_EXCL_START
1929c36149bSjeremylt     return CeedElemRestrictionApply_Ref_Core(r, blksize, ncomp, block, block+1,
1939c36149bSjeremylt            tmode, lmode, u, v, request);
1944d2a38eeSjeremylt     // LCOV_EXCL_STOP
195be9261b7Sjeremylt   }
1969c36149bSjeremylt }
197be9261b7Sjeremylt 
19821617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
19921617c04Sjeremylt   int ierr;
200fe2413ffSjeremylt   CeedElemRestriction_Ref *impl;
201fe2413ffSjeremylt   ierr = CeedElemRestrictionGetData(r, (void *)&impl); CeedChk(ierr);
20221617c04Sjeremylt 
20321617c04Sjeremylt   ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr);
204fe2413ffSjeremylt   ierr = CeedFree(&impl); CeedChk(ierr);
20521617c04Sjeremylt   return 0;
20621617c04Sjeremylt }
20721617c04Sjeremylt 
208667bc5fcSjeremylt int CeedElemRestrictionCreate_Ref(CeedMemType mtype, CeedCopyMode cmode,
209667bc5fcSjeremylt                                   const CeedInt *indices, CeedElemRestriction r) {
21021617c04Sjeremylt   int ierr;
21121617c04Sjeremylt   CeedElemRestriction_Ref *impl;
2124ce2993fSjeremylt   CeedInt elemsize, nelem;
2134ce2993fSjeremylt   ierr = CeedElemRestrictionGetNumElements(r, &nelem); CeedChk(ierr);
2144ce2993fSjeremylt   ierr = CeedElemRestrictionGetElementSize(r, &elemsize); CeedChk(ierr);
2154ce2993fSjeremylt   Ceed ceed;
2164ce2993fSjeremylt   ierr = CeedElemRestrictionGetCeed(r, &ceed); CeedChk(ierr);
21721617c04Sjeremylt 
21821617c04Sjeremylt   if (mtype != CEED_MEM_HOST)
219c042f62fSJeremy L Thompson     // LCOV_EXCL_START
2204ce2993fSjeremylt     return CeedError(ceed, 1, "Only MemType = HOST supported");
221c042f62fSJeremy L Thompson   // LCOV_EXCL_STOP
22221617c04Sjeremylt   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
22321617c04Sjeremylt   switch (cmode) {
22421617c04Sjeremylt   case CEED_COPY_VALUES:
2254ce2993fSjeremylt     ierr = CeedMalloc(nelem*elemsize, &impl->indices_allocated);
22621617c04Sjeremylt     CeedChk(ierr);
22721617c04Sjeremylt     memcpy(impl->indices_allocated, indices,
2284ce2993fSjeremylt            nelem * elemsize * sizeof(indices[0]));
22921617c04Sjeremylt     impl->indices = impl->indices_allocated;
23021617c04Sjeremylt     break;
23121617c04Sjeremylt   case CEED_OWN_POINTER:
23221617c04Sjeremylt     impl->indices_allocated = (CeedInt *)indices;
23321617c04Sjeremylt     impl->indices = impl->indices_allocated;
23421617c04Sjeremylt     break;
23521617c04Sjeremylt   case CEED_USE_POINTER:
23621617c04Sjeremylt     impl->indices = indices;
23721617c04Sjeremylt   }
238fe2413ffSjeremylt 
239fe2413ffSjeremylt   ierr = CeedElemRestrictionSetData(r, (void *)&impl); CeedChk(ierr);
240fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Apply",
241fe2413ffSjeremylt                                 CeedElemRestrictionApply_Ref); CeedChk(ierr);
242be9261b7Sjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "ApplyBlock",
243be9261b7Sjeremylt                                 CeedElemRestrictionApplyBlock_Ref);
244be9261b7Sjeremylt   CeedChk(ierr);
245fe2413ffSjeremylt   ierr = CeedSetBackendFunction(ceed, "ElemRestriction", r, "Destroy",
246fe2413ffSjeremylt                                 CeedElemRestrictionDestroy_Ref); CeedChk(ierr);
24721617c04Sjeremylt   return 0;
24821617c04Sjeremylt }
249