xref: /libCEED/backends/ref/ceed-ref-restriction.c (revision 21617c043cf2be06120e43f272b68c5ebd5f09fa)
1*21617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2*21617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3*21617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details.
4*21617c04Sjeremylt //
5*21617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software
6*21617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral
7*21617c04Sjeremylt // element discretizations for exascale applications. For more information and
8*21617c04Sjeremylt // source code availability see http://github.com/ceed.
9*21617c04Sjeremylt //
10*21617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11*21617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office
12*21617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for
13*21617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including
14*21617c04Sjeremylt // software, applications, hardware, advanced system engineering and early
15*21617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative.
16*21617c04Sjeremylt 
17*21617c04Sjeremylt #include <ceed-impl.h>
18*21617c04Sjeremylt #include <string.h>
19*21617c04Sjeremylt #include "ceed-ref.h"
20*21617c04Sjeremylt 
21*21617c04Sjeremylt static int CeedElemRestrictionApply_Ref(CeedElemRestriction r,
22*21617c04Sjeremylt                                         CeedTransposeMode tmode, CeedInt ncomp,
23*21617c04Sjeremylt                                         CeedTransposeMode lmode, CeedVector u,
24*21617c04Sjeremylt                                         CeedVector v, CeedRequest *request) {
25*21617c04Sjeremylt   CeedElemRestriction_Ref *impl = r->data;
26*21617c04Sjeremylt   int ierr;
27*21617c04Sjeremylt   const CeedScalar *uu;
28*21617c04Sjeremylt   CeedScalar *vv;
29*21617c04Sjeremylt   CeedInt esize = r->nelem*r->elemsize;
30*21617c04Sjeremylt 
31*21617c04Sjeremylt   ierr = CeedVectorGetArrayRead(u, CEED_MEM_HOST, &uu); CeedChk(ierr);
32*21617c04Sjeremylt   ierr = CeedVectorGetArray(v, CEED_MEM_HOST, &vv); CeedChk(ierr);
33*21617c04Sjeremylt   if (tmode == CEED_NOTRANSPOSE) {
34*21617c04Sjeremylt     // Perform: v = r * u
35*21617c04Sjeremylt     if (ncomp == 1) {
36*21617c04Sjeremylt       for (CeedInt i=0; i<esize; i++) vv[i] = uu[impl->indices[i]];
37*21617c04Sjeremylt     } else {
38*21617c04Sjeremylt       // vv is (elemsize x ncomp x nelem), column-major
39*21617c04Sjeremylt       if (lmode == CEED_NOTRANSPOSE) { // u is (ndof x ncomp), column-major
40*21617c04Sjeremylt         for (CeedInt e = 0; e < r->nelem; e++)
41*21617c04Sjeremylt           for (CeedInt d = 0; d < ncomp; d++)
42*21617c04Sjeremylt             for (CeedInt i=0; i<r->elemsize; i++) {
43*21617c04Sjeremylt               vv[i+r->elemsize*(d+ncomp*e)] =
44*21617c04Sjeremylt                 uu[impl->indices[i+r->elemsize*e]+r->ndof*d];
45*21617c04Sjeremylt             }
46*21617c04Sjeremylt       } else { // u is (ncomp x ndof), column-major
47*21617c04Sjeremylt         for (CeedInt e = 0; e < r->nelem; e++)
48*21617c04Sjeremylt           for (CeedInt d = 0; d < ncomp; d++)
49*21617c04Sjeremylt             for (CeedInt i=0; i<r->elemsize; i++) {
50*21617c04Sjeremylt               vv[i+r->elemsize*(d+ncomp*e)] =
51*21617c04Sjeremylt                 uu[d+ncomp*impl->indices[i+r->elemsize*e]];
52*21617c04Sjeremylt             }
53*21617c04Sjeremylt       }
54*21617c04Sjeremylt     }
55*21617c04Sjeremylt   } else {
56*21617c04Sjeremylt     // Note: in transpose mode, we perform: v += r^t * u
57*21617c04Sjeremylt     if (ncomp == 1) {
58*21617c04Sjeremylt       for (CeedInt i=0; i<esize; i++) vv[impl->indices[i]] += uu[i];
59*21617c04Sjeremylt     } else {
60*21617c04Sjeremylt       // u is (elemsize x ncomp x nelem)
61*21617c04Sjeremylt       if (lmode == CEED_NOTRANSPOSE) { // vv is (ndof x ncomp), column-major
62*21617c04Sjeremylt         for (CeedInt e = 0; e < r->nelem; e++)
63*21617c04Sjeremylt           for (CeedInt d = 0; d < ncomp; d++)
64*21617c04Sjeremylt             for (CeedInt i=0; i<r->elemsize; i++) {
65*21617c04Sjeremylt               vv[impl->indices[i+r->elemsize*e]+r->ndof*d] +=
66*21617c04Sjeremylt                 uu[i+r->elemsize*(d+e*ncomp)];
67*21617c04Sjeremylt             }
68*21617c04Sjeremylt       } else { // vv is (ncomp x ndof), column-major
69*21617c04Sjeremylt         for (CeedInt e = 0; e < r->nelem; e++)
70*21617c04Sjeremylt           for (CeedInt d = 0; d < ncomp; d++)
71*21617c04Sjeremylt             for (CeedInt i=0; i<r->elemsize; i++) {
72*21617c04Sjeremylt               vv[d+ncomp*impl->indices[i+r->elemsize*e]] +=
73*21617c04Sjeremylt                 uu[i+r->elemsize*(d+e*ncomp)];
74*21617c04Sjeremylt             }
75*21617c04Sjeremylt       }
76*21617c04Sjeremylt     }
77*21617c04Sjeremylt   }
78*21617c04Sjeremylt   ierr = CeedVectorRestoreArrayRead(u, &uu); CeedChk(ierr);
79*21617c04Sjeremylt   ierr = CeedVectorRestoreArray(v, &vv); CeedChk(ierr);
80*21617c04Sjeremylt   if (request != CEED_REQUEST_IMMEDIATE && request != CEED_REQUEST_ORDERED)
81*21617c04Sjeremylt     *request = NULL;
82*21617c04Sjeremylt   return 0;
83*21617c04Sjeremylt }
84*21617c04Sjeremylt 
85*21617c04Sjeremylt static int CeedElemRestrictionDestroy_Ref(CeedElemRestriction r) {
86*21617c04Sjeremylt   CeedElemRestriction_Ref *impl = r->data;
87*21617c04Sjeremylt   int ierr;
88*21617c04Sjeremylt 
89*21617c04Sjeremylt   ierr = CeedFree(&impl->indices_allocated); CeedChk(ierr);
90*21617c04Sjeremylt   ierr = CeedFree(&r->data); CeedChk(ierr);
91*21617c04Sjeremylt   return 0;
92*21617c04Sjeremylt }
93*21617c04Sjeremylt 
94*21617c04Sjeremylt int CeedElemRestrictionCreate_Ref(CeedElemRestriction r,
95*21617c04Sjeremylt                                   CeedMemType mtype,
96*21617c04Sjeremylt                                   CeedCopyMode cmode, const CeedInt *indices) {
97*21617c04Sjeremylt   int ierr;
98*21617c04Sjeremylt   CeedElemRestriction_Ref *impl;
99*21617c04Sjeremylt 
100*21617c04Sjeremylt   if (mtype != CEED_MEM_HOST)
101*21617c04Sjeremylt     return CeedError(r->ceed, 1, "Only MemType = HOST supported");
102*21617c04Sjeremylt   ierr = CeedCalloc(1,&impl); CeedChk(ierr);
103*21617c04Sjeremylt   switch (cmode) {
104*21617c04Sjeremylt   case CEED_COPY_VALUES:
105*21617c04Sjeremylt     ierr = CeedMalloc(r->nelem*r->elemsize, &impl->indices_allocated);
106*21617c04Sjeremylt     CeedChk(ierr);
107*21617c04Sjeremylt     memcpy(impl->indices_allocated, indices,
108*21617c04Sjeremylt            r->nelem * r->elemsize * sizeof(indices[0]));
109*21617c04Sjeremylt     impl->indices = impl->indices_allocated;
110*21617c04Sjeremylt     break;
111*21617c04Sjeremylt   case CEED_OWN_POINTER:
112*21617c04Sjeremylt     impl->indices_allocated = (CeedInt *)indices;
113*21617c04Sjeremylt     impl->indices = impl->indices_allocated;
114*21617c04Sjeremylt     break;
115*21617c04Sjeremylt   case CEED_USE_POINTER:
116*21617c04Sjeremylt     impl->indices = indices;
117*21617c04Sjeremylt   }
118*21617c04Sjeremylt   r->data = impl;
119*21617c04Sjeremylt   r->Apply = CeedElemRestrictionApply_Ref;
120*21617c04Sjeremylt   r->Destroy = CeedElemRestrictionDestroy_Ref;
121*21617c04Sjeremylt   return 0;
122*21617c04Sjeremylt }
123