1*21617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2*21617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3*21617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details. 4*21617c04Sjeremylt // 5*21617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software 6*21617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral 7*21617c04Sjeremylt // element discretizations for exascale applications. For more information and 8*21617c04Sjeremylt // source code availability see http://github.com/ceed. 9*21617c04Sjeremylt // 10*21617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11*21617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office 12*21617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for 13*21617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including 14*21617c04Sjeremylt // software, applications, hardware, advanced system engineering and early 15*21617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative. 16*21617c04Sjeremylt 17*21617c04Sjeremylt #include <ceed-impl.h> 18*21617c04Sjeremylt #include <string.h> 19*21617c04Sjeremylt #include "ceed-ref.h" 20*21617c04Sjeremylt 21*21617c04Sjeremylt // Contracts on the middle index 22*21617c04Sjeremylt // NOTRANSPOSE: V_ajc = T_jb U_abc 23*21617c04Sjeremylt // TRANSPOSE: V_ajc = T_bj U_abc 24*21617c04Sjeremylt // If Add != 0, "=" is replaced by "+=" 25*21617c04Sjeremylt static int CeedTensorContract_Ref(Ceed ceed, 26*21617c04Sjeremylt CeedInt A, CeedInt B, CeedInt C, CeedInt J, 27*21617c04Sjeremylt const CeedScalar *t, CeedTransposeMode tmode, 28*21617c04Sjeremylt const CeedInt Add, 29*21617c04Sjeremylt const CeedScalar *u, CeedScalar *v) { 30*21617c04Sjeremylt CeedInt tstride0 = B, tstride1 = 1; 31*21617c04Sjeremylt if (tmode == CEED_TRANSPOSE) { 32*21617c04Sjeremylt tstride0 = 1; tstride1 = J; 33*21617c04Sjeremylt } 34*21617c04Sjeremylt 35*21617c04Sjeremylt for (CeedInt a=0; a<A; a++) { 36*21617c04Sjeremylt for (CeedInt j=0; j<J; j++) { 37*21617c04Sjeremylt if (!Add) { 38*21617c04Sjeremylt for (CeedInt c=0; c<C; c++) 39*21617c04Sjeremylt v[(a*J+j)*C+c] = 0; 40*21617c04Sjeremylt } 41*21617c04Sjeremylt for (CeedInt b=0; b<B; b++) { 42*21617c04Sjeremylt for (CeedInt c=0; c<C; c++) { 43*21617c04Sjeremylt v[(a*J+j)*C+c] += t[j*tstride0 + b*tstride1] * u[(a*B+b)*C+c]; 44*21617c04Sjeremylt } 45*21617c04Sjeremylt } 46*21617c04Sjeremylt } 47*21617c04Sjeremylt } 48*21617c04Sjeremylt return 0; 49*21617c04Sjeremylt } 50*21617c04Sjeremylt 51*21617c04Sjeremylt static int CeedBasisApply_Ref(CeedBasis basis, CeedTransposeMode tmode, 52*21617c04Sjeremylt CeedEvalMode emode, 53*21617c04Sjeremylt const CeedScalar *u, CeedScalar *v) { 54*21617c04Sjeremylt int ierr; 55*21617c04Sjeremylt const CeedInt dim = basis->dim; 56*21617c04Sjeremylt const CeedInt ndof = basis->ndof; 57*21617c04Sjeremylt const CeedInt nqpt = ndof*CeedPowInt(basis->Q1d, dim); 58*21617c04Sjeremylt const CeedInt add = (tmode == CEED_TRANSPOSE); 59*21617c04Sjeremylt 60*21617c04Sjeremylt if (tmode == CEED_TRANSPOSE) { 61*21617c04Sjeremylt const CeedInt vsize = ndof*CeedPowInt(basis->P1d, dim); 62*21617c04Sjeremylt for (CeedInt i = 0; i < vsize; i++) 63*21617c04Sjeremylt v[i] = (CeedScalar) 0; 64*21617c04Sjeremylt } 65*21617c04Sjeremylt if (emode & CEED_EVAL_INTERP) { 66*21617c04Sjeremylt CeedInt P = basis->P1d, Q = basis->Q1d; 67*21617c04Sjeremylt if (tmode == CEED_TRANSPOSE) { 68*21617c04Sjeremylt P = basis->Q1d; Q = basis->P1d; 69*21617c04Sjeremylt } 70*21617c04Sjeremylt CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 71*21617c04Sjeremylt CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 72*21617c04Sjeremylt for (CeedInt d=0; d<dim; d++) { 73*21617c04Sjeremylt ierr = CeedTensorContract_Ref(basis->ceed, pre, P, post, Q, basis->interp1d, 74*21617c04Sjeremylt tmode, add&&(d==dim-1), 75*21617c04Sjeremylt d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 76*21617c04Sjeremylt CeedChk(ierr); 77*21617c04Sjeremylt pre /= P; 78*21617c04Sjeremylt post *= Q; 79*21617c04Sjeremylt } 80*21617c04Sjeremylt if (tmode == CEED_NOTRANSPOSE) { 81*21617c04Sjeremylt v += nqpt; 82*21617c04Sjeremylt } else { 83*21617c04Sjeremylt u += nqpt; 84*21617c04Sjeremylt } 85*21617c04Sjeremylt } 86*21617c04Sjeremylt if (emode & CEED_EVAL_GRAD) { 87*21617c04Sjeremylt CeedInt P = basis->P1d, Q = basis->Q1d; 88*21617c04Sjeremylt // In CEED_NOTRANSPOSE mode: 89*21617c04Sjeremylt // u is (P^dim x nc), column-major layout (nc = ndof) 90*21617c04Sjeremylt // v is (Q^dim x nc x dim), column-major layout (nc = ndof) 91*21617c04Sjeremylt // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 92*21617c04Sjeremylt if (tmode == CEED_TRANSPOSE) { 93*21617c04Sjeremylt P = basis->Q1d, Q = basis->P1d; 94*21617c04Sjeremylt } 95*21617c04Sjeremylt CeedScalar tmp[2][ndof*Q*CeedPowInt(P>Q?P:Q, dim-1)]; 96*21617c04Sjeremylt for (CeedInt p = 0; p < dim; p++) { 97*21617c04Sjeremylt CeedInt pre = ndof*CeedPowInt(P, dim-1), post = 1; 98*21617c04Sjeremylt for (CeedInt d=0; d<dim; d++) { 99*21617c04Sjeremylt ierr = CeedTensorContract_Ref(basis->ceed, pre, P, post, Q, 100*21617c04Sjeremylt (p==d)?basis->grad1d:basis->interp1d, 101*21617c04Sjeremylt tmode, add&&(d==dim-1), 102*21617c04Sjeremylt d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 103*21617c04Sjeremylt CeedChk(ierr); 104*21617c04Sjeremylt pre /= P; 105*21617c04Sjeremylt post *= Q; 106*21617c04Sjeremylt } 107*21617c04Sjeremylt if (tmode == CEED_NOTRANSPOSE) { 108*21617c04Sjeremylt v += nqpt; 109*21617c04Sjeremylt } else { 110*21617c04Sjeremylt u += nqpt; 111*21617c04Sjeremylt } 112*21617c04Sjeremylt } 113*21617c04Sjeremylt } 114*21617c04Sjeremylt if (emode & CEED_EVAL_WEIGHT) { 115*21617c04Sjeremylt if (tmode == CEED_TRANSPOSE) 116*21617c04Sjeremylt return CeedError(basis->ceed, 1, 117*21617c04Sjeremylt "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 118*21617c04Sjeremylt CeedInt Q = basis->Q1d; 119*21617c04Sjeremylt for (CeedInt d=0; d<dim; d++) { 120*21617c04Sjeremylt CeedInt pre = CeedPowInt(Q, dim-d-1), post = CeedPowInt(Q, d); 121*21617c04Sjeremylt for (CeedInt i=0; i<pre; i++) { 122*21617c04Sjeremylt for (CeedInt j=0; j<Q; j++) { 123*21617c04Sjeremylt for (CeedInt k=0; k<post; k++) { 124*21617c04Sjeremylt v[(i*Q + j)*post + k] = basis->qweight1d[j] 125*21617c04Sjeremylt * (d == 0 ? 1 : v[(i*Q + j)*post + k]); 126*21617c04Sjeremylt } 127*21617c04Sjeremylt } 128*21617c04Sjeremylt } 129*21617c04Sjeremylt } 130*21617c04Sjeremylt } 131*21617c04Sjeremylt return 0; 132*21617c04Sjeremylt } 133*21617c04Sjeremylt 134*21617c04Sjeremylt static int CeedBasisDestroy_Ref(CeedBasis basis) { 135*21617c04Sjeremylt return 0; 136*21617c04Sjeremylt } 137*21617c04Sjeremylt 138*21617c04Sjeremylt int CeedBasisCreateTensorH1_Ref(Ceed ceed, CeedInt dim, CeedInt P1d, 139*21617c04Sjeremylt CeedInt Q1d, const CeedScalar *interp1d, 140*21617c04Sjeremylt const CeedScalar *grad1d, 141*21617c04Sjeremylt const CeedScalar *qref1d, 142*21617c04Sjeremylt const CeedScalar *qweight1d, 143*21617c04Sjeremylt CeedBasis basis) { 144*21617c04Sjeremylt basis->Apply = CeedBasisApply_Ref; 145*21617c04Sjeremylt basis->Destroy = CeedBasisDestroy_Ref; 146*21617c04Sjeremylt return 0; 147*21617c04Sjeremylt } 148