121617c04Sjeremylt // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 221617c04Sjeremylt // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 321617c04Sjeremylt // All Rights reserved. See files LICENSE and NOTICE for details. 421617c04Sjeremylt // 521617c04Sjeremylt // This file is part of CEED, a collection of benchmarks, miniapps, software 621617c04Sjeremylt // libraries and APIs for efficient high-order finite element and spectral 721617c04Sjeremylt // element discretizations for exascale applications. For more information and 821617c04Sjeremylt // source code availability see http://github.com/ceed. 921617c04Sjeremylt // 1021617c04Sjeremylt // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 1121617c04Sjeremylt // a collaborative effort of two U.S. Department of Energy organizations (Office 1221617c04Sjeremylt // of Science and the National Nuclear Security Administration) responsible for 1321617c04Sjeremylt // the planning and preparation of a capable exascale ecosystem, including 1421617c04Sjeremylt // software, applications, hardware, advanced system engineering and early 1521617c04Sjeremylt // testbed platforms, in support of the nation's exascale computing imperative. 1621617c04Sjeremylt 1721617c04Sjeremylt #include <ceed-impl.h> 1821617c04Sjeremylt #include <string.h> 1921617c04Sjeremylt #include "ceed-ref.h" 2021617c04Sjeremylt 2121617c04Sjeremylt // Contracts on the middle index 2221617c04Sjeremylt // NOTRANSPOSE: V_ajc = T_jb U_abc 2321617c04Sjeremylt // TRANSPOSE: V_ajc = T_bj U_abc 2421617c04Sjeremylt // If Add != 0, "=" is replaced by "+=" 2521617c04Sjeremylt static int CeedTensorContract_Ref(Ceed ceed, 2621617c04Sjeremylt CeedInt A, CeedInt B, CeedInt C, CeedInt J, 2712170c1eSJed Brown const CeedScalar *restrict t, CeedTransposeMode tmode, 2821617c04Sjeremylt const CeedInt Add, 2912170c1eSJed Brown const CeedScalar *restrict u, CeedScalar *restrict v) { 3021617c04Sjeremylt CeedInt tstride0 = B, tstride1 = 1; 3121617c04Sjeremylt if (tmode == CEED_TRANSPOSE) { 3221617c04Sjeremylt tstride0 = 1; tstride1 = J; 3321617c04Sjeremylt } 3421617c04Sjeremylt 35a2b73c81Sjeremylt if (!Add) 36a2b73c81Sjeremylt for (CeedInt q=0; q<A*J*C; q++) 371c19f432SSteven Roberts v[q] = (CeedScalar) 0.0; 381c19f432SSteven Roberts 39a2b73c81Sjeremylt for (CeedInt a=0; a<A; a++) 40a2b73c81Sjeremylt for (CeedInt b=0; b<B; b++) 411c19f432SSteven Roberts for (CeedInt j=0; j<J; j++) { 421c19f432SSteven Roberts CeedScalar tq = t[j*tstride0 + b*tstride1]; 43a2b73c81Sjeremylt for (CeedInt c=0; c<C; c++) 44282be9a0SSteven Roberts v[(a*J+j)*C+c] += tq * u[(a*B+b)*C+c]; 4521617c04Sjeremylt } 4621617c04Sjeremylt return 0; 4721617c04Sjeremylt } 4821617c04Sjeremylt 49d3181881Sjeremylt static int CeedBasisApply_Ref(CeedBasis basis, CeedInt nelem, 50d3181881Sjeremylt CeedTransposeMode tmode, CeedEvalMode emode, 5121617c04Sjeremylt const CeedScalar *u, CeedScalar *v) { 5221617c04Sjeremylt int ierr; 5321617c04Sjeremylt const CeedInt dim = basis->dim; 540f5de9e9Sjeremylt const CeedInt ncomp = basis->ncomp; 55*a8de75f0Sjeremylt CeedInt nqpt; 56*a8de75f0Sjeremylt ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChk(ierr); 57*a8de75f0Sjeremylt CeedInt ndof; 58*a8de75f0Sjeremylt ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChk(ierr); 5921617c04Sjeremylt const CeedInt add = (tmode == CEED_TRANSPOSE); 6021617c04Sjeremylt 61c44a85f4Sjeremylt if (nelem != 1) 62c44a85f4Sjeremylt return CeedError(basis->ceed, 1, 63c44a85f4Sjeremylt "This backend does not support BasisApply for multiple elements"); 64c44a85f4Sjeremylt 658d94b059Sjeremylt // Clear v if operating in transpose 6621617c04Sjeremylt if (tmode == CEED_TRANSPOSE) { 67*a8de75f0Sjeremylt const CeedInt vsize = ncomp*ndof; 6821617c04Sjeremylt for (CeedInt i = 0; i < vsize; i++) 6921617c04Sjeremylt v[i] = (CeedScalar) 0; 7021617c04Sjeremylt } 71*a8de75f0Sjeremylt // Tensor basis 72*a8de75f0Sjeremylt if (basis->tensorbasis) { 73a2b73c81Sjeremylt switch (emode) { 748d94b059Sjeremylt // Interpolate to/from quadrature points 75a2b73c81Sjeremylt case CEED_EVAL_INTERP: { 7621617c04Sjeremylt CeedInt P = basis->P1d, Q = basis->Q1d; 7721617c04Sjeremylt if (tmode == CEED_TRANSPOSE) { 7821617c04Sjeremylt P = basis->Q1d; Q = basis->P1d; 7921617c04Sjeremylt } 80b5cf12eeSjeremylt CeedInt pre = ncomp*CeedIntPow(P, dim-1), post = 1; 81b5cf12eeSjeremylt CeedScalar tmp[2][ncomp*Q*CeedIntPow(P>Q?P:Q, dim-1)]; 8221617c04Sjeremylt for (CeedInt d=0; d<dim; d++) { 8321617c04Sjeremylt ierr = CeedTensorContract_Ref(basis->ceed, pre, P, post, Q, basis->interp1d, 8421617c04Sjeremylt tmode, add&&(d==dim-1), 8521617c04Sjeremylt d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 8621617c04Sjeremylt CeedChk(ierr); 8721617c04Sjeremylt pre /= P; 8821617c04Sjeremylt post *= Q; 8921617c04Sjeremylt } 90a2b73c81Sjeremylt } break; 918d94b059Sjeremylt // Evaluate the gradient to/from quadrature points 92a2b73c81Sjeremylt case CEED_EVAL_GRAD: { 9321617c04Sjeremylt CeedInt P = basis->P1d, Q = basis->Q1d; 9421617c04Sjeremylt // In CEED_NOTRANSPOSE mode: 95ecf6354eSJed Brown // u has shape [dim, ncomp, P^dim, nelem], row-major layout 96ecf6354eSJed Brown // v has shape [dim, ncomp, Q^dim, nelem], row-major layout 9721617c04Sjeremylt // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 9821617c04Sjeremylt if (tmode == CEED_TRANSPOSE) { 9921617c04Sjeremylt P = basis->Q1d, Q = basis->P1d; 10021617c04Sjeremylt } 101b5cf12eeSjeremylt CeedScalar tmp[2][ncomp*Q*CeedIntPow(P>Q?P:Q, dim-1)]; 10221617c04Sjeremylt for (CeedInt p = 0; p < dim; p++) { 103b5cf12eeSjeremylt CeedInt pre = ncomp*CeedIntPow(P, dim-1), post = 1; 10421617c04Sjeremylt for (CeedInt d=0; d<dim; d++) { 10521617c04Sjeremylt ierr = CeedTensorContract_Ref(basis->ceed, pre, P, post, Q, 10621617c04Sjeremylt (p==d)?basis->grad1d:basis->interp1d, 10721617c04Sjeremylt tmode, add&&(d==dim-1), 1084b8bea3bSJed Brown (d == 0 109a2b73c81Sjeremylt ? (tmode==CEED_NOTRANSPOSE?u:u+p*ncomp*nqpt) 1104b8bea3bSJed Brown : tmp[d%2]), 1114b8bea3bSJed Brown (d == dim-1 112a2b73c81Sjeremylt ? (tmode==CEED_TRANSPOSE?v:v+p*ncomp*nqpt) 1134b8bea3bSJed Brown : tmp[(d+1)%2])); 11421617c04Sjeremylt CeedChk(ierr); 11521617c04Sjeremylt pre /= P; 11621617c04Sjeremylt post *= Q; 11721617c04Sjeremylt } 11821617c04Sjeremylt } 119a2b73c81Sjeremylt } break; 1208d94b059Sjeremylt // Retrieve interpolation weights 121a2b73c81Sjeremylt case CEED_EVAL_WEIGHT: { 12221617c04Sjeremylt if (tmode == CEED_TRANSPOSE) 12321617c04Sjeremylt return CeedError(basis->ceed, 1, 12421617c04Sjeremylt "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 12521617c04Sjeremylt CeedInt Q = basis->Q1d; 12621617c04Sjeremylt for (CeedInt d=0; d<dim; d++) { 127b5cf12eeSjeremylt CeedInt pre = CeedIntPow(Q, dim-d-1), post = CeedIntPow(Q, d); 128a2b73c81Sjeremylt for (CeedInt i=0; i<pre; i++) 129a2b73c81Sjeremylt for (CeedInt j=0; j<Q; j++) 130a2b73c81Sjeremylt for (CeedInt k=0; k<post; k++) 13121617c04Sjeremylt v[(i*Q + j)*post + k] = basis->qweight1d[j] 13221617c04Sjeremylt * (d == 0 ? 1 : v[(i*Q + j)*post + k]); 13321617c04Sjeremylt } 134a2b73c81Sjeremylt } break; 1358d94b059Sjeremylt // Evaluate the divergence to/from the quadrature points 136a2b73c81Sjeremylt case CEED_EVAL_DIV: 137a2b73c81Sjeremylt return CeedError(basis->ceed, 1, "CEED_EVAL_DIV not supported"); 1388d94b059Sjeremylt // Evaluate the curl to/from the quadrature points 139a2b73c81Sjeremylt case CEED_EVAL_CURL: 140a2b73c81Sjeremylt return CeedError(basis->ceed, 1, "CEED_EVAL_CURL not supported"); 1418d94b059Sjeremylt // Take no action, BasisApply should not have been called 142a2b73c81Sjeremylt case CEED_EVAL_NONE: 1434b8bea3bSJed Brown return CeedError(basis->ceed, 1, 1444b8bea3bSJed Brown "CEED_EVAL_NONE does not make sense in this context"); 14521617c04Sjeremylt } 146*a8de75f0Sjeremylt } else { 147*a8de75f0Sjeremylt // Non-tensor basis 148*a8de75f0Sjeremylt switch (emode) { 149*a8de75f0Sjeremylt case CEED_EVAL_INTERP: { 150*a8de75f0Sjeremylt CeedInt P = basis->P, Q = basis->Q; 151*a8de75f0Sjeremylt if (tmode == CEED_TRANSPOSE) { 152*a8de75f0Sjeremylt P = basis->Q; Q = basis->P; 153*a8de75f0Sjeremylt } 154*a8de75f0Sjeremylt ierr = CeedTensorContract_Ref(basis->ceed, ncomp, P, 1, Q, basis->interp1d, 155*a8de75f0Sjeremylt tmode, add, u, v); 156*a8de75f0Sjeremylt CeedChk(ierr); 157*a8de75f0Sjeremylt } 158*a8de75f0Sjeremylt break; 159*a8de75f0Sjeremylt case CEED_EVAL_GRAD: { 160*a8de75f0Sjeremylt CeedInt P = basis->P, Q = dim*basis->Q; 161*a8de75f0Sjeremylt if (tmode == CEED_TRANSPOSE) { 162*a8de75f0Sjeremylt P = dim*basis->Q; Q = basis->P; 163*a8de75f0Sjeremylt } 164*a8de75f0Sjeremylt ierr = CeedTensorContract_Ref(basis->ceed, ncomp, P, 1, Q, basis->grad1d, 165*a8de75f0Sjeremylt tmode, add, u, v); 166*a8de75f0Sjeremylt CeedChk(ierr); 167*a8de75f0Sjeremylt } 168*a8de75f0Sjeremylt break; 169*a8de75f0Sjeremylt case CEED_EVAL_WEIGHT: { 170*a8de75f0Sjeremylt if (tmode == CEED_TRANSPOSE) 171*a8de75f0Sjeremylt return CeedError(basis->ceed, 1, 172*a8de75f0Sjeremylt "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 173*a8de75f0Sjeremylt for (CeedInt i=0; i<nqpt; i++) 174*a8de75f0Sjeremylt v[i] = basis->qweight1d[i]; 175*a8de75f0Sjeremylt } break; 176*a8de75f0Sjeremylt case CEED_EVAL_DIV: 177*a8de75f0Sjeremylt return CeedError(basis->ceed, 1, "CEED_EVAL_DIV not supported"); 178*a8de75f0Sjeremylt case CEED_EVAL_CURL: 179*a8de75f0Sjeremylt return CeedError(basis->ceed, 1, "CEED_EVAL_CURL not supported"); 180*a8de75f0Sjeremylt case CEED_EVAL_NONE: 181*a8de75f0Sjeremylt return CeedError(basis->ceed, 1, 182*a8de75f0Sjeremylt "CEED_EVAL_NONE does not make sense in this context"); 183*a8de75f0Sjeremylt } 184*a8de75f0Sjeremylt } 18521617c04Sjeremylt return 0; 18621617c04Sjeremylt } 18721617c04Sjeremylt 18821617c04Sjeremylt static int CeedBasisDestroy_Ref(CeedBasis basis) { 18921617c04Sjeremylt return 0; 19021617c04Sjeremylt } 19121617c04Sjeremylt 19221617c04Sjeremylt int CeedBasisCreateTensorH1_Ref(Ceed ceed, CeedInt dim, CeedInt P1d, 19321617c04Sjeremylt CeedInt Q1d, const CeedScalar *interp1d, 19421617c04Sjeremylt const CeedScalar *grad1d, 19521617c04Sjeremylt const CeedScalar *qref1d, 19621617c04Sjeremylt const CeedScalar *qweight1d, 19721617c04Sjeremylt CeedBasis basis) { 19821617c04Sjeremylt basis->Apply = CeedBasisApply_Ref; 19921617c04Sjeremylt basis->Destroy = CeedBasisDestroy_Ref; 20021617c04Sjeremylt return 0; 20121617c04Sjeremylt } 202*a8de75f0Sjeremylt 203*a8de75f0Sjeremylt int CeedBasisCreateH1_Ref(Ceed ceed, CeedElemTopology topo, CeedInt dim, 204*a8de75f0Sjeremylt CeedInt ndof, CeedInt nqpts, 205*a8de75f0Sjeremylt const CeedScalar *interp, 206*a8de75f0Sjeremylt const CeedScalar *grad, 207*a8de75f0Sjeremylt const CeedScalar *qref, 208*a8de75f0Sjeremylt const CeedScalar *qweight, 209*a8de75f0Sjeremylt CeedBasis basis) { 210*a8de75f0Sjeremylt basis->Apply = CeedBasisApply_Ref; 211*a8de75f0Sjeremylt basis->Destroy = CeedBasisDestroy_Ref; 212*a8de75f0Sjeremylt return 0; 213*a8de75f0Sjeremylt } 214