1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 3 // All Rights reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 #include <string.h> 18 #include "ceed-ref.h" 19 20 // Contracts on the middle index 21 // NOTRANSPOSE: V_ajc = T_jb U_abc 22 // TRANSPOSE: V_ajc = T_bj U_abc 23 // If Add != 0, "=" is replaced by "+=" 24 static int CeedTensorContract_Ref(Ceed ceed, CeedInt A, CeedInt B, 25 CeedInt C, CeedInt J, 26 const CeedScalar *restrict t, 27 CeedTransposeMode tmode, const CeedInt Add, 28 const CeedScalar *restrict u, 29 CeedScalar *restrict v) { 30 CeedInt tstride0 = B, tstride1 = 1; 31 if (tmode == CEED_TRANSPOSE) { 32 tstride0 = 1; tstride1 = J; 33 } 34 35 if (!Add) 36 for (CeedInt q=0; q<A*J*C; q++) 37 v[q] = (CeedScalar) 0.0; 38 39 for (CeedInt a=0; a<A; a++) 40 for (CeedInt b=0; b<B; b++) 41 for (CeedInt j=0; j<J; j++) { 42 CeedScalar tq = t[j*tstride0 + b*tstride1]; 43 for (CeedInt c=0; c<C; c++) 44 v[(a*J+j)*C+c] += tq * u[(a*B+b)*C+c]; 45 } 46 return 0; 47 } 48 49 static int CeedBasisApply_Ref(CeedBasis basis, CeedInt nelem, 50 CeedTransposeMode tmode, CeedEvalMode emode, 51 CeedVector U, CeedVector V) { 52 int ierr; 53 Ceed ceed; 54 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 55 CeedInt dim, ncomp, ndof, nqpt; 56 ierr = CeedBasisGetDimension(basis, &dim); CeedChk(ierr); 57 ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChk(ierr); 58 ierr = CeedBasisGetNumNodes(basis, &ndof); CeedChk(ierr); 59 ierr = CeedBasisGetNumQuadraturePoints(basis, &nqpt); CeedChk(ierr); 60 const CeedInt add = (tmode == CEED_TRANSPOSE); 61 const CeedScalar *u; 62 CeedScalar *v; 63 if (U) { 64 ierr = CeedVectorGetArrayRead(U, CEED_MEM_HOST, &u); CeedChk(ierr); 65 } else if (emode != CEED_EVAL_WEIGHT) { 66 return CeedError(ceed, 1, 67 "An input vector is required for this CeedEvalMode"); 68 } 69 ierr = CeedVectorGetArray(V, CEED_MEM_HOST, &v); CeedChk(ierr); 70 71 // Clear v if operating in transpose 72 if (tmode == CEED_TRANSPOSE) { 73 const CeedInt vsize = nelem*ncomp*ndof; 74 for (CeedInt i = 0; i < vsize; i++) 75 v[i] = (CeedScalar) 0.0; 76 } 77 bool tensorbasis; 78 ierr = CeedBasisGetTensorStatus(basis, &tensorbasis); CeedChk(ierr); 79 // Tensor basis 80 if (tensorbasis) { 81 CeedInt P1d, Q1d; 82 ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChk(ierr); 83 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q1d); CeedChk(ierr); 84 switch (emode) { 85 // Interpolate to/from quadrature points 86 case CEED_EVAL_INTERP: { 87 CeedInt P = P1d, Q = Q1d; 88 if (tmode == CEED_TRANSPOSE) { 89 P = Q1d; Q = P1d; 90 } 91 CeedInt pre = ncomp*CeedIntPow(P, dim-1), post = nelem; 92 CeedScalar tmp[2][nelem*ncomp*Q*CeedIntPow(P>Q?P:Q, dim-1)]; 93 CeedScalar *interp1d; 94 ierr = CeedBasisGetInterp(basis, &interp1d); CeedChk(ierr); 95 for (CeedInt d=0; d<dim; d++) { 96 ierr = CeedTensorContract_Ref(ceed, pre, P, post, Q, 97 interp1d, tmode, add&&(d==dim-1), 98 d==0?u:tmp[d%2], d==dim-1?v:tmp[(d+1)%2]); 99 CeedChk(ierr); 100 pre /= P; 101 post *= Q; 102 } 103 } break; 104 // Evaluate the gradient to/from quadrature points 105 case CEED_EVAL_GRAD: { 106 // In CEED_NOTRANSPOSE mode: 107 // u has shape [dim, ncomp, P^dim, nelem], row-major layout 108 // v has shape [dim, ncomp, Q^dim, nelem], row-major layout 109 // In CEED_TRANSPOSE mode, the sizes of u and v are switched. 110 CeedInt P = P1d, Q = Q1d; 111 if (tmode == CEED_TRANSPOSE) { 112 P = Q1d, Q = Q1d; 113 } 114 CeedBasis_Ref *impl; 115 ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr); 116 CeedScalar interp[nelem*ncomp*Q*CeedIntPow(P>Q?P:Q, dim-1)]; 117 CeedInt pre = ncomp*CeedIntPow(P, dim-1), post = nelem; 118 CeedScalar tmp[2][nelem*ncomp*Q*CeedIntPow(P>Q?P:Q, dim-1)]; 119 CeedScalar *interp1d; 120 ierr = CeedBasisGetInterp(basis, &interp1d); CeedChk(ierr); 121 // Interpolate to quadrature points (NoTranspose) 122 // or Grad to quadrature points (Transpose) 123 for (CeedInt d=0; d<dim; d++) { 124 ierr = CeedTensorContract_Ref(ceed, pre, P, post, Q, 125 (tmode == CEED_NOTRANSPOSE 126 ? interp1d 127 : impl->colograd1d), 128 tmode, add&&(d>0), 129 (tmode == CEED_NOTRANSPOSE 130 ? (d==0?u:tmp[d%2]) 131 : u + d*nqpt*ncomp*nelem), 132 (tmode == CEED_NOTRANSPOSE 133 ? (d==dim-1?interp:tmp[(d+1)%2]) 134 : interp)); 135 CeedChk(ierr); 136 pre /= P; 137 post *= Q; 138 } 139 // Grad to quadrature points (NoTranspose) 140 // or Interpolate to dofs (Transpose) 141 P = Q1d, Q = Q1d; 142 if (tmode == CEED_TRANSPOSE) { 143 P = Q1d, Q = P1d; 144 } 145 pre = ncomp*CeedIntPow(P, dim-1), post = nelem; 146 for (CeedInt d=0; d<dim; d++) { 147 ierr = CeedTensorContract_Ref(ceed, pre, P, post, Q, 148 (tmode == CEED_NOTRANSPOSE 149 ? impl->colograd1d 150 : interp1d), 151 tmode, add&&(d==dim-1), 152 (tmode == CEED_NOTRANSPOSE 153 ? interp 154 : (d==0?interp:tmp[d%2])), 155 (tmode == CEED_NOTRANSPOSE 156 ? v + d*nqpt*ncomp*nelem 157 : (d==dim-1?v:tmp[(d+1)%2]))); 158 CeedChk(ierr); 159 pre /= P; 160 post *= Q; 161 } 162 } break; 163 // Retrieve interpolation weights 164 case CEED_EVAL_WEIGHT: { 165 if (tmode == CEED_TRANSPOSE) 166 return CeedError(ceed, 1, 167 "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 168 CeedInt Q = Q1d; 169 CeedScalar *qweight1d; 170 ierr = CeedBasisGetQWeights(basis, &qweight1d); CeedChk(ierr); 171 for (CeedInt d=0; d<dim; d++) { 172 CeedInt pre = CeedIntPow(Q, dim-d-1), post = CeedIntPow(Q, d); 173 for (CeedInt i=0; i<pre; i++) 174 for (CeedInt j=0; j<Q; j++) 175 for (CeedInt k=0; k<post; k++) { 176 CeedScalar w = qweight1d[j] 177 * (d == 0 ? 1 : v[((i*Q + j)*post + k)*nelem]); 178 for (CeedInt e=0; e<nelem; e++) 179 v[((i*Q + j)*post + k)*nelem + e] = w; 180 } 181 } 182 } break; 183 // Evaluate the divergence to/from the quadrature points 184 case CEED_EVAL_DIV: 185 return CeedError(ceed, 1, "CEED_EVAL_DIV not supported"); 186 // Evaluate the curl to/from the quadrature points 187 case CEED_EVAL_CURL: 188 return CeedError(ceed, 1, "CEED_EVAL_CURL not supported"); 189 // Take no action, BasisApply should not have been called 190 case CEED_EVAL_NONE: 191 return CeedError(ceed, 1, 192 "CEED_EVAL_NONE does not make sense in this context"); 193 } 194 } else { 195 // Non-tensor basis 196 switch (emode) { 197 // Interpolate to/from quadrature points 198 case CEED_EVAL_INTERP: { 199 CeedInt P = ndof, Q = nqpt; 200 CeedScalar *interp; 201 ierr = CeedBasisGetInterp(basis, &interp); CeedChk(ierr); 202 if (tmode == CEED_TRANSPOSE) { 203 P = nqpt; Q = ndof; 204 } 205 ierr = CeedTensorContract_Ref(ceed, ncomp, P, nelem, Q, 206 interp, tmode, add, u, v); 207 CeedChk(ierr); 208 } 209 break; 210 // Evaluate the gradient to/from quadrature points 211 case CEED_EVAL_GRAD: { 212 CeedInt P = ndof, Q = dim*nqpt; 213 CeedScalar *grad; 214 ierr = CeedBasisGetGrad(basis, &grad); CeedChk(ierr); 215 if (tmode == CEED_TRANSPOSE) { 216 P = dim*nqpt; Q = ndof; 217 } 218 ierr = CeedTensorContract_Ref(ceed, ncomp, P, nelem, Q, 219 grad, tmode, add, u, v); 220 CeedChk(ierr); 221 } 222 break; 223 // Retrieve interpolation weights 224 case CEED_EVAL_WEIGHT: { 225 if (tmode == CEED_TRANSPOSE) 226 return CeedError(ceed, 1, 227 "CEED_EVAL_WEIGHT incompatible with CEED_TRANSPOSE"); 228 CeedScalar *qweight; 229 ierr = CeedBasisGetQWeights(basis, &qweight); CeedChk(ierr); 230 for (CeedInt i=0; i<nqpt; i++) 231 for (CeedInt e=0; e<nelem; e++) 232 v[i*nelem + e] = qweight[i]; 233 } break; 234 // Evaluate the divergence to/from the quadrature points 235 case CEED_EVAL_DIV: 236 return CeedError(ceed, 1, "CEED_EVAL_DIV not supported"); 237 // Evaluate the curl to/from the quadrature points 238 case CEED_EVAL_CURL: 239 return CeedError(ceed, 1, "CEED_EVAL_CURL not supported"); 240 // Take no action, BasisApply should not have been called 241 case CEED_EVAL_NONE: 242 return CeedError(ceed, 1, 243 "CEED_EVAL_NONE does not make sense in this context"); 244 } 245 } 246 if (U) { 247 ierr = CeedVectorRestoreArrayRead(U, &u); CeedChk(ierr); 248 } 249 ierr = CeedVectorRestoreArray(V, &v); CeedChk(ierr); 250 return 0; 251 } 252 253 static int CeedBasisDestroyNonTensor_Ref(CeedBasis basis) { 254 return 0; 255 } 256 257 static int CeedBasisDestroyTensor_Ref(CeedBasis basis) { 258 int ierr; 259 CeedBasis_Ref *impl; 260 ierr = CeedBasisGetData(basis, (void *)&impl); CeedChk(ierr); 261 262 ierr = CeedFree(&impl->colograd1d); CeedChk(ierr); 263 ierr = CeedFree(&impl); CeedChk(ierr); 264 265 return 0; 266 } 267 268 int CeedBasisCreateTensorH1_Ref(CeedInt dim, CeedInt P1d, 269 CeedInt Q1d, const CeedScalar *interp1d, 270 const CeedScalar *grad1d, 271 const CeedScalar *qref1d, 272 const CeedScalar *qweight1d, 273 CeedBasis basis) { 274 int ierr; 275 Ceed ceed; 276 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 277 CeedBasis_Ref *impl; 278 ierr = CeedCalloc(1, &impl); CeedChk(ierr); 279 ierr = CeedMalloc(Q1d*Q1d, &impl->colograd1d); CeedChk(ierr); 280 ierr = CeedBasisGetCollocatedGrad(basis, impl->colograd1d); CeedChk(ierr); 281 ierr = CeedBasisSetData(basis, (void *)&impl); CeedChk(ierr); 282 283 ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 284 CeedBasisApply_Ref); CeedChk(ierr); 285 ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 286 CeedBasisDestroyTensor_Ref); CeedChk(ierr); 287 return 0; 288 } 289 290 291 292 int CeedBasisCreateH1_Ref(CeedElemTopology topo, CeedInt dim, 293 CeedInt ndof, CeedInt nqpts, 294 const CeedScalar *interp, 295 const CeedScalar *grad, 296 const CeedScalar *qref, 297 const CeedScalar *qweight, 298 CeedBasis basis) { 299 int ierr; 300 Ceed ceed; 301 ierr = CeedBasisGetCeed(basis, &ceed); CeedChk(ierr); 302 303 ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 304 CeedBasisApply_Ref); CeedChk(ierr); 305 ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 306 CeedBasisDestroyNonTensor_Ref); CeedChk(ierr); 307 308 return 0; 309 } 310