// Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED: http://github.com/ceed

/// @file
/// Internal header for CUDA shared memory non-tensor product basis templates
#include <ceed/types.h>

//------------------------------------------------------------------------------
// 1D tensor contraction
//------------------------------------------------------------------------------
// Apply the P_1D x Q_1D basis matrix B to the per-thread values U, writing one
// output per thread into V: V[t] = sum_k B[k + t*P_1D] * U_k for t < Q_1D.
// Inputs are staged through data.slice so every thread can read all of them.
// NOTE(review): assumes data.slice holds at least blockDim.x entries and that
// blockDim.x >= max(P_1D, Q_1D) — per libCEED shared-basis convention; confirm
// against the kernel launch configuration.
template <int NUM_COMP, int P_1D, int Q_1D>
inline __device__ void Contract1d(SharedData_Cuda &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) {
  const CeedInt t = data.t_id_x;

  // Stage this thread's input so the whole thread block can read it
  data.slice[t] = *U;
  __syncthreads();
  *V = 0.0;
  if (t < Q_1D) {
    CeedScalar sum = 0.0;

    for (CeedInt k = 0; k < P_1D; k++) sum += B[k + t * P_1D] * data.slice[k];  // Contract x direction
    *V = sum;
  }
  // Protect data.slice before a subsequent contraction reuses it
  __syncthreads();
}

//------------------------------------------------------------------------------
// 1D transpose tensor contraction
//------------------------------------------------------------------------------
// Apply the transpose of the P_1D x Q_1D basis matrix B to the per-thread
// values U, accumulating into V: V[t] += sum_j B[t + j*P_1D] * U_j for
// t < P_1D. V is NOT zeroed here — callers initialize it (see the transpose
// wrappers below) so contributions from multiple contractions can sum.
template <int NUM_COMP, int P_1D, int Q_1D>
inline __device__ void ContractTranspose1d(SharedData_Cuda &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) {
  const CeedInt t = data.t_id_x;

  // Stage this thread's input so the whole thread block can read it
  data.slice[t] = *U;
  __syncthreads();
  if (t < P_1D) {
    CeedScalar sum = *V;  // accumulate into the caller's current value

    for (CeedInt j = 0; j < Q_1D; j++) sum += B[t + j * P_1D] * data.slice[j];  // Contract x direction
    *V = sum;
  }
  // Protect data.slice before a subsequent contraction reuses it
  __syncthreads();
}

//------------------------------------------------------------------------------
// Interpolate to quadrature points
//------------------------------------------------------------------------------
// Interpolate each of the NUM_COMP components from nodes to quadrature points
// with the interpolation matrix c_B (P x Q, P index fastest).
template <int NUM_COMP, int P, int Q>
inline __device__ void InterpNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B,
                                       CeedScalar *__restrict__ r_V) {
  for (CeedInt comp = 0; comp < NUM_COMP; comp++) Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]);
}

//------------------------------------------------------------------------------
// Interpolate transpose
//------------------------------------------------------------------------------
// Transpose interpolation: map each component from quadrature points back to
// nodes. Outputs are zeroed first since the contraction accumulates.
template <int NUM_COMP, int P, int Q>
inline __device__ void InterpTransposeNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B,
                                                CeedScalar *__restrict__ r_V) {
  for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
    r_V[comp] = 0.0;
    ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]);
  }
}

//------------------------------------------------------------------------------
// Derivatives at quadrature points
//------------------------------------------------------------------------------
// Evaluate derivatives of all components in all DIM directions; c_G holds the
// DIM gradient matrices back to back (stride P*Q), and the output for
// direction dim lives at r_V[comp + dim*NUM_COMP].
template <int NUM_COMP, int DIM, int P, int Q>
inline __device__ void GradNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G, CeedScalar *__restrict__ r_V) {
  for (CeedInt dim = 0; dim < DIM; dim++) {
    for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
      Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], &c_G[dim * P * Q], &r_V[comp + dim * NUM_COMP]);
    }
  }
}

//------------------------------------------------------------------------------
// Derivatives transpose
//------------------------------------------------------------------------------
// Transpose derivative action: sum the transposed contraction of every
// direction's gradient matrix into the nodal output. r_V is zeroed once up
// front; each direction then accumulates via ContractTranspose1d.
template <int NUM_COMP, int DIM, int P, int Q>
inline __device__ void GradTransposeNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G,
                                              CeedScalar *__restrict__ r_V) {
  for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_V[comp] = 0.0;
  for (CeedInt dim = 0; dim < DIM; dim++) {
    for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
      ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp + dim * NUM_COMP], &c_G[dim * P * Q], &r_V[comp]);
    }
  }
}

//------------------------------------------------------------------------------
// Quadrature weights
//------------------------------------------------------------------------------
// Load this thread's quadrature weight; threads beyond the Q quadrature
// points receive 0.0 so later reductions stay correct.
template <int Q>
inline __device__ void WeightNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ q_weight, CeedScalar *w) {
  if (data.t_id_x < Q) {
    *w = q_weight[data.t_id_x];
  } else {
    *w = 0.0;
  }
}