19ff05d55SJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 29ff05d55SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 39ff05d55SJeremy L Thompson // 49ff05d55SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 59ff05d55SJeremy L Thompson // 69ff05d55SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 79ff05d55SJeremy L Thompson 89ff05d55SJeremy L Thompson /// @file 96c13bbcbSJeremy L Thompson /// Internal header for CUDA shared memory non-tensor basis templates 109ff05d55SJeremy L Thompson #include <ceed/types.h> 119ff05d55SJeremy L Thompson 129ff05d55SJeremy L Thompson //------------------------------------------------------------------------------ 139ff05d55SJeremy L Thompson // 1D tensor contraction 149ff05d55SJeremy L Thompson //------------------------------------------------------------------------------ 159ff05d55SJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D> 169ff05d55SJeremy L Thompson inline __device__ void Contract1d(SharedData_Cuda &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 179ff05d55SJeremy L Thompson data.slice[data.t_id_x] = *U; 189ff05d55SJeremy L Thompson __syncthreads(); 199ff05d55SJeremy L Thompson *V = 0.0; 209ff05d55SJeremy L Thompson if (data.t_id_x < Q_1D) { 219ff05d55SJeremy L Thompson for (CeedInt i = 0; i < P_1D; i++) { 229ff05d55SJeremy L Thompson *V += B[i + data.t_id_x * P_1D] * data.slice[i]; // Contract x direction 239ff05d55SJeremy L Thompson } 249ff05d55SJeremy L Thompson } 259ff05d55SJeremy L Thompson __syncthreads(); 269ff05d55SJeremy L Thompson } 279ff05d55SJeremy L Thompson 289ff05d55SJeremy L Thompson //------------------------------------------------------------------------------ 299ff05d55SJeremy L Thompson // 1D transpose tensor contraction 309ff05d55SJeremy L Thompson //------------------------------------------------------------------------------ 319ff05d55SJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D> 329ff05d55SJeremy L Thompson inline __device__ void ContractTranspose1d(SharedData_Cuda &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 339ff05d55SJeremy L Thompson data.slice[data.t_id_x] = *U; 349ff05d55SJeremy L Thompson __syncthreads(); 359ff05d55SJeremy L Thompson if (data.t_id_x < P_1D) { 369ff05d55SJeremy L Thompson for (CeedInt i = 0; i < Q_1D; i++) { 379ff05d55SJeremy L Thompson *V += B[data.t_id_x + i * P_1D] * data.slice[i]; // Contract x direction 389ff05d55SJeremy L Thompson } 399ff05d55SJeremy L Thompson } 409ff05d55SJeremy L Thompson __syncthreads(); 419ff05d55SJeremy L Thompson } 429ff05d55SJeremy L Thompson 439ff05d55SJeremy L Thompson //------------------------------------------------------------------------------ 449ff05d55SJeremy L Thompson // Interpolate to quadrature points 459ff05d55SJeremy L Thompson //------------------------------------------------------------------------------ 4699421279SJeremy L Thompson template <int NUM_COMP, int P, int Q, int T_1D> 479ff05d55SJeremy L Thompson inline __device__ void InterpNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 489ff05d55SJeremy L Thompson CeedScalar *__restrict__ r_V) { 499ff05d55SJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 509ff05d55SJeremy L Thompson Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]); 519ff05d55SJeremy L Thompson } 529ff05d55SJeremy L Thompson } 539ff05d55SJeremy L Thompson 549ff05d55SJeremy L Thompson //------------------------------------------------------------------------------ 559ff05d55SJeremy L Thompson // Interpolate transpose 569ff05d55SJeremy L Thompson //------------------------------------------------------------------------------ 5799421279SJeremy L Thompson template <int NUM_COMP, int P, int Q, int T_1D> 589ff05d55SJeremy L Thompson inline __device__ void InterpTransposeNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 599ff05d55SJeremy L Thompson CeedScalar *__restrict__ r_V) { 609ff05d55SJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 619ff05d55SJeremy L Thompson r_V[comp] = 0.0; 629ff05d55SJeremy L Thompson ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]); 639ff05d55SJeremy L Thompson } 649ff05d55SJeremy L Thompson } 659ff05d55SJeremy L Thompson 669ff05d55SJeremy L Thompson //------------------------------------------------------------------------------ 679ff05d55SJeremy L Thompson // Derivatives at quadrature points 689ff05d55SJeremy L Thompson //------------------------------------------------------------------------------ 6999421279SJeremy L Thompson template <int NUM_COMP, int DIM, int P, int Q, int T_1D> 709ff05d55SJeremy L Thompson inline __device__ void GradNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G, CeedScalar *__restrict__ r_V) { 719ff05d55SJeremy L Thompson for (CeedInt dim = 0; dim < DIM; dim++) { 729ff05d55SJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 739ff05d55SJeremy L Thompson Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], &c_G[dim * P * Q], &r_V[comp + dim * NUM_COMP]); 749ff05d55SJeremy L Thompson } 759ff05d55SJeremy L Thompson } 769ff05d55SJeremy L Thompson } 779ff05d55SJeremy L Thompson 789ff05d55SJeremy L Thompson //------------------------------------------------------------------------------ 799ff05d55SJeremy L Thompson // Derivatives transpose 809ff05d55SJeremy L Thompson //------------------------------------------------------------------------------ 8199421279SJeremy L Thompson template <int NUM_COMP, int DIM, int P, int Q, int T_1D> 829ff05d55SJeremy L Thompson inline __device__ void GradTransposeNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G, 839ff05d55SJeremy L Thompson CeedScalar *__restrict__ r_V) { 849ff05d55SJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_V[comp] = 0.0; 859ff05d55SJeremy L Thompson for (CeedInt dim = 0; dim < DIM; dim++) { 869ff05d55SJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 879ff05d55SJeremy L Thompson ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp + dim * NUM_COMP], &c_G[dim * P * Q], &r_V[comp]); 889ff05d55SJeremy L Thompson } 899ff05d55SJeremy L Thompson } 909ff05d55SJeremy L Thompson } 919ff05d55SJeremy L Thompson 929ff05d55SJeremy L Thompson //------------------------------------------------------------------------------ 939ff05d55SJeremy L Thompson // Quadrature weights 949ff05d55SJeremy L Thompson //------------------------------------------------------------------------------ 95*343e3094SJeremy L Thompson template <int P, int Q> 969ff05d55SJeremy L Thompson inline __device__ void WeightNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ q_weight, CeedScalar *w) { 979ff05d55SJeremy L Thompson *w = (data.t_id_x < Q) ? q_weight[data.t_id_x] : 0.0; 989ff05d55SJeremy L Thompson } 99