16c13bbcbSJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 26c13bbcbSJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 36c13bbcbSJeremy L Thompson // 46c13bbcbSJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 56c13bbcbSJeremy L Thompson // 66c13bbcbSJeremy L Thompson // This file is part of CEED: http://github.com/ceed 76c13bbcbSJeremy L Thompson 86c13bbcbSJeremy L Thompson /// @file 96c13bbcbSJeremy L Thompson /// Internal header for HIP shared memory non-tensor basis templates 106c13bbcbSJeremy L Thompson #include <ceed/types.h> 116c13bbcbSJeremy L Thompson 126c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 136c13bbcbSJeremy L Thompson // 1D tensor contraction 146c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 156c13bbcbSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D> 166c13bbcbSJeremy L Thompson inline __device__ void Contract1d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 176c13bbcbSJeremy L Thompson data.slice[data.t_id_x] = *U; 186c13bbcbSJeremy L Thompson __syncthreads(); 196c13bbcbSJeremy L Thompson *V = 0.0; 206c13bbcbSJeremy L Thompson if (data.t_id_x < Q_1D) { 216c13bbcbSJeremy L Thompson for (CeedInt i = 0; i < P_1D; i++) { 226c13bbcbSJeremy L Thompson *V += B[i + data.t_id_x * P_1D] * data.slice[i]; // Contract x direction 236c13bbcbSJeremy L Thompson } 246c13bbcbSJeremy L Thompson } 256c13bbcbSJeremy L Thompson __syncthreads(); 266c13bbcbSJeremy L Thompson } 276c13bbcbSJeremy L Thompson 286c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 296c13bbcbSJeremy L Thompson // 1D transpose tensor contraction 306c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 316c13bbcbSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D> 326c13bbcbSJeremy L Thompson inline __device__ void ContractTranspose1d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 336c13bbcbSJeremy L Thompson data.slice[data.t_id_x] = *U; 346c13bbcbSJeremy L Thompson __syncthreads(); 356c13bbcbSJeremy L Thompson if (data.t_id_x < P_1D) { 366c13bbcbSJeremy L Thompson for (CeedInt i = 0; i < Q_1D; i++) { 376c13bbcbSJeremy L Thompson *V += B[data.t_id_x + i * P_1D] * data.slice[i]; // Contract x direction 386c13bbcbSJeremy L Thompson } 396c13bbcbSJeremy L Thompson } 406c13bbcbSJeremy L Thompson __syncthreads(); 416c13bbcbSJeremy L Thompson } 426c13bbcbSJeremy L Thompson 436c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 446c13bbcbSJeremy L Thompson // Interpolate to quadrature points 456c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 466b92dc4bSJeremy L Thompson template <int NUM_COMP, int P, int Q, int T_1D> 472d217acfSJeremy L Thompson inline __device__ void InterpNonTensor(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 482d217acfSJeremy L Thompson CeedScalar *__restrict__ r_V) { 496c13bbcbSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 506c13bbcbSJeremy L Thompson Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]); 516c13bbcbSJeremy L Thompson } 526c13bbcbSJeremy L Thompson } 536c13bbcbSJeremy L Thompson 546c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 556c13bbcbSJeremy L Thompson // Interpolate transpose 566c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 576b92dc4bSJeremy L Thompson template <int NUM_COMP, int P, int Q, int T_1D> 582d217acfSJeremy L Thompson inline __device__ void InterpTransposeNonTensor(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 596c13bbcbSJeremy L Thompson CeedScalar *__restrict__ r_V) { 606c13bbcbSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 616c13bbcbSJeremy L Thompson r_V[comp] = 0.0; 626c13bbcbSJeremy L Thompson ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]); 636c13bbcbSJeremy L Thompson } 646c13bbcbSJeremy L Thompson } 656c13bbcbSJeremy L Thompson 666c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 676c13bbcbSJeremy L Thompson // Derivatives at quadrature points 686c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 696b92dc4bSJeremy L Thompson template <int NUM_COMP, int DIM, int P, int Q, int T_1D> 702d217acfSJeremy L Thompson inline __device__ void GradNonTensor(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G, CeedScalar *__restrict__ r_V) { 716c13bbcbSJeremy L Thompson for (CeedInt dim = 0; dim < DIM; dim++) { 726c13bbcbSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 736c13bbcbSJeremy L Thompson Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], &c_G[dim * P * Q], &r_V[comp + dim * NUM_COMP]); 746c13bbcbSJeremy L Thompson } 756c13bbcbSJeremy L Thompson } 766c13bbcbSJeremy L Thompson } 776c13bbcbSJeremy L Thompson 786c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 796c13bbcbSJeremy L Thompson // Derivatives transpose 806c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 816b92dc4bSJeremy L Thompson template <int NUM_COMP, int DIM, int P, int Q, int T_1D> 822d217acfSJeremy L Thompson inline __device__ void GradTransposeNonTensor(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G, 836c13bbcbSJeremy L Thompson CeedScalar *__restrict__ r_V) { 846c13bbcbSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_V[comp] = 0.0; 856c13bbcbSJeremy L Thompson for (CeedInt dim = 0; dim < DIM; dim++) { 866c13bbcbSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 876c13bbcbSJeremy L Thompson ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp + dim * NUM_COMP], &c_G[dim * P * Q], &r_V[comp]); 886c13bbcbSJeremy L Thompson } 896c13bbcbSJeremy L Thompson } 906c13bbcbSJeremy L Thompson } 916c13bbcbSJeremy L Thompson 926c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 936c13bbcbSJeremy L Thompson // Quadrature weights 946c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------ 95*343e3094SJeremy L Thompson template <int P, int Q> 962d217acfSJeremy L Thompson inline __device__ void WeightNonTensor(SharedData_Hip &data, const CeedScalar *__restrict__ q_weight, CeedScalar *w) { 972d217acfSJeremy L Thompson *w = (data.t_id_x < Q) ? q_weight[data.t_id_x] : 0.0; 986c13bbcbSJeremy L Thompson } 99