// Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED: http://github.com/ceed

/// @file
/// Internal header for HIP shared memory non-tensor basis templates
106c13bbcbSJeremy L Thompson #include <ceed/types.h>
116c13bbcbSJeremy L Thompson
//------------------------------------------------------------------------------
// 1D tensor contraction
//------------------------------------------------------------------------------
156c13bbcbSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D>
Contract1d(SharedData_Hip & data,const CeedScalar * U,const CeedScalar * B,CeedScalar * V)166c13bbcbSJeremy L Thompson inline __device__ void Contract1d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) {
176c13bbcbSJeremy L Thompson data.slice[data.t_id_x] = *U;
186c13bbcbSJeremy L Thompson __syncthreads();
196c13bbcbSJeremy L Thompson *V = 0.0;
206c13bbcbSJeremy L Thompson if (data.t_id_x < Q_1D) {
216c13bbcbSJeremy L Thompson for (CeedInt i = 0; i < P_1D; i++) {
226c13bbcbSJeremy L Thompson *V += B[i + data.t_id_x * P_1D] * data.slice[i]; // Contract x direction
236c13bbcbSJeremy L Thompson }
246c13bbcbSJeremy L Thompson }
256c13bbcbSJeremy L Thompson __syncthreads();
266c13bbcbSJeremy L Thompson }
276c13bbcbSJeremy L Thompson
//------------------------------------------------------------------------------
// 1D transpose tensor contraction
//------------------------------------------------------------------------------
316c13bbcbSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D>
ContractTranspose1d(SharedData_Hip & data,const CeedScalar * U,const CeedScalar * B,CeedScalar * V)326c13bbcbSJeremy L Thompson inline __device__ void ContractTranspose1d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) {
336c13bbcbSJeremy L Thompson data.slice[data.t_id_x] = *U;
346c13bbcbSJeremy L Thompson __syncthreads();
356c13bbcbSJeremy L Thompson if (data.t_id_x < P_1D) {
366c13bbcbSJeremy L Thompson for (CeedInt i = 0; i < Q_1D; i++) {
376c13bbcbSJeremy L Thompson *V += B[data.t_id_x + i * P_1D] * data.slice[i]; // Contract x direction
386c13bbcbSJeremy L Thompson }
396c13bbcbSJeremy L Thompson }
406c13bbcbSJeremy L Thompson __syncthreads();
416c13bbcbSJeremy L Thompson }
426c13bbcbSJeremy L Thompson
//------------------------------------------------------------------------------
// Interpolate to quadrature points
//------------------------------------------------------------------------------
466b92dc4bSJeremy L Thompson template <int NUM_COMP, int P, int Q, int T_1D>
InterpNonTensor(SharedData_Hip & data,const CeedScalar * __restrict__ r_U,const CeedScalar * c_B,CeedScalar * __restrict__ r_V)472d217acfSJeremy L Thompson inline __device__ void InterpNonTensor(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B,
482d217acfSJeremy L Thompson CeedScalar *__restrict__ r_V) {
496c13bbcbSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
506c13bbcbSJeremy L Thompson Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]);
516c13bbcbSJeremy L Thompson }
526c13bbcbSJeremy L Thompson }
536c13bbcbSJeremy L Thompson
//------------------------------------------------------------------------------
// Interpolate transpose
//------------------------------------------------------------------------------
576b92dc4bSJeremy L Thompson template <int NUM_COMP, int P, int Q, int T_1D>
InterpTransposeNonTensor(SharedData_Hip & data,const CeedScalar * __restrict__ r_U,const CeedScalar * c_B,CeedScalar * __restrict__ r_V)582d217acfSJeremy L Thompson inline __device__ void InterpTransposeNonTensor(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B,
596c13bbcbSJeremy L Thompson CeedScalar *__restrict__ r_V) {
606c13bbcbSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
616c13bbcbSJeremy L Thompson r_V[comp] = 0.0;
626c13bbcbSJeremy L Thompson ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]);
636c13bbcbSJeremy L Thompson }
646c13bbcbSJeremy L Thompson }
656c13bbcbSJeremy L Thompson
//------------------------------------------------------------------------------
// Derivatives at quadrature points
//------------------------------------------------------------------------------
696b92dc4bSJeremy L Thompson template <int NUM_COMP, int DIM, int P, int Q, int T_1D>
GradNonTensor(SharedData_Hip & data,const CeedScalar * __restrict__ r_U,const CeedScalar * c_G,CeedScalar * __restrict__ r_V)702d217acfSJeremy L Thompson inline __device__ void GradNonTensor(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G, CeedScalar *__restrict__ r_V) {
716c13bbcbSJeremy L Thompson for (CeedInt dim = 0; dim < DIM; dim++) {
726c13bbcbSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
736c13bbcbSJeremy L Thompson Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], &c_G[dim * P * Q], &r_V[comp + dim * NUM_COMP]);
746c13bbcbSJeremy L Thompson }
756c13bbcbSJeremy L Thompson }
766c13bbcbSJeremy L Thompson }
776c13bbcbSJeremy L Thompson
//------------------------------------------------------------------------------
// Derivatives transpose
//------------------------------------------------------------------------------
816b92dc4bSJeremy L Thompson template <int NUM_COMP, int DIM, int P, int Q, int T_1D>
GradTransposeNonTensor(SharedData_Hip & data,const CeedScalar * __restrict__ r_U,const CeedScalar * c_G,CeedScalar * __restrict__ r_V)822d217acfSJeremy L Thompson inline __device__ void GradTransposeNonTensor(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G,
836c13bbcbSJeremy L Thompson CeedScalar *__restrict__ r_V) {
846c13bbcbSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_V[comp] = 0.0;
856c13bbcbSJeremy L Thompson for (CeedInt dim = 0; dim < DIM; dim++) {
866c13bbcbSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
876c13bbcbSJeremy L Thompson ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp + dim * NUM_COMP], &c_G[dim * P * Q], &r_V[comp]);
886c13bbcbSJeremy L Thompson }
896c13bbcbSJeremy L Thompson }
906c13bbcbSJeremy L Thompson }
916c13bbcbSJeremy L Thompson
//------------------------------------------------------------------------------
// Quadrature weights
//------------------------------------------------------------------------------
95343e3094SJeremy L Thompson template <int P, int Q>
WeightNonTensor(SharedData_Hip & data,const CeedScalar * __restrict__ q_weight,CeedScalar * w)962d217acfSJeremy L Thompson inline __device__ void WeightNonTensor(SharedData_Hip &data, const CeedScalar *__restrict__ q_weight, CeedScalar *w) {
972d217acfSJeremy L Thompson *w = (data.t_id_x < Q) ? q_weight[data.t_id_x] : 0.0;
986c13bbcbSJeremy L Thompson }
99