xref: /libCEED/include/ceed/jit-source/hip/hip-shared-basis-nontensor-templates.h (revision 343e3094792a64f9c2da70ef2256f98e7dc173cf)
16c13bbcbSJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
26c13bbcbSJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
36c13bbcbSJeremy L Thompson //
46c13bbcbSJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
56c13bbcbSJeremy L Thompson //
66c13bbcbSJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
76c13bbcbSJeremy L Thompson 
86c13bbcbSJeremy L Thompson /// @file
96c13bbcbSJeremy L Thompson /// Internal header for HIP shared memory non-tensor basis templates
106c13bbcbSJeremy L Thompson #include <ceed/types.h>
116c13bbcbSJeremy L Thompson 
126c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
136c13bbcbSJeremy L Thompson // 1D tensor contraction
146c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
156c13bbcbSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D>
166c13bbcbSJeremy L Thompson inline __device__ void Contract1d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) {
176c13bbcbSJeremy L Thompson   data.slice[data.t_id_x] = *U;
186c13bbcbSJeremy L Thompson   __syncthreads();
196c13bbcbSJeremy L Thompson   *V = 0.0;
206c13bbcbSJeremy L Thompson   if (data.t_id_x < Q_1D) {
216c13bbcbSJeremy L Thompson     for (CeedInt i = 0; i < P_1D; i++) {
226c13bbcbSJeremy L Thompson       *V += B[i + data.t_id_x * P_1D] * data.slice[i];  // Contract x direction
236c13bbcbSJeremy L Thompson     }
246c13bbcbSJeremy L Thompson   }
256c13bbcbSJeremy L Thompson   __syncthreads();
266c13bbcbSJeremy L Thompson }
276c13bbcbSJeremy L Thompson 
286c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
296c13bbcbSJeremy L Thompson // 1D transpose tensor contraction
306c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
316c13bbcbSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D>
326c13bbcbSJeremy L Thompson inline __device__ void ContractTranspose1d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) {
336c13bbcbSJeremy L Thompson   data.slice[data.t_id_x] = *U;
346c13bbcbSJeremy L Thompson   __syncthreads();
356c13bbcbSJeremy L Thompson   if (data.t_id_x < P_1D) {
366c13bbcbSJeremy L Thompson     for (CeedInt i = 0; i < Q_1D; i++) {
376c13bbcbSJeremy L Thompson       *V += B[data.t_id_x + i * P_1D] * data.slice[i];  // Contract x direction
386c13bbcbSJeremy L Thompson     }
396c13bbcbSJeremy L Thompson   }
406c13bbcbSJeremy L Thompson   __syncthreads();
416c13bbcbSJeremy L Thompson }
426c13bbcbSJeremy L Thompson 
436c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
446c13bbcbSJeremy L Thompson // Interpolate to quadrature points
456c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
466b92dc4bSJeremy L Thompson template <int NUM_COMP, int P, int Q, int T_1D>
472d217acfSJeremy L Thompson inline __device__ void InterpNonTensor(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B,
482d217acfSJeremy L Thompson                                        CeedScalar *__restrict__ r_V) {
496c13bbcbSJeremy L Thompson   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
506c13bbcbSJeremy L Thompson     Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]);
516c13bbcbSJeremy L Thompson   }
526c13bbcbSJeremy L Thompson }
536c13bbcbSJeremy L Thompson 
546c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
556c13bbcbSJeremy L Thompson // Interpolate transpose
566c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
576b92dc4bSJeremy L Thompson template <int NUM_COMP, int P, int Q, int T_1D>
582d217acfSJeremy L Thompson inline __device__ void InterpTransposeNonTensor(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B,
596c13bbcbSJeremy L Thompson                                                 CeedScalar *__restrict__ r_V) {
606c13bbcbSJeremy L Thompson   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
616c13bbcbSJeremy L Thompson     r_V[comp] = 0.0;
626c13bbcbSJeremy L Thompson     ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]);
636c13bbcbSJeremy L Thompson   }
646c13bbcbSJeremy L Thompson }
656c13bbcbSJeremy L Thompson 
666c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
676c13bbcbSJeremy L Thompson // Derivatives at quadrature points
686c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
696b92dc4bSJeremy L Thompson template <int NUM_COMP, int DIM, int P, int Q, int T_1D>
702d217acfSJeremy L Thompson inline __device__ void GradNonTensor(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G, CeedScalar *__restrict__ r_V) {
716c13bbcbSJeremy L Thompson   for (CeedInt dim = 0; dim < DIM; dim++) {
726c13bbcbSJeremy L Thompson     for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
736c13bbcbSJeremy L Thompson       Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], &c_G[dim * P * Q], &r_V[comp + dim * NUM_COMP]);
746c13bbcbSJeremy L Thompson     }
756c13bbcbSJeremy L Thompson   }
766c13bbcbSJeremy L Thompson }
776c13bbcbSJeremy L Thompson 
786c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
796c13bbcbSJeremy L Thompson // Derivatives transpose
806c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
816b92dc4bSJeremy L Thompson template <int NUM_COMP, int DIM, int P, int Q, int T_1D>
822d217acfSJeremy L Thompson inline __device__ void GradTransposeNonTensor(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G,
836c13bbcbSJeremy L Thompson                                               CeedScalar *__restrict__ r_V) {
846c13bbcbSJeremy L Thompson   for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_V[comp] = 0.0;
856c13bbcbSJeremy L Thompson   for (CeedInt dim = 0; dim < DIM; dim++) {
866c13bbcbSJeremy L Thompson     for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
876c13bbcbSJeremy L Thompson       ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp + dim * NUM_COMP], &c_G[dim * P * Q], &r_V[comp]);
886c13bbcbSJeremy L Thompson     }
896c13bbcbSJeremy L Thompson   }
906c13bbcbSJeremy L Thompson }
916c13bbcbSJeremy L Thompson 
926c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
936c13bbcbSJeremy L Thompson // Quadrature weights
946c13bbcbSJeremy L Thompson //------------------------------------------------------------------------------
95*343e3094SJeremy L Thompson template <int P, int Q>
962d217acfSJeremy L Thompson inline __device__ void WeightNonTensor(SharedData_Hip &data, const CeedScalar *__restrict__ q_weight, CeedScalar *w) {
972d217acfSJeremy L Thompson   *w = (data.t_id_x < Q) ? q_weight[data.t_id_x] : 0.0;
986c13bbcbSJeremy L Thompson }
99