1 // Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 /// @file 9 /// Internal header for HIP non-tensor product basis 10 #include <ceed/types.h> 11 12 #include "hip-ref-basis-nontensor-templates.h" 13 14 //------------------------------------------------------------------------------ 15 // Non-Tensor Basis Kernels 16 //------------------------------------------------------------------------------ 17 18 //------------------------------------------------------------------------------ 19 // Interp 20 //------------------------------------------------------------------------------ 21 extern "C" __global__ void Interp(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U, 22 CeedScalar *__restrict__ d_V) { 23 for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) { 24 Contract<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * num_elem, BASIS_Q * num_elem, 25 BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); 26 } 27 } 28 29 extern "C" __global__ void InterpTranspose(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U, 30 CeedScalar *__restrict__ d_V) { 31 for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) { 32 ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, BASIS_Q * num_elem, BASIS_P * num_elem, 33 BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); 34 } 35 } 36 37 //------------------------------------------------------------------------------ 38 // Deriv 39 //------------------------------------------------------------------------------ 40 extern "C" __global__ void Deriv(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U, 41 CeedScalar *__restrict__ d_V) { 42 for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) { 43 Contract<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * num_elem, BASIS_Q * num_elem, 44 BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); 45 } 46 } 47 48 extern "C" __global__ void DerivTranspose(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U, 49 CeedScalar *__restrict__ d_V) { 50 for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) { 51 ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, BASIS_Q * num_elem, BASIS_P * num_elem, 52 BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); 53 } 54 } 55 56 //------------------------------------------------------------------------------ 57 // Weight 58 //------------------------------------------------------------------------------ 59 extern "C" __global__ void Weight(const CeedInt num_elem, const CeedScalar *__restrict__ q_weight, CeedScalar *__restrict__ d_V) { 60 const CeedInt t_id = threadIdx.x; 61 // TODO load q_weight in shared memory if blockDim.z > 1? 62 63 for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) { 64 d_V[elem * BASIS_Q + t_id] = q_weight[t_id]; 65 } 66 } 67