// Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED: http://github.com/ceed

/// @file
/// Internal header for HIP non-tensor product basis
#ifndef CEED_HIP_REF_BASIS_NONTENSOR_H
#define CEED_HIP_REF_BASIS_NONTENSOR_H

#include <ceed.h>

#include "hip-ref-basis-nontensor-templates.h"

//------------------------------------------------------------------------------
// Non-Tensor Basis Kernels
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// Interp
//------------------------------------------------------------------------------
// Apply the interpolation basis matrix d_B to d_U, writing d_V.
// Elements are assigned one per (blockIdx.x, threadIdx.z) slot and each slot
// strides through the remaining elements by gridDim.x * blockDim.z.
extern "C" __global__ void Interp(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U,
                                  CeedScalar *__restrict__ d_V) {
  const CeedInt elem_step     = gridDim.x * blockDim.z;
  const CeedInt stride_p      = BASIS_P * num_elem;
  const CeedInt stride_q      = BASIS_Q * num_elem;
  const CeedInt stride_q_comp = BASIS_NUM_COMP * BASIS_Q * num_elem;

  for (CeedInt e = blockIdx.x * blockDim.z + threadIdx.z; e < num_elem; e += elem_step) {
    Contract<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(e, BASIS_P, BASIS_Q, stride_p, stride_q, stride_q_comp, d_B, d_U, d_V);
  }
}

// Apply the transpose of the interpolation basis matrix (quadrature values
// back to nodes); the P/Q sizes and strides are swapped relative to Interp.
extern "C" __global__ void InterpTranspose(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U,
                                           CeedScalar *__restrict__ d_V) {
  const CeedInt elem_step     = gridDim.x * blockDim.z;
  const CeedInt stride_p      = BASIS_P * num_elem;
  const CeedInt stride_q      = BASIS_Q * num_elem;
  const CeedInt stride_q_comp = BASIS_NUM_COMP * BASIS_Q * num_elem;

  for (CeedInt e = blockIdx.x * blockDim.z + threadIdx.z; e < num_elem; e += elem_step) {
    ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(e, BASIS_Q, BASIS_P, stride_q, stride_p, stride_q_comp, d_B, d_U,
                                                                             d_V);
  }
}

//------------------------------------------------------------------------------
// Deriv
//------------------------------------------------------------------------------
// Apply the derivative basis matrix d_B to d_U, writing d_V.
// Same element distribution as Interp: one element per (blockIdx.x,
// threadIdx.z) slot, grid-stride step of gridDim.x * blockDim.z.
extern "C" __global__ void Deriv(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U,
                                 CeedScalar *__restrict__ d_V) {
  const CeedInt elem_step     = gridDim.x * blockDim.z;
  const CeedInt stride_p      = BASIS_P * num_elem;
  const CeedInt stride_q      = BASIS_Q * num_elem;
  const CeedInt stride_q_comp = BASIS_NUM_COMP * BASIS_Q * num_elem;

  for (CeedInt e = blockIdx.x * blockDim.z + threadIdx.z; e < num_elem; e += elem_step) {
    Contract<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(e, BASIS_P, BASIS_Q, stride_p, stride_q, stride_q_comp, d_B, d_U, d_V);
  }
}

// Apply the transpose of the derivative basis matrix; the P/Q sizes and
// strides are swapped relative to Deriv.
extern "C" __global__ void DerivTranspose(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U,
                                          CeedScalar *__restrict__ d_V) {
  const CeedInt elem_step     = gridDim.x * blockDim.z;
  const CeedInt stride_p      = BASIS_P * num_elem;
  const CeedInt stride_q      = BASIS_Q * num_elem;
  const CeedInt stride_q_comp = BASIS_NUM_COMP * BASIS_Q * num_elem;

  for (CeedInt e = blockIdx.x * blockDim.z + threadIdx.z; e < num_elem; e += elem_step) {
    ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(e, BASIS_Q, BASIS_P, stride_q, stride_p, stride_q_comp, d_B, d_U,
                                                                            d_V);
  }
}

//------------------------------------------------------------------------------
// Weight
//------------------------------------------------------------------------------
// Copy the quadrature weights into d_V for every element.
// Thread x indexes the quadrature point; threadIdx.z / blockIdx.x select the
// element, striding by gridDim.x * blockDim.z.
extern "C" __global__ void Weight(const CeedInt num_elem, const CeedScalar *__restrict__ q_weight, CeedScalar *__restrict__ d_V) {
  const CeedInt q         = threadIdx.x;
  const CeedInt elem_step = gridDim.x * blockDim.z;
  // TODO load q_weight in shared memory if blockDim.z > 1?

  for (CeedInt e = blockIdx.x * blockDim.z + threadIdx.z; e < num_elem; e += elem_step) {
    d_V[e * BASIS_Q + q] = q_weight[q];
  }
}

//------------------------------------------------------------------------------

#endif  // CEED_HIP_REF_BASIS_NONTENSOR_H