1a0154adeSJed Brown // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2a0154adeSJed Brown // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3a0154adeSJed Brown // 4a0154adeSJed Brown // SPDX-License-Identifier: BSD-2-Clause 5a0154adeSJed Brown // 6a0154adeSJed Brown // This file is part of CEED: http://github.com/ceed 7a0154adeSJed Brown 8b2165e7aSSebastian Grimberg /// @file 9b2165e7aSSebastian Grimberg /// Internal header for CUDA non-tensor product basis 1094b7b29bSJeremy L Thompson #ifndef CEED_CUDA_REF_BASIS_NONTENSOR_H 1194b7b29bSJeremy L Thompson #define CEED_CUDA_REF_BASIS_NONTENSOR_H 12b2165e7aSSebastian Grimberg 13c9c2c079SJeremy L Thompson #include <ceed.h> 14a0154adeSJed Brown 15*d075f50bSSebastian Grimberg #include "cuda-ref-basis-nontensor-templates.h" 16*d075f50bSSebastian Grimberg 17a0154adeSJed Brown //------------------------------------------------------------------------------ 18a0154adeSJed Brown // Non-Tensor Basis Kernels 19a0154adeSJed Brown //------------------------------------------------------------------------------ 20a0154adeSJed Brown 21a0154adeSJed Brown //------------------------------------------------------------------------------ 22a0154adeSJed Brown // Interp 23a0154adeSJed Brown //------------------------------------------------------------------------------ 24*d075f50bSSebastian Grimberg extern "C" __global__ void Interp(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U, 25a0154adeSJed Brown CeedScalar *__restrict__ d_V) { 262b730f8bSJeremy L Thompson for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) { 27*d075f50bSSebastian Grimberg Contract<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * num_elem, BASIS_Q * num_elem, 28*d075f50bSSebastian Grimberg BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); 29a0154adeSJed Brown } 30a0154adeSJed Brown } 31*d075f50bSSebastian Grimberg 32*d075f50bSSebastian Grimberg extern "C" __global__ void InterpTranspose(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U, 33*d075f50bSSebastian Grimberg CeedScalar *__restrict__ d_V) { 34*d075f50bSSebastian Grimberg for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) { 35*d075f50bSSebastian Grimberg ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, BASIS_Q * num_elem, BASIS_P * num_elem, 36*d075f50bSSebastian Grimberg BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); 37a0154adeSJed Brown } 38a0154adeSJed Brown } 39a0154adeSJed Brown 40a0154adeSJed Brown //------------------------------------------------------------------------------ 41*d075f50bSSebastian Grimberg // Deriv 42a0154adeSJed Brown //------------------------------------------------------------------------------ 43*d075f50bSSebastian Grimberg extern "C" __global__ void Deriv(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U, 44a0154adeSJed Brown CeedScalar *__restrict__ d_V) { 452b730f8bSJeremy L Thompson for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) { 46*d075f50bSSebastian Grimberg Contract<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * num_elem, BASIS_Q * num_elem, 47*d075f50bSSebastian Grimberg BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); 48*d075f50bSSebastian Grimberg } 49a0154adeSJed Brown } 50a0154adeSJed Brown 51*d075f50bSSebastian Grimberg extern "C" __global__ void DerivTranspose(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U, 52*d075f50bSSebastian Grimberg CeedScalar *__restrict__ d_V) { 53*d075f50bSSebastian Grimberg for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) { 54*d075f50bSSebastian Grimberg ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, BASIS_Q * num_elem, BASIS_P * num_elem, 55*d075f50bSSebastian Grimberg BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); 56a0154adeSJed Brown } 57a0154adeSJed Brown } 58a0154adeSJed Brown 59a0154adeSJed Brown //------------------------------------------------------------------------------ 60a0154adeSJed Brown // Weight 61a0154adeSJed Brown //------------------------------------------------------------------------------ 622b730f8bSJeremy L Thompson extern "C" __global__ void Weight(const CeedInt num_elem, const CeedScalar *__restrict__ q_weight, CeedScalar *__restrict__ d_V) { 63a0154adeSJed Brown const CeedInt t_id = threadIdx.x; 64a0154adeSJed Brown // TODO load q_weight in shared memory if blockDim.z > 1? 65*d075f50bSSebastian Grimberg 662b730f8bSJeremy L Thompson for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) { 67a0154adeSJed Brown d_V[elem * BASIS_Q + t_id] = q_weight[t_id]; 68a0154adeSJed Brown } 69a0154adeSJed Brown } 70a0154adeSJed Brown 71a0154adeSJed Brown //------------------------------------------------------------------------------ 72b2165e7aSSebastian Grimberg 7394b7b29bSJeremy L Thompson #endif // CEED_CUDA_REF_BASIS_NONTENSOR_H 74