jit-source/cuda/cuda-ref-basis-nontensor.h

// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED:  http://github.com/ceed
a0154adeSJed Brown
/// @file
/// Internal header for CUDA non-tensor product basis
94b7b29bSJeremy L Thompson#ifndef CEED_CUDA_REF_BASIS_NONTENSOR_H
94b7b29bSJeremy L Thompson#define CEED_CUDA_REF_BASIS_NONTENSOR_H
b2165e7aSSebastian Grimberg
c9c2c079SJeremy L Thompson#include <ceed.h>
a0154adeSJed Brown
*d075f50bSSebastian Grimberg#include "cuda-ref-basis-nontensor-templates.h"
*d075f50bSSebastian Grimberg
//------------------------------------------------------------------------------
// Non-Tensor Basis Kernels
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// Interp
//------------------------------------------------------------------------------
/// Interp kernel: invokes Contract with BASIS_Q_COMP_INTERP for every element assigned to this thread.
///
/// Elements are selected by blockIdx.x and threadIdx.z only: a thread starts at element
/// blockIdx.x * blockDim.z + threadIdx.z and advances by gridDim.x * blockDim.z until num_elem is reached,
/// so any grid size covers all elements. How threadIdx.x / threadIdx.y are used is decided inside Contract
/// (declared in cuda-ref-basis-nontensor-templates.h).
///
/// @param[in]  num_elem  Number of elements to process; also scales the stride arguments handed to Contract
/// @param[in]  d_B       Read-only array forwarded unchanged to Contract (presumably the interpolation matrix; confirm in the templates header)
/// @param[in]  d_U       Read-only array forwarded unchanged to Contract
/// @param[out] d_V       Writable array forwarded unchanged to Contract
extern "C" __global__ void Interp(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U,
                                  CeedScalar *__restrict__ d_V) {
  // Loop-invariant distance between two consecutive elements visited by the same thread
  const unsigned int elem_stride = gridDim.x * blockDim.z;
  CeedInt            elem        = blockIdx.x * blockDim.z + threadIdx.z;

  while (elem < num_elem) {
    Contract<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(
        elem, BASIS_P, BASIS_Q, BASIS_P * num_elem, BASIS_Q * num_elem, BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V);
    elem += elem_stride;
  }
}
*d075f50bSSebastian Grimberg
/// InterpTranspose kernel: invokes ContractTranspose with BASIS_Q_COMP_INTERP for every element assigned to this thread.
///
/// Elements are selected by blockIdx.x and threadIdx.z only: a thread starts at element
/// blockIdx.x * blockDim.z + threadIdx.z and advances by gridDim.x * blockDim.z until num_elem is reached.
/// Compared with Interp, the BASIS_P / BASIS_Q based arguments are passed in swapped order
/// (BASIS_Q first, then BASIS_P), while the last stride argument is still built from BASIS_Q.
///
/// @param[in]  num_elem  Number of elements to process; also scales the stride arguments handed to ContractTranspose
/// @param[in]  d_B       Read-only array forwarded unchanged to ContractTranspose (presumably the interpolation matrix; confirm in the templates header)
/// @param[in]  d_U       Read-only array forwarded unchanged to ContractTranspose
/// @param[out] d_V       Writable array forwarded unchanged to ContractTranspose
extern "C" __global__ void InterpTranspose(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U,
                                           CeedScalar *__restrict__ d_V) {
  // Loop-invariant distance between two consecutive elements visited by the same thread
  const unsigned int elem_stride = gridDim.x * blockDim.z;
  CeedInt            elem        = blockIdx.x * blockDim.z + threadIdx.z;

  while (elem < num_elem) {
    ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(
        elem, BASIS_Q, BASIS_P, BASIS_Q * num_elem, BASIS_P * num_elem, BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V);
    elem += elem_stride;
  }
}
a0154adeSJed Brown
//------------------------------------------------------------------------------
// Deriv
//------------------------------------------------------------------------------
/// Deriv kernel: invokes Contract with BASIS_Q_COMP_DERIV for every element assigned to this thread.
///
/// Elements are selected by blockIdx.x and threadIdx.z only: a thread starts at element
/// blockIdx.x * blockDim.z + threadIdx.z and advances by gridDim.x * blockDim.z until num_elem is reached.
/// The call differs from the Interp kernel only in the BASIS_Q_COMP_DERIV template argument.
///
/// @param[in]  num_elem  Number of elements to process; also scales the stride arguments handed to Contract
/// @param[in]  d_B       Read-only array forwarded unchanged to Contract (presumably the derivative matrix; confirm in the templates header)
/// @param[in]  d_U       Read-only array forwarded unchanged to Contract
/// @param[out] d_V       Writable array forwarded unchanged to Contract
extern "C" __global__ void Deriv(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U,
                                 CeedScalar *__restrict__ d_V) {
  // Loop-invariant distance between two consecutive elements visited by the same thread
  const unsigned int elem_stride = gridDim.x * blockDim.z;
  CeedInt            elem        = blockIdx.x * blockDim.z + threadIdx.z;

  while (elem < num_elem) {
    Contract<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(
        elem, BASIS_P, BASIS_Q, BASIS_P * num_elem, BASIS_Q * num_elem, BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V);
    elem += elem_stride;
  }
}
a0154adeSJed Brown
/// DerivTranspose kernel: invokes ContractTranspose with BASIS_Q_COMP_DERIV for every element assigned to this thread.
///
/// Elements are selected by blockIdx.x and threadIdx.z only: a thread starts at element
/// blockIdx.x * blockDim.z + threadIdx.z and advances by gridDim.x * blockDim.z until num_elem is reached.
/// Compared with Deriv, the BASIS_P / BASIS_Q based arguments are passed in swapped order
/// (BASIS_Q first, then BASIS_P), while the last stride argument is still built from BASIS_Q.
///
/// @param[in]  num_elem  Number of elements to process; also scales the stride arguments handed to ContractTranspose
/// @param[in]  d_B       Read-only array forwarded unchanged to ContractTranspose (presumably the derivative matrix; confirm in the templates header)
/// @param[in]  d_U       Read-only array forwarded unchanged to ContractTranspose
/// @param[out] d_V       Writable array forwarded unchanged to ContractTranspose
extern "C" __global__ void DerivTranspose(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U,
                                          CeedScalar *__restrict__ d_V) {
  // Loop-invariant distance between two consecutive elements visited by the same thread
  const unsigned int elem_stride = gridDim.x * blockDim.z;
  CeedInt            elem        = blockIdx.x * blockDim.z + threadIdx.z;

  while (elem < num_elem) {
    ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(
        elem, BASIS_Q, BASIS_P, BASIS_Q * num_elem, BASIS_P * num_elem, BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V);
    elem += elem_stride;
  }
}
a0154adeSJed Brown
//------------------------------------------------------------------------------
// Weight
//------------------------------------------------------------------------------
/// Weight kernel: replicates the q_weight entries into d_V, once per element.
///
/// For each element assigned to this thread, writes q_weight[threadIdx.x] to d_V[elem * BASIS_Q + threadIdx.x].
/// Elements are selected by blockIdx.x and threadIdx.z: a thread starts at element
/// blockIdx.x * blockDim.z + threadIdx.z and advances by gridDim.x * blockDim.z until num_elem is reached.
///
/// NOTE(review): threadIdx.x is used as the index without a bounds check, so the launch presumably uses
/// blockDim.x == BASIS_Q; confirm against the host-side launch code.
///
/// @param[in]  num_elem  Number of elements to fill
/// @param[in]  q_weight  Array read at index threadIdx.x
/// @param[out] d_V       Array written at index elem * BASIS_Q + threadIdx.x
extern "C" __global__ void Weight(const CeedInt num_elem, const CeedScalar *__restrict__ q_weight, CeedScalar *__restrict__ d_V) {
  // TODO load q_weight in shared memory if blockDim.z > 1?
  const CeedInt      q_id        = threadIdx.x;
  const unsigned int elem_stride = gridDim.x * blockDim.z;
  CeedInt            elem        = blockIdx.x * blockDim.z + threadIdx.z;

  while (elem < num_elem) {
    // The read stays inside the loop so threads with no assigned element never touch q_weight
    d_V[elem * BASIS_Q + q_id] = q_weight[q_id];
    elem += elem_stride;
  }
}
a0154adeSJed Brown
//------------------------------------------------------------------------------
b2165e7aSSebastian Grimberg
94b7b29bSJeremy L Thompson#endif  // CEED_CUDA_REF_BASIS_NONTENSOR_H