1*9ba83ac0SJeremy L Thompson // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2a0154adeSJed Brown // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3a0154adeSJed Brown //
4a0154adeSJed Brown // SPDX-License-Identifier: BSD-2-Clause
5a0154adeSJed Brown //
6a0154adeSJed Brown // This file is part of CEED: http://github.com/ceed
7a0154adeSJed Brown
8b2165e7aSSebastian Grimberg /// @file
9b2165e7aSSebastian Grimberg /// Internal header for CUDA non-tensor product basis
10c0b5abf0SJeremy L Thompson #include <ceed/types.h>
11a0154adeSJed Brown
12d075f50bSSebastian Grimberg #include "cuda-ref-basis-nontensor-templates.h"
13d075f50bSSebastian Grimberg
14a0154adeSJed Brown //------------------------------------------------------------------------------
15a0154adeSJed Brown // Non-Tensor Basis Kernels
16a0154adeSJed Brown //------------------------------------------------------------------------------
17a0154adeSJed Brown
18a0154adeSJed Brown //------------------------------------------------------------------------------
19a0154adeSJed Brown // Interp
20a0154adeSJed Brown //------------------------------------------------------------------------------
Interp(const CeedInt num_elem,const CeedScalar * __restrict__ d_B,const CeedScalar * __restrict__ d_U,CeedScalar * __restrict__ d_V)21d075f50bSSebastian Grimberg extern "C" __global__ void Interp(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U,
22a0154adeSJed Brown CeedScalar *__restrict__ d_V) {
232b730f8bSJeremy L Thompson for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) {
24d075f50bSSebastian Grimberg Contract<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * num_elem, BASIS_Q * num_elem,
25d075f50bSSebastian Grimberg BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V);
26a0154adeSJed Brown }
27a0154adeSJed Brown }
28d075f50bSSebastian Grimberg
InterpTranspose(const CeedInt num_elem,const CeedScalar * __restrict__ d_B,const CeedScalar * __restrict__ d_U,CeedScalar * __restrict__ d_V)29d075f50bSSebastian Grimberg extern "C" __global__ void InterpTranspose(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U,
30d075f50bSSebastian Grimberg CeedScalar *__restrict__ d_V) {
31d075f50bSSebastian Grimberg for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) {
32d075f50bSSebastian Grimberg ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, BASIS_Q * num_elem, BASIS_P * num_elem,
33d075f50bSSebastian Grimberg BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V);
34a0154adeSJed Brown }
35a0154adeSJed Brown }
36a0154adeSJed Brown
37a0154adeSJed Brown //------------------------------------------------------------------------------
38d075f50bSSebastian Grimberg // Deriv
39a0154adeSJed Brown //------------------------------------------------------------------------------
Deriv(const CeedInt num_elem,const CeedScalar * __restrict__ d_B,const CeedScalar * __restrict__ d_U,CeedScalar * __restrict__ d_V)40d075f50bSSebastian Grimberg extern "C" __global__ void Deriv(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U,
41a0154adeSJed Brown CeedScalar *__restrict__ d_V) {
422b730f8bSJeremy L Thompson for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) {
43d075f50bSSebastian Grimberg Contract<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * num_elem, BASIS_Q * num_elem,
44d075f50bSSebastian Grimberg BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V);
45d075f50bSSebastian Grimberg }
46a0154adeSJed Brown }
47a0154adeSJed Brown
DerivTranspose(const CeedInt num_elem,const CeedScalar * __restrict__ d_B,const CeedScalar * __restrict__ d_U,CeedScalar * __restrict__ d_V)48d075f50bSSebastian Grimberg extern "C" __global__ void DerivTranspose(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U,
49d075f50bSSebastian Grimberg CeedScalar *__restrict__ d_V) {
50d075f50bSSebastian Grimberg for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) {
51d075f50bSSebastian Grimberg ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, BASIS_Q * num_elem, BASIS_P * num_elem,
52d075f50bSSebastian Grimberg BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V);
53a0154adeSJed Brown }
54a0154adeSJed Brown }
55a0154adeSJed Brown
56a0154adeSJed Brown //------------------------------------------------------------------------------
57a0154adeSJed Brown // Weight
58a0154adeSJed Brown //------------------------------------------------------------------------------
Weight(const CeedInt num_elem,const CeedScalar * __restrict__ q_weight,CeedScalar * __restrict__ d_V)592b730f8bSJeremy L Thompson extern "C" __global__ void Weight(const CeedInt num_elem, const CeedScalar *__restrict__ q_weight, CeedScalar *__restrict__ d_V) {
60a0154adeSJed Brown const CeedInt t_id = threadIdx.x;
61a0154adeSJed Brown // TODO load q_weight in shared memory if blockDim.z > 1?
62d075f50bSSebastian Grimberg
632b730f8bSJeremy L Thompson for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) {
64a0154adeSJed Brown d_V[elem * BASIS_Q + t_id] = q_weight[t_id];
65a0154adeSJed Brown }
66a0154adeSJed Brown }
67