1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED: http://github.com/ceed
7
8 /// @file
9 /// Internal header for HIP non-tensor product basis
10 #include <ceed/types.h>
11
12 #include "hip-ref-basis-nontensor-templates.h"
13
14 //------------------------------------------------------------------------------
15 // Non-Tensor Basis Kernels
16 //------------------------------------------------------------------------------
17
18 //------------------------------------------------------------------------------
19 // Interp
20 //------------------------------------------------------------------------------
Interp(const CeedInt num_elem,const CeedScalar * __restrict__ d_B,const CeedScalar * __restrict__ d_U,CeedScalar * __restrict__ d_V)21 extern "C" __global__ void Interp(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U,
22 CeedScalar *__restrict__ d_V) {
23 for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) {
24 Contract<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * num_elem, BASIS_Q * num_elem,
25 BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V);
26 }
27 }
28
InterpTranspose(const CeedInt num_elem,const CeedScalar * __restrict__ d_B,const CeedScalar * __restrict__ d_U,CeedScalar * __restrict__ d_V)29 extern "C" __global__ void InterpTranspose(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U,
30 CeedScalar *__restrict__ d_V) {
31 for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) {
32 ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, BASIS_Q * num_elem, BASIS_P * num_elem,
33 BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V);
34 }
35 }
36
37 //------------------------------------------------------------------------------
38 // Deriv
39 //------------------------------------------------------------------------------
Deriv(const CeedInt num_elem,const CeedScalar * __restrict__ d_B,const CeedScalar * __restrict__ d_U,CeedScalar * __restrict__ d_V)40 extern "C" __global__ void Deriv(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U,
41 CeedScalar *__restrict__ d_V) {
42 for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) {
43 Contract<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * num_elem, BASIS_Q * num_elem,
44 BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V);
45 }
46 }
47
DerivTranspose(const CeedInt num_elem,const CeedScalar * __restrict__ d_B,const CeedScalar * __restrict__ d_U,CeedScalar * __restrict__ d_V)48 extern "C" __global__ void DerivTranspose(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U,
49 CeedScalar *__restrict__ d_V) {
50 for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) {
51 ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, BASIS_Q * num_elem, BASIS_P * num_elem,
52 BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V);
53 }
54 }
55
56 //------------------------------------------------------------------------------
57 // Weight
58 //------------------------------------------------------------------------------
Weight(const CeedInt num_elem,const CeedScalar * __restrict__ q_weight,CeedScalar * __restrict__ d_V)59 extern "C" __global__ void Weight(const CeedInt num_elem, const CeedScalar *__restrict__ q_weight, CeedScalar *__restrict__ d_V) {
60 const CeedInt t_id = threadIdx.x;
61 // TODO load q_weight in shared memory if blockDim.z > 1?
62
63 for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) {
64 d_V[elem * BASIS_Q + t_id] = q_weight[t_id];
65 }
66 }
67