xref: /libCEED/include/ceed/jit-source/cuda/cuda-ref-basis-nontensor.h (revision d075f50ba6d3b1e38c233860adb1de6c814f0afc) !
1a0154adeSJed Brown // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2a0154adeSJed Brown // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3a0154adeSJed Brown //
4a0154adeSJed Brown // SPDX-License-Identifier: BSD-2-Clause
5a0154adeSJed Brown //
6a0154adeSJed Brown // This file is part of CEED:  http://github.com/ceed
7a0154adeSJed Brown 
8b2165e7aSSebastian Grimberg /// @file
9b2165e7aSSebastian Grimberg /// Internal header for CUDA non-tensor product basis
1094b7b29bSJeremy L Thompson #ifndef CEED_CUDA_REF_BASIS_NONTENSOR_H
1194b7b29bSJeremy L Thompson #define CEED_CUDA_REF_BASIS_NONTENSOR_H
12b2165e7aSSebastian Grimberg 
13c9c2c079SJeremy L Thompson #include <ceed.h>
14a0154adeSJed Brown 
15*d075f50bSSebastian Grimberg #include "cuda-ref-basis-nontensor-templates.h"
16*d075f50bSSebastian Grimberg 
17a0154adeSJed Brown //------------------------------------------------------------------------------
18a0154adeSJed Brown // Non-Tensor Basis Kernels
19a0154adeSJed Brown //------------------------------------------------------------------------------
20a0154adeSJed Brown 
21a0154adeSJed Brown //------------------------------------------------------------------------------
22a0154adeSJed Brown // Interp
23a0154adeSJed Brown //------------------------------------------------------------------------------
24*d075f50bSSebastian Grimberg extern "C" __global__ void Interp(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U,
25a0154adeSJed Brown                                   CeedScalar *__restrict__ d_V) {
262b730f8bSJeremy L Thompson   for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) {
27*d075f50bSSebastian Grimberg     Contract<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * num_elem, BASIS_Q * num_elem,
28*d075f50bSSebastian Grimberg                                                                     BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V);
29a0154adeSJed Brown   }
30a0154adeSJed Brown }
31*d075f50bSSebastian Grimberg 
32*d075f50bSSebastian Grimberg extern "C" __global__ void InterpTranspose(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U,
33*d075f50bSSebastian Grimberg                                            CeedScalar *__restrict__ d_V) {
34*d075f50bSSebastian Grimberg   for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) {
35*d075f50bSSebastian Grimberg     ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, BASIS_Q * num_elem, BASIS_P * num_elem,
36*d075f50bSSebastian Grimberg                                                                              BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V);
37a0154adeSJed Brown   }
38a0154adeSJed Brown }
39a0154adeSJed Brown 
40a0154adeSJed Brown //------------------------------------------------------------------------------
41*d075f50bSSebastian Grimberg // Deriv
42a0154adeSJed Brown //------------------------------------------------------------------------------
43*d075f50bSSebastian Grimberg extern "C" __global__ void Deriv(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U,
44a0154adeSJed Brown                                  CeedScalar *__restrict__ d_V) {
452b730f8bSJeremy L Thompson   for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) {
46*d075f50bSSebastian Grimberg     Contract<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * num_elem, BASIS_Q * num_elem,
47*d075f50bSSebastian Grimberg                                                                    BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V);
48*d075f50bSSebastian Grimberg   }
49a0154adeSJed Brown }
50a0154adeSJed Brown 
51*d075f50bSSebastian Grimberg extern "C" __global__ void DerivTranspose(const CeedInt num_elem, const CeedScalar *__restrict__ d_B, const CeedScalar *__restrict__ d_U,
52*d075f50bSSebastian Grimberg                                           CeedScalar *__restrict__ d_V) {
53*d075f50bSSebastian Grimberg   for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) {
54*d075f50bSSebastian Grimberg     ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, BASIS_Q * num_elem, BASIS_P * num_elem,
55*d075f50bSSebastian Grimberg                                                                             BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V);
56a0154adeSJed Brown   }
57a0154adeSJed Brown }
58a0154adeSJed Brown 
59a0154adeSJed Brown //------------------------------------------------------------------------------
60a0154adeSJed Brown // Weight
61a0154adeSJed Brown //------------------------------------------------------------------------------
622b730f8bSJeremy L Thompson extern "C" __global__ void Weight(const CeedInt num_elem, const CeedScalar *__restrict__ q_weight, CeedScalar *__restrict__ d_V) {
63a0154adeSJed Brown   const CeedInt t_id = threadIdx.x;
64a0154adeSJed Brown   // TODO load q_weight in shared memory if blockDim.z > 1?
65*d075f50bSSebastian Grimberg 
662b730f8bSJeremy L Thompson   for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * blockDim.z) {
67a0154adeSJed Brown     d_V[elem * BASIS_Q + t_id] = q_weight[t_id];
68a0154adeSJed Brown   }
69a0154adeSJed Brown }
70a0154adeSJed Brown 
71a0154adeSJed Brown //------------------------------------------------------------------------------
72b2165e7aSSebastian Grimberg 
7394b7b29bSJeremy L Thompson #endif  // CEED_CUDA_REF_BASIS_NONTENSOR_H
74