xref: /libCEED/include/ceed/jit-source/cuda/cuda-shared-basis-nontensor-templates.h (revision 9ff05d55386b4e6413be60b7231511258906fd9f)
1*9ff05d55SJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
2*9ff05d55SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3*9ff05d55SJeremy L Thompson //
4*9ff05d55SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
5*9ff05d55SJeremy L Thompson //
6*9ff05d55SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
7*9ff05d55SJeremy L Thompson 
8*9ff05d55SJeremy L Thompson /// @file
9*9ff05d55SJeremy L Thompson /// Internal header for CUDA shared memory non-tensor product basis templates
10*9ff05d55SJeremy L Thompson #include <ceed/types.h>
11*9ff05d55SJeremy L Thompson 
12*9ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
13*9ff05d55SJeremy L Thompson // 1D tensor contraction
14*9ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
15*9ff05d55SJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D>
16*9ff05d55SJeremy L Thompson inline __device__ void Contract1d(SharedData_Cuda &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) {
17*9ff05d55SJeremy L Thompson   data.slice[data.t_id_x] = *U;
18*9ff05d55SJeremy L Thompson   __syncthreads();
19*9ff05d55SJeremy L Thompson   *V = 0.0;
20*9ff05d55SJeremy L Thompson   if (data.t_id_x < Q_1D) {
21*9ff05d55SJeremy L Thompson     for (CeedInt i = 0; i < P_1D; i++) {
22*9ff05d55SJeremy L Thompson       *V += B[i + data.t_id_x * P_1D] * data.slice[i];  // Contract x direction
23*9ff05d55SJeremy L Thompson     }
24*9ff05d55SJeremy L Thompson   }
25*9ff05d55SJeremy L Thompson   __syncthreads();
26*9ff05d55SJeremy L Thompson }
27*9ff05d55SJeremy L Thompson 
28*9ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
29*9ff05d55SJeremy L Thompson // 1D transpose tensor contraction
30*9ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
31*9ff05d55SJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D>
32*9ff05d55SJeremy L Thompson inline __device__ void ContractTranspose1d(SharedData_Cuda &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) {
33*9ff05d55SJeremy L Thompson   data.slice[data.t_id_x] = *U;
34*9ff05d55SJeremy L Thompson   __syncthreads();
35*9ff05d55SJeremy L Thompson   if (data.t_id_x < P_1D) {
36*9ff05d55SJeremy L Thompson     for (CeedInt i = 0; i < Q_1D; i++) {
37*9ff05d55SJeremy L Thompson       *V += B[data.t_id_x + i * P_1D] * data.slice[i];  // Contract x direction
38*9ff05d55SJeremy L Thompson     }
39*9ff05d55SJeremy L Thompson   }
40*9ff05d55SJeremy L Thompson   __syncthreads();
41*9ff05d55SJeremy L Thompson }
42*9ff05d55SJeremy L Thompson 
43*9ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
44*9ff05d55SJeremy L Thompson // Interpolate to quadrature points
45*9ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
46*9ff05d55SJeremy L Thompson template <int NUM_COMP, int P, int Q>
47*9ff05d55SJeremy L Thompson inline __device__ void InterpNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B,
48*9ff05d55SJeremy L Thompson                                        CeedScalar *__restrict__ r_V) {
49*9ff05d55SJeremy L Thompson   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
50*9ff05d55SJeremy L Thompson     Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]);
51*9ff05d55SJeremy L Thompson   }
52*9ff05d55SJeremy L Thompson }
53*9ff05d55SJeremy L Thompson 
54*9ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
55*9ff05d55SJeremy L Thompson // Interpolate transpose
56*9ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
57*9ff05d55SJeremy L Thompson template <int NUM_COMP, int P, int Q>
58*9ff05d55SJeremy L Thompson inline __device__ void InterpTransposeNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B,
59*9ff05d55SJeremy L Thompson                                                 CeedScalar *__restrict__ r_V) {
60*9ff05d55SJeremy L Thompson   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
61*9ff05d55SJeremy L Thompson     r_V[comp] = 0.0;
62*9ff05d55SJeremy L Thompson     ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]);
63*9ff05d55SJeremy L Thompson   }
64*9ff05d55SJeremy L Thompson }
65*9ff05d55SJeremy L Thompson 
66*9ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
67*9ff05d55SJeremy L Thompson // Derivatives at quadrature points
68*9ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
69*9ff05d55SJeremy L Thompson template <int NUM_COMP, int DIM, int P, int Q>
70*9ff05d55SJeremy L Thompson inline __device__ void GradNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G, CeedScalar *__restrict__ r_V) {
71*9ff05d55SJeremy L Thompson   for (CeedInt dim = 0; dim < DIM; dim++) {
72*9ff05d55SJeremy L Thompson     for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
73*9ff05d55SJeremy L Thompson       Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], &c_G[dim * P * Q], &r_V[comp + dim * NUM_COMP]);
74*9ff05d55SJeremy L Thompson     }
75*9ff05d55SJeremy L Thompson   }
76*9ff05d55SJeremy L Thompson }
77*9ff05d55SJeremy L Thompson 
78*9ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
79*9ff05d55SJeremy L Thompson // Derivatives transpose
80*9ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
81*9ff05d55SJeremy L Thompson template <int NUM_COMP, int DIM, int P, int Q>
82*9ff05d55SJeremy L Thompson inline __device__ void GradTransposeNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G,
83*9ff05d55SJeremy L Thompson                                               CeedScalar *__restrict__ r_V) {
84*9ff05d55SJeremy L Thompson   for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_V[comp] = 0.0;
85*9ff05d55SJeremy L Thompson   for (CeedInt dim = 0; dim < DIM; dim++) {
86*9ff05d55SJeremy L Thompson     for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
87*9ff05d55SJeremy L Thompson       ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp + dim * NUM_COMP], &c_G[dim * P * Q], &r_V[comp]);
88*9ff05d55SJeremy L Thompson     }
89*9ff05d55SJeremy L Thompson   }
90*9ff05d55SJeremy L Thompson }
91*9ff05d55SJeremy L Thompson 
92*9ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
93*9ff05d55SJeremy L Thompson // Quadrature weights
94*9ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
95*9ff05d55SJeremy L Thompson template <int Q>
96*9ff05d55SJeremy L Thompson inline __device__ void WeightNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ q_weight, CeedScalar *w) {
97*9ff05d55SJeremy L Thompson   *w = (data.t_id_x < Q) ? q_weight[data.t_id_x] : 0.0;
98*9ff05d55SJeremy L Thompson }
99