1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED: http://github.com/ceed
7
8 /// @file
9 /// Internal header for CUDA shared memory non-tensor basis templates
10 #include <ceed/types.h>
11
12 //------------------------------------------------------------------------------
13 // 1D tensor contraction
14 //------------------------------------------------------------------------------
15 template <int NUM_COMP, int P_1D, int Q_1D>
Contract1d(SharedData_Cuda & data,const CeedScalar * U,const CeedScalar * B,CeedScalar * V)16 inline __device__ void Contract1d(SharedData_Cuda &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) {
17 data.slice[data.t_id_x] = *U;
18 __syncthreads();
19 *V = 0.0;
20 if (data.t_id_x < Q_1D) {
21 for (CeedInt i = 0; i < P_1D; i++) {
22 *V += B[i + data.t_id_x * P_1D] * data.slice[i]; // Contract x direction
23 }
24 }
25 __syncthreads();
26 }
27
28 //------------------------------------------------------------------------------
29 // 1D transpose tensor contraction
30 //------------------------------------------------------------------------------
31 template <int NUM_COMP, int P_1D, int Q_1D>
ContractTranspose1d(SharedData_Cuda & data,const CeedScalar * U,const CeedScalar * B,CeedScalar * V)32 inline __device__ void ContractTranspose1d(SharedData_Cuda &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) {
33 data.slice[data.t_id_x] = *U;
34 __syncthreads();
35 if (data.t_id_x < P_1D) {
36 for (CeedInt i = 0; i < Q_1D; i++) {
37 *V += B[data.t_id_x + i * P_1D] * data.slice[i]; // Contract x direction
38 }
39 }
40 __syncthreads();
41 }
42
43 //------------------------------------------------------------------------------
44 // Interpolate to quadrature points
45 //------------------------------------------------------------------------------
46 template <int NUM_COMP, int P, int Q, int T_1D>
InterpNonTensor(SharedData_Cuda & data,const CeedScalar * __restrict__ r_U,const CeedScalar * c_B,CeedScalar * __restrict__ r_V)47 inline __device__ void InterpNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B,
48 CeedScalar *__restrict__ r_V) {
49 for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
50 Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]);
51 }
52 }
53
54 //------------------------------------------------------------------------------
55 // Interpolate transpose
56 //------------------------------------------------------------------------------
57 template <int NUM_COMP, int P, int Q, int T_1D>
InterpTransposeNonTensor(SharedData_Cuda & data,const CeedScalar * __restrict__ r_U,const CeedScalar * c_B,CeedScalar * __restrict__ r_V)58 inline __device__ void InterpTransposeNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B,
59 CeedScalar *__restrict__ r_V) {
60 for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
61 r_V[comp] = 0.0;
62 ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]);
63 }
64 }
65
66 //------------------------------------------------------------------------------
67 // Derivatives at quadrature points
68 //------------------------------------------------------------------------------
69 template <int NUM_COMP, int DIM, int P, int Q, int T_1D>
GradNonTensor(SharedData_Cuda & data,const CeedScalar * __restrict__ r_U,const CeedScalar * c_G,CeedScalar * __restrict__ r_V)70 inline __device__ void GradNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G, CeedScalar *__restrict__ r_V) {
71 for (CeedInt dim = 0; dim < DIM; dim++) {
72 for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
73 Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], &c_G[dim * P * Q], &r_V[comp + dim * NUM_COMP]);
74 }
75 }
76 }
77
78 //------------------------------------------------------------------------------
79 // Derivatives transpose
80 //------------------------------------------------------------------------------
81 template <int NUM_COMP, int DIM, int P, int Q, int T_1D>
GradTransposeNonTensor(SharedData_Cuda & data,const CeedScalar * __restrict__ r_U,const CeedScalar * c_G,CeedScalar * __restrict__ r_V)82 inline __device__ void GradTransposeNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G,
83 CeedScalar *__restrict__ r_V) {
84 for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_V[comp] = 0.0;
85 for (CeedInt dim = 0; dim < DIM; dim++) {
86 for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
87 ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp + dim * NUM_COMP], &c_G[dim * P * Q], &r_V[comp]);
88 }
89 }
90 }
91
92 //------------------------------------------------------------------------------
93 // Quadrature weights
94 //------------------------------------------------------------------------------
95 template <int P, int Q>
WeightNonTensor(SharedData_Cuda & data,const CeedScalar * __restrict__ q_weight,CeedScalar * w)96 inline __device__ void WeightNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ q_weight, CeedScalar *w) {
97 *w = (data.t_id_x < Q) ? q_weight[data.t_id_x] : 0.0;
98 }
99