xref: /libCEED/include/ceed/jit-source/hip/hip-shared-basis-nontensor-templates.h (revision 1f6c24fecef91b56ee93b512ed53da4b2ab340d3)
1 // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 /// @file
9 /// Internal header for HIP shared memory non-tensor basis templates
10 #include <ceed/types.h>
11 
12 //------------------------------------------------------------------------------
13 // 1D tensor contraction
14 //------------------------------------------------------------------------------
15 template <int NUM_COMP, int P_1D, int Q_1D>
16 inline __device__ void Contract1d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) {
17   data.slice[data.t_id_x] = *U;
18   __syncthreads();
19   *V = 0.0;
20   if (data.t_id_x < Q_1D) {
21     for (CeedInt i = 0; i < P_1D; i++) {
22       *V += B[i + data.t_id_x * P_1D] * data.slice[i];  // Contract x direction
23     }
24   }
25   __syncthreads();
26 }
27 
28 //------------------------------------------------------------------------------
29 // 1D transpose tensor contraction
30 //------------------------------------------------------------------------------
31 template <int NUM_COMP, int P_1D, int Q_1D>
32 inline __device__ void ContractTranspose1d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) {
33   data.slice[data.t_id_x] = *U;
34   __syncthreads();
35   if (data.t_id_x < P_1D) {
36     for (CeedInt i = 0; i < Q_1D; i++) {
37       *V += B[data.t_id_x + i * P_1D] * data.slice[i];  // Contract x direction
38     }
39   }
40   __syncthreads();
41 }
42 
43 //------------------------------------------------------------------------------
44 // Interpolate to quadrature points
45 //------------------------------------------------------------------------------
46 template <int NUM_COMP, int P, int Q>
47 inline __device__ void Interp1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, CeedScalar *__restrict__ r_V) {
48   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
49     Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]);
50   }
51 }
52 
53 //------------------------------------------------------------------------------
54 // Interpolate transpose
55 //------------------------------------------------------------------------------
56 template <int NUM_COMP, int P, int Q>
57 inline __device__ void InterpTranspose1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B,
58                                          CeedScalar *__restrict__ r_V) {
59   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
60     r_V[comp] = 0.0;
61     ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]);
62   }
63 }
64 
65 //------------------------------------------------------------------------------
66 // Derivatives at quadrature points
67 //------------------------------------------------------------------------------
68 template <int NUM_COMP, int P, int Q>
69 inline __device__ void Grad1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G, CeedScalar *__restrict__ r_V) {
70   for (CeedInt dim = 0; dim < DIM; dim++) {
71     for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
72       Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], &c_G[dim * P * Q], &r_V[comp + dim * NUM_COMP]);
73     }
74   }
75 }
76 
77 //------------------------------------------------------------------------------
78 // Derivatives transpose
79 //------------------------------------------------------------------------------
80 template <int NUM_COMP, int P, int Q>
81 inline __device__ void GradTranspose1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G,
82                                        CeedScalar *__restrict__ r_V) {
83   for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_V[comp] = 0.0;
84   for (CeedInt dim = 0; dim < DIM; dim++) {
85     for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
86       ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp + dim * NUM_COMP], &c_G[dim * P * Q], &r_V[comp]);
87     }
88   }
89 }
90 
91 //------------------------------------------------------------------------------
92 // Quadrature weights
93 //------------------------------------------------------------------------------
94 template <int Q>
95 inline __device__ void Weight1d(SharedData_Hip &data, const CeedScalar *__restrict__ q_weight_1d, CeedScalar *w) {
96   *w = (data.t_id_x < Q) ? q_weight_1d[data.t_id_x] : 0.0;
97 }
98