xref: /libCEED/include/ceed/jit-source/cuda/cuda-shared-basis-read-write-templates.h (revision e03fef56705b317edc4a39dfee40c8366660a6d6)
1 // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 /// @file
9 /// Internal header for CUDA shared memory basis read/write templates
10 
11 #include <ceed.h>
12 
13 //------------------------------------------------------------------------------
14 // 1D
15 //------------------------------------------------------------------------------
16 
17 //------------------------------------------------------------------------------
18 // E-vector -> single element
19 //------------------------------------------------------------------------------
20 template <int NUM_COMP, int P_1D>
21 inline __device__ void ReadElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp,
22                                             const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) {
23   if (data.t_id_x < P_1D) {
24     const CeedInt node = data.t_id_x;
25     const CeedInt ind  = node * strides_node + elem * strides_elem;
26 
27     for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
28       r_u[comp] = d_u[ind + comp * strides_comp];
29     }
30   }
31 }
32 
33 //------------------------------------------------------------------------------
34 // Single element -> E-vector
35 //------------------------------------------------------------------------------
36 template <int NUM_COMP, int P_1D>
37 inline __device__ void WriteElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp,
38                                              const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) {
39   if (data.t_id_x < P_1D) {
40     const CeedInt node = data.t_id_x;
41     const CeedInt ind  = node * strides_node + elem * strides_elem;
42 
43     for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
44       d_v[ind + comp * strides_comp] = r_v[comp];
45     }
46   }
47 }
48 
49 template <int NUM_COMP, int P_1D>
50 inline __device__ void SumElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp,
51                                            const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) {
52   if (data.t_id_x < P_1D) {
53     const CeedInt node = data.t_id_x;
54     const CeedInt ind  = node * strides_node + elem * strides_elem;
55 
56     for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
57       d_v[ind + comp * strides_comp] += r_v[comp];
58     }
59   }
60 }
61 
62 //------------------------------------------------------------------------------
63 // 2D
64 //------------------------------------------------------------------------------
65 
66 //------------------------------------------------------------------------------
67 // E-vector -> single element
68 //------------------------------------------------------------------------------
69 template <int NUM_COMP, int P_1D>
70 inline __device__ void ReadElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp,
71                                             const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) {
72   if (data.t_id_x < P_1D && data.t_id_y < P_1D) {
73     const CeedInt node = data.t_id_x + data.t_id_y * P_1D;
74     const CeedInt ind  = node * strides_node + elem * strides_elem;
75 
76     for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
77       r_u[comp] = d_u[ind + comp * strides_comp];
78     }
79   }
80 }
81 
82 //------------------------------------------------------------------------------
83 // Single element -> E-vector
84 //------------------------------------------------------------------------------
85 template <int NUM_COMP, int P_1D>
86 inline __device__ void WriteElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp,
87                                              const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) {
88   if (data.t_id_x < P_1D && data.t_id_y < P_1D) {
89     const CeedInt node = data.t_id_x + data.t_id_y * P_1D;
90     const CeedInt ind  = node * strides_node + elem * strides_elem;
91 
92     for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
93       d_v[ind + comp * strides_comp] = r_v[comp];
94     }
95   }
96 }
97 
98 template <int NUM_COMP, int P_1D>
99 inline __device__ void SumElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp,
100                                            const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) {
101   if (data.t_id_x < P_1D && data.t_id_y < P_1D) {
102     const CeedInt node = data.t_id_x + data.t_id_y * P_1D;
103     const CeedInt ind  = node * strides_node + elem * strides_elem;
104 
105     for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
106       d_v[ind + comp * strides_comp] += r_v[comp];
107     }
108   }
109 }
110 
111 //------------------------------------------------------------------------------
112 // 3D
113 //------------------------------------------------------------------------------
114 
115 //------------------------------------------------------------------------------
116 // E-vector -> single element
117 //------------------------------------------------------------------------------
118 template <int NUM_COMP, int P_1D>
119 inline __device__ void ReadElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp,
120                                             const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) {
121   if (data.t_id_x < P_1D && data.t_id_y < P_1D) {
122     for (CeedInt z = 0; z < P_1D; z++) {
123       const CeedInt node = data.t_id_x + data.t_id_y * P_1D + z * P_1D * P_1D;
124       const CeedInt ind  = node * strides_node + elem * strides_elem;
125 
126       for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
127         r_u[z + comp * P_1D] = d_u[ind + comp * strides_comp];
128       }
129     }
130   }
131 }
132 
133 //------------------------------------------------------------------------------
134 // Single element -> E-vector
135 //------------------------------------------------------------------------------
136 template <int NUM_COMP, int P_1D>
137 inline __device__ void WriteElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp,
138                                              const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) {
139   if (data.t_id_x < P_1D && data.t_id_y < P_1D) {
140     for (CeedInt z = 0; z < P_1D; z++) {
141       const CeedInt node = data.t_id_x + data.t_id_y * P_1D + z * P_1D * P_1D;
142       const CeedInt ind  = node * strides_node + elem * strides_elem;
143 
144       for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
145         d_v[ind + comp * strides_comp] = r_v[z + comp * P_1D];
146       }
147     }
148   }
149 }
150 
151 template <int NUM_COMP, int P_1D>
152 inline __device__ void SumElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp,
153                                            const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) {
154   if (data.t_id_x < P_1D && data.t_id_y < P_1D) {
155     for (CeedInt z = 0; z < P_1D; z++) {
156       const CeedInt node = data.t_id_x + data.t_id_y * P_1D + z * P_1D * P_1D;
157       const CeedInt ind  = node * strides_node + elem * strides_elem;
158 
159       for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
160         d_v[ind + comp * strides_comp] += r_v[z + comp * P_1D];
161       }
162     }
163   }
164 }
165