// xref: /libCEED/include/ceed/jit-source/hip/hip-ref-restriction-curl-oriented.h (revision 58f118fde9e934489c18d94d567a96d887adab93)
// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED:  http://github.com/ceed

/// @file
/// Internal header for HIP curl-oriented element restriction kernels
#ifndef CEED_HIP_REF_RESTRICTION_CURL_ORIENTED_H
#define CEED_HIP_REF_RESTRICTION_CURL_ORIENTED_H

#include <ceed.h>

//------------------------------------------------------------------------------
// L-vector -> E-vector, curl-oriented
//------------------------------------------------------------------------------
extern "C" __global__ void CurlOrientedNoTranspose(const CeedInt *__restrict__ indices, const CeedInt8 *__restrict__ curl_orients,
                                                   const CeedScalar *__restrict__ u, CeedScalar *__restrict__ v) {
  // Gather the L-vector u into the E-vector v while applying the signed
  // tridiagonal orientation transform. Each thread of the grid-stride loop
  // handles one (element, local node) pair.
  for (CeedInt node = blockIdx.x * blockDim.x + threadIdx.x; node < RSTR_NUM_ELEM * RSTR_ELEM_SIZE; node += blockDim.x * gridDim.x) {
    const CeedInt elem      = node / RSTR_ELEM_SIZE;
    const CeedInt loc_node  = node % RSTR_ELEM_SIZE;
    const bool    has_left  = loc_node > 0;                     // sub-diagonal neighbor exists
    const bool    has_right = loc_node < (RSTR_ELEM_SIZE - 1);  // super-diagonal neighbor exists
    // L-vector offsets of this node and its in-element neighbors
    const CeedInt ind_left  = has_left ? indices[node - 1] : 0;
    const CeedInt ind_mid   = indices[node];
    const CeedInt ind_right = has_right ? indices[node + 1] : 0;
    // Three signed weights are stored per node: sub-, main, and super-diagonal
    const CeedInt8 w_left  = curl_orients[3 * node + 0];
    const CeedInt8 w_mid   = curl_orients[3 * node + 1];
    const CeedInt8 w_right = curl_orients[3 * node + 2];

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) {
      // Accumulate in the same order as the stored stencil (left, mid, right)
      CeedScalar result = 0.0;
      if (has_left) result += u[ind_left + comp * RSTR_COMP_STRIDE] * w_left;
      result += u[ind_mid + comp * RSTR_COMP_STRIDE] * w_mid;
      if (has_right) result += u[ind_right + comp * RSTR_COMP_STRIDE] * w_right;
      v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = result;
    }
  }
}

//------------------------------------------------------------------------------
// L-vector -> E-vector, unsigned curl-oriented
//------------------------------------------------------------------------------
extern "C" __global__ void CurlOrientedUnsignedNoTranspose(const CeedInt *__restrict__ indices, const CeedInt8 *__restrict__ curl_orients,
                                                           const CeedScalar *__restrict__ u, CeedScalar *__restrict__ v) {
  // Gather the L-vector u into the E-vector v using the magnitudes of the
  // tridiagonal orientation weights (signs discarded via abs). One
  // (element, local node) pair per thread of the grid-stride loop.
  for (CeedInt node = blockIdx.x * blockDim.x + threadIdx.x; node < RSTR_NUM_ELEM * RSTR_ELEM_SIZE; node += blockDim.x * gridDim.x) {
    const CeedInt elem      = node / RSTR_ELEM_SIZE;
    const CeedInt loc_node  = node % RSTR_ELEM_SIZE;
    const bool    has_left  = loc_node > 0;                     // sub-diagonal neighbor exists
    const bool    has_right = loc_node < (RSTR_ELEM_SIZE - 1);  // super-diagonal neighbor exists
    // L-vector offsets of this node and its in-element neighbors
    const CeedInt ind_left  = has_left ? indices[node - 1] : 0;
    const CeedInt ind_mid   = indices[node];
    const CeedInt ind_right = has_right ? indices[node + 1] : 0;
    // Unsigned weights: sub-, main, and super-diagonal, three per node
    const CeedInt8 w_left  = abs(curl_orients[3 * node + 0]);
    const CeedInt8 w_mid   = abs(curl_orients[3 * node + 1]);
    const CeedInt8 w_right = abs(curl_orients[3 * node + 2]);

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) {
      // Accumulate in the same order as the stored stencil (left, mid, right)
      CeedScalar result = 0.0;
      if (has_left) result += u[ind_left + comp * RSTR_COMP_STRIDE] * w_left;
      result += u[ind_mid + comp * RSTR_COMP_STRIDE] * w_mid;
      if (has_right) result += u[ind_right + comp * RSTR_COMP_STRIDE] * w_right;
      v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = result;
    }
  }
}

//------------------------------------------------------------------------------
// E-vector -> L-vector, curl-oriented
//------------------------------------------------------------------------------
#if !USE_DETERMINISTIC
extern "C" __global__ void CurlOrientedTranspose(const CeedInt *__restrict__ indices, const CeedInt8 *__restrict__ curl_orients,
                                                 const CeedScalar *__restrict__ u, CeedScalar *__restrict__ v) {
  // Scatter the E-vector u back to the L-vector v, applying the transpose of
  // the signed tridiagonal orientation transform. One thread per E-vector
  // node; contributions to shared L-vector entries are combined with
  // atomicAdd, so summation order (and hence rounding) varies run-to-run.
  for (CeedInt node = blockIdx.x * blockDim.x + threadIdx.x; node < RSTR_NUM_ELEM * RSTR_ELEM_SIZE; node += blockDim.x * gridDim.x) {
    const CeedInt ind      = indices[node];
    const CeedInt loc_node = node % RSTR_ELEM_SIZE;
    const CeedInt elem     = node / RSTR_ELEM_SIZE;
    // Transposed stencil: this row uses the super-diagonal weight of the
    // previous node and the sub-diagonal weight of the next node.
    // Fallbacks are integer 0 (not 0.0): the weights are CeedInt8, so a
    // double literal would force a needless int8 -> double -> int8 round trip.
    const CeedInt8 curl_orient_du = loc_node > 0 ? curl_orients[3 * node - 1] : 0;
    const CeedInt8 curl_orient_d  = curl_orients[3 * node + 1];
    const CeedInt8 curl_orient_dl = loc_node < (RSTR_ELEM_SIZE - 1) ? curl_orients[3 * node + 3] : 0;

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) {
      CeedScalar value = 0.0;
      value += loc_node > 0 ? u[loc_node - 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_du : 0.0;
      value += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_d;
      value +=
          loc_node < (RSTR_ELEM_SIZE - 1) ? u[loc_node + 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_dl : 0.0;
      atomicAdd(v + ind + comp * RSTR_COMP_STRIDE, value);
    }
  }
}
#else
extern "C" __global__ void CurlOrientedTranspose(const CeedInt *__restrict__ l_vec_indices, const CeedInt *__restrict__ t_indices,
                                                 const CeedInt *__restrict__ t_offsets, const CeedInt8 *__restrict__ curl_orients,
                                                 const CeedScalar *__restrict__ u, CeedScalar *__restrict__ v) {
  // Deterministic scatter of the E-vector u to the L-vector v with the
  // transposed signed orientation transform. Each thread owns one unique
  // L-vector node and serially sums all E-vector contributions listed for it
  // in the transpose (CSR-like) maps t_offsets/t_indices, so no atomics are
  // needed and results are reproducible.
  CeedScalar value[RSTR_NUM_COMP];

  for (CeedInt i = blockIdx.x * blockDim.x + threadIdx.x; i < RSTR_NUM_NODES; i += blockDim.x * gridDim.x) {
    const CeedInt ind     = l_vec_indices[i];
    const CeedInt range_1 = t_offsets[i];      // first contribution for this node
    const CeedInt range_N = t_offsets[i + 1];  // one past the last contribution

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) value[comp] = 0.0;

    for (CeedInt j = range_1; j < range_N; j++) {
      const CeedInt t_ind    = t_indices[j];
      const CeedInt loc_node = t_ind % RSTR_ELEM_SIZE;
      const CeedInt elem     = t_ind / RSTR_ELEM_SIZE;
      // Transposed stencil: super-diagonal weight of the previous node and
      // sub-diagonal weight of the next node. Fallbacks are integer 0 (not
      // 0.0) since the weights are CeedInt8.
      const CeedInt8 curl_orient_du = loc_node > 0 ? curl_orients[3 * t_ind - 1] : 0;
      const CeedInt8 curl_orient_d  = curl_orients[3 * t_ind + 1];
      const CeedInt8 curl_orient_dl = loc_node < (RSTR_ELEM_SIZE - 1) ? curl_orients[3 * t_ind + 3] : 0;

      for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) {
        value[comp] += loc_node > 0 ? u[loc_node - 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_du : 0.0;
        value[comp] += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_d;
        value[comp] +=
            loc_node < (RSTR_ELEM_SIZE - 1) ? u[loc_node + 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_dl : 0.0;
      }
    }

    // Each thread owns a distinct ind, so a plain accumulate is race-free
    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) v[ind + comp * RSTR_COMP_STRIDE] += value[comp];
  }
}
#endif

//------------------------------------------------------------------------------
// E-vector -> L-vector, unsigned curl-oriented
//------------------------------------------------------------------------------
#if !USE_DETERMINISTIC
extern "C" __global__ void CurlOrientedUnsignedTranspose(const CeedInt *__restrict__ indices, const CeedInt8 *__restrict__ curl_orients,
                                                         const CeedScalar *__restrict__ u, CeedScalar *__restrict__ v) {
  // Scatter the E-vector u back to the L-vector v using the magnitudes of the
  // transposed tridiagonal orientation weights (signs discarded via abs).
  // One thread per E-vector node; contributions to shared L-vector entries
  // are combined with atomicAdd, so summation order varies run-to-run.
  for (CeedInt node = blockIdx.x * blockDim.x + threadIdx.x; node < RSTR_NUM_ELEM * RSTR_ELEM_SIZE; node += blockDim.x * gridDim.x) {
    const CeedInt loc_node = node % RSTR_ELEM_SIZE;
    const CeedInt elem     = node / RSTR_ELEM_SIZE;
    const CeedInt ind      = indices[node];
    // Transposed stencil: super-diagonal weight of the previous node and
    // sub-diagonal weight of the next node. Fallbacks are integer 0 (not
    // 0.0): the weights are CeedInt8, so a double literal would force a
    // needless conversion round trip.
    const CeedInt8 curl_orient_du = loc_node > 0 ? abs(curl_orients[3 * node - 1]) : 0;
    const CeedInt8 curl_orient_d  = abs(curl_orients[3 * node + 1]);
    const CeedInt8 curl_orient_dl = loc_node < (RSTR_ELEM_SIZE - 1) ? abs(curl_orients[3 * node + 3]) : 0;

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) {
      CeedScalar value = 0.0;
      value += loc_node > 0 ? u[loc_node - 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_du : 0.0;
      value += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_d;
      value +=
          loc_node < (RSTR_ELEM_SIZE - 1) ? u[loc_node + 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_dl : 0.0;
      atomicAdd(v + ind + comp * RSTR_COMP_STRIDE, value);
    }
  }
}
#else
extern "C" __global__ void CurlOrientedUnsignedTranspose(const CeedInt *__restrict__ l_vec_indices, const CeedInt *__restrict__ t_indices,
                                                         const CeedInt *__restrict__ t_offsets, const CeedInt8 *__restrict__ curl_orients,
                                                         const CeedScalar *__restrict__ u, CeedScalar *__restrict__ v) {
  // Deterministic scatter of the E-vector u to the L-vector v using the
  // magnitudes of the transposed orientation weights (signs discarded via
  // abs). Each thread owns one unique L-vector node and serially sums all
  // E-vector contributions listed for it in the transpose (CSR-like) maps
  // t_offsets/t_indices, so no atomics are needed and results are
  // reproducible.
  CeedScalar value[RSTR_NUM_COMP];

  for (CeedInt i = blockIdx.x * blockDim.x + threadIdx.x; i < RSTR_NUM_NODES; i += blockDim.x * gridDim.x) {
    const CeedInt ind     = l_vec_indices[i];
    const CeedInt range_1 = t_offsets[i];      // first contribution for this node
    const CeedInt range_N = t_offsets[i + 1];  // one past the last contribution

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) value[comp] = 0.0;

    for (CeedInt j = range_1; j < range_N; j++) {
      const CeedInt t_ind    = t_indices[j];
      const CeedInt loc_node = t_ind % RSTR_ELEM_SIZE;
      const CeedInt elem     = t_ind / RSTR_ELEM_SIZE;
      // Transposed stencil: super-diagonal weight of the previous node and
      // sub-diagonal weight of the next node. Fallbacks are integer 0 (not
      // 0.0) since the weights are CeedInt8.
      const CeedInt8 curl_orient_du = loc_node > 0 ? abs(curl_orients[3 * t_ind - 1]) : 0;
      const CeedInt8 curl_orient_d  = abs(curl_orients[3 * t_ind + 1]);
      const CeedInt8 curl_orient_dl = loc_node < (RSTR_ELEM_SIZE - 1) ? abs(curl_orients[3 * t_ind + 3]) : 0;

      for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) {
        value[comp] += loc_node > 0 ? u[loc_node - 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_du : 0.0;
        value[comp] += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_d;
        value[comp] +=
            loc_node < (RSTR_ELEM_SIZE - 1) ? u[loc_node + 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_dl : 0.0;
      }
    }

    // Each thread owns a distinct ind, so a plain accumulate is race-free
    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) v[ind + comp * RSTR_COMP_STRIDE] += value[comp];
  }
}
#endif

//------------------------------------------------------------------------------

#endif  // CEED_HIP_REF_RESTRICTION_CURL_ORIENTED_H
