// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED: http://github.com/ceed

/// @file
/// Internal header for HIP curl-oriented element restriction kernels
///
/// All kernels are JIT-compiled with the following macros defined per restriction:
///   RSTR_NUM_ELEM     - number of elements
///   RSTR_ELEM_SIZE    - number of nodes per element
///   RSTR_NUM_COMP     - number of field components
///   RSTR_COMP_STRIDE  - stride between components in the L-vector
///   RSTR_NUM_NODES    - number of unique L-vector nodes (deterministic path only)
///   USE_DETERMINISTIC - selects atomic-free transpose kernels when nonzero
///
/// For each E-vector node, curl_orients stores a row of three CeedInt8
/// coefficients (sub-diagonal, diagonal, super-diagonal) of a tridiagonal
/// transformation applied within each element, so each E-vector value is a
/// linear combination of at most three L-vector entries (and each L-vector
/// value a combination of at most three E-vector entries in the transpose).
/// Kernels use 1D grid-stride loops, so any 1D launch configuration is valid.
#ifndef CEED_HIP_REF_RESTRICTION_CURL_ORIENTED_H
#define CEED_HIP_REF_RESTRICTION_CURL_ORIENTED_H

#include <ceed.h>

//------------------------------------------------------------------------------
// L-vector -> E-vector, curl-oriented
//
// Each thread owns distinct E-vector entries, so no synchronization is needed.
//------------------------------------------------------------------------------
extern "C" __global__ void CurlOrientedNoTranspose(const CeedInt *__restrict__ indices, const CeedInt8 *__restrict__ curl_orients,
                                                   const CeedScalar *__restrict__ u, CeedScalar *__restrict__ v) {
  for (CeedInt node = blockIdx.x * blockDim.x + threadIdx.x; node < RSTR_NUM_ELEM * RSTR_ELEM_SIZE; node += blockDim.x * gridDim.x) {
    const CeedInt loc_node = node % RSTR_ELEM_SIZE;
    const CeedInt elem     = node / RSTR_ELEM_SIZE;
    // Off-diagonal neighbor indices; the 0 placeholders at element boundaries
    // are never dereferenced because the matching ternaries below select 0.0.
    const CeedInt  ind_dl         = loc_node > 0 ? indices[node - 1] : 0;
    const CeedInt  ind_d          = indices[node];
    const CeedInt  ind_du         = loc_node < (RSTR_ELEM_SIZE - 1) ? indices[node + 1] : 0;
    const CeedInt8 curl_orient_dl = curl_orients[3 * node + 0];
    const CeedInt8 curl_orient_d  = curl_orients[3 * node + 1];
    const CeedInt8 curl_orient_du = curl_orients[3 * node + 2];

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) {
      // Tridiagonal apply: v_e = dl * u[node-1] + d * u[node] + du * u[node+1]
      CeedScalar value = 0.0;
      value += loc_node > 0 ? u[ind_dl + comp * RSTR_COMP_STRIDE] * curl_orient_dl : 0.0;
      value += u[ind_d + comp * RSTR_COMP_STRIDE] * curl_orient_d;
      value += loc_node < (RSTR_ELEM_SIZE - 1) ? u[ind_du + comp * RSTR_COMP_STRIDE] * curl_orient_du : 0.0;
      v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = value;
    }
  }
}

//------------------------------------------------------------------------------
// L-vector -> E-vector, unsigned curl-oriented
//
// Identical to CurlOrientedNoTranspose except orientations are applied with
// their signs stripped (abs).
//------------------------------------------------------------------------------
extern "C" __global__ void CurlOrientedUnsignedNoTranspose(const CeedInt *__restrict__ indices, const CeedInt8 *__restrict__ curl_orients,
                                                           const CeedScalar *__restrict__ u, CeedScalar *__restrict__ v) {
  for (CeedInt node = blockIdx.x * blockDim.x + threadIdx.x; node < RSTR_NUM_ELEM * RSTR_ELEM_SIZE; node += blockDim.x * gridDim.x) {
    const CeedInt loc_node = node % RSTR_ELEM_SIZE;
    const CeedInt elem     = node / RSTR_ELEM_SIZE;
    const CeedInt  ind_dl         = loc_node > 0 ? indices[node - 1] : 0;
    const CeedInt  ind_d          = indices[node];
    const CeedInt  ind_du         = loc_node < (RSTR_ELEM_SIZE - 1) ? indices[node + 1] : 0;
    const CeedInt8 curl_orient_dl = abs(curl_orients[3 * node + 0]);
    const CeedInt8 curl_orient_d  = abs(curl_orients[3 * node + 1]);
    const CeedInt8 curl_orient_du = abs(curl_orients[3 * node + 2]);

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) {
      CeedScalar value = 0.0;
      value += loc_node > 0 ? u[ind_dl + comp * RSTR_COMP_STRIDE] * curl_orient_dl : 0.0;
      value += u[ind_d + comp * RSTR_COMP_STRIDE] * curl_orient_d;
      value += loc_node < (RSTR_ELEM_SIZE - 1) ? u[ind_du + comp * RSTR_COMP_STRIDE] * curl_orient_du : 0.0;
      v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = value;
    }
  }
}

//------------------------------------------------------------------------------
// E-vector -> L-vector, curl-oriented
//------------------------------------------------------------------------------
#if !USE_DETERMINISTIC
// Non-deterministic variant: multiple E-vector nodes may map to the same
// L-vector entry, so contributions are accumulated with atomicAdd (requires
// hardware atomicAdd support for CeedScalar's type).
//
// Transpose of the tridiagonal apply: the column of T touching this node's
// L-vector entry holds the super-diagonal coefficient of node-1
// (curl_orients[3*node - 1]), the diagonal of node, and the sub-diagonal of
// node+1 (curl_orients[3*node + 3]).
extern "C" __global__ void CurlOrientedTranspose(const CeedInt *__restrict__ indices, const CeedInt8 *__restrict__ curl_orients,
                                                 const CeedScalar *__restrict__ u, CeedScalar *__restrict__ v) {
  for (CeedInt node = blockIdx.x * blockDim.x + threadIdx.x; node < RSTR_NUM_ELEM * RSTR_ELEM_SIZE; node += blockDim.x * gridDim.x) {
    const CeedInt ind      = indices[node];
    const CeedInt loc_node = node % RSTR_ELEM_SIZE;
    const CeedInt elem     = node / RSTR_ELEM_SIZE;
    // Integer 0 (not 0.0) for the CeedInt8 boundary coefficients.
    const CeedInt8 curl_orient_du = loc_node > 0 ? curl_orients[3 * node - 1] : 0;
    const CeedInt8 curl_orient_d  = curl_orients[3 * node + 1];
    const CeedInt8 curl_orient_dl = loc_node < (RSTR_ELEM_SIZE - 1) ? curl_orients[3 * node + 3] : 0;

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) {
      CeedScalar value = 0.0;
      value += loc_node > 0 ? u[loc_node - 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_du : 0.0;
      value += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_d;
      value += loc_node < (RSTR_ELEM_SIZE - 1) ? u[loc_node + 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_dl : 0.0;
      atomicAdd(v + ind + comp * RSTR_COMP_STRIDE, value);
    }
  }
}
#else
// Deterministic variant: one thread per unique L-vector node gathers all of
// its E-vector contributions via the CSR-style transpose maps
// (t_offsets/t_indices), so sums are order-deterministic and atomic-free.
extern "C" __global__ void CurlOrientedTranspose(const CeedInt *__restrict__ l_vec_indices, const CeedInt *__restrict__ t_indices,
                                                 const CeedInt *__restrict__ t_offsets, const CeedInt8 *__restrict__ curl_orients,
                                                 const CeedScalar *__restrict__ u, CeedScalar *__restrict__ v) {
  CeedScalar value[RSTR_NUM_COMP];

  for (CeedInt i = blockIdx.x * blockDim.x + threadIdx.x; i < RSTR_NUM_NODES; i += blockDim.x * gridDim.x) {
    const CeedInt ind     = l_vec_indices[i];
    const CeedInt range_1 = t_offsets[i];
    const CeedInt range_N = t_offsets[i + 1];

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) value[comp] = 0.0;

    for (CeedInt j = range_1; j < range_N; j++) {
      const CeedInt t_ind    = t_indices[j];
      const CeedInt loc_node = t_ind % RSTR_ELEM_SIZE;
      const CeedInt elem     = t_ind / RSTR_ELEM_SIZE;
      // Integer 0 (not 0.0) for the CeedInt8 boundary coefficients.
      const CeedInt8 curl_orient_du = loc_node > 0 ? curl_orients[3 * t_ind - 1] : 0;
      const CeedInt8 curl_orient_d  = curl_orients[3 * t_ind + 1];
      const CeedInt8 curl_orient_dl = loc_node < (RSTR_ELEM_SIZE - 1) ? curl_orients[3 * t_ind + 3] : 0;

      for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) {
        value[comp] += loc_node > 0 ? u[loc_node - 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_du : 0.0;
        value[comp] += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_d;
        value[comp] += loc_node < (RSTR_ELEM_SIZE - 1) ? u[loc_node + 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_dl : 0.0;
      }
    }

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) v[ind + comp * RSTR_COMP_STRIDE] += value[comp];
  }
}
#endif

//------------------------------------------------------------------------------
// E-vector -> L-vector, unsigned curl-oriented
//------------------------------------------------------------------------------
#if !USE_DETERMINISTIC
// Non-deterministic variant of the unsigned transpose: same structure as
// CurlOrientedTranspose, with orientation signs stripped (abs).
extern "C" __global__ void CurlOrientedUnsignedTranspose(const CeedInt *__restrict__ indices, const CeedInt8 *__restrict__ curl_orients,
                                                         const CeedScalar *__restrict__ u, CeedScalar *__restrict__ v) {
  for (CeedInt node = blockIdx.x * blockDim.x + threadIdx.x; node < RSTR_NUM_ELEM * RSTR_ELEM_SIZE; node += blockDim.x * gridDim.x) {
    const CeedInt loc_node = node % RSTR_ELEM_SIZE;
    const CeedInt elem     = node / RSTR_ELEM_SIZE;
    const CeedInt ind      = indices[node];
    // Integer 0 (not 0.0) for the CeedInt8 boundary coefficients.
    const CeedInt8 curl_orient_du = loc_node > 0 ? abs(curl_orients[3 * node - 1]) : 0;
    const CeedInt8 curl_orient_d  = abs(curl_orients[3 * node + 1]);
    const CeedInt8 curl_orient_dl = loc_node < (RSTR_ELEM_SIZE - 1) ? abs(curl_orients[3 * node + 3]) : 0;

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) {
      CeedScalar value = 0.0;
      value += loc_node > 0 ? u[loc_node - 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_du : 0.0;
      value += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_d;
      value += loc_node < (RSTR_ELEM_SIZE - 1) ? u[loc_node + 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_dl : 0.0;
      atomicAdd(v + ind + comp * RSTR_COMP_STRIDE, value);
    }
  }
}
#else
// Deterministic variant of the unsigned transpose: atomic-free gather per
// unique L-vector node, mirroring the signed deterministic kernel.
extern "C" __global__ void CurlOrientedUnsignedTranspose(const CeedInt *__restrict__ l_vec_indices, const CeedInt *__restrict__ t_indices,
                                                         const CeedInt *__restrict__ t_offsets, const CeedInt8 *__restrict__ curl_orients,
                                                         const CeedScalar *__restrict__ u, CeedScalar *__restrict__ v) {
  CeedScalar value[RSTR_NUM_COMP];

  for (CeedInt i = blockIdx.x * blockDim.x + threadIdx.x; i < RSTR_NUM_NODES; i += blockDim.x * gridDim.x) {
    const CeedInt ind     = l_vec_indices[i];
    const CeedInt range_1 = t_offsets[i];
    const CeedInt range_N = t_offsets[i + 1];

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) value[comp] = 0.0;

    for (CeedInt j = range_1; j < range_N; j++) {
      const CeedInt t_ind    = t_indices[j];
      const CeedInt loc_node = t_ind % RSTR_ELEM_SIZE;
      const CeedInt elem     = t_ind / RSTR_ELEM_SIZE;
      // Integer 0 (not 0.0) for the CeedInt8 boundary coefficients.
      const CeedInt8 curl_orient_du = loc_node > 0 ? abs(curl_orients[3 * t_ind - 1]) : 0;
      const CeedInt8 curl_orient_d  = abs(curl_orients[3 * t_ind + 1]);
      const CeedInt8 curl_orient_dl = loc_node < (RSTR_ELEM_SIZE - 1) ? abs(curl_orients[3 * t_ind + 3]) : 0;

      for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) {
        value[comp] += loc_node > 0 ? u[loc_node - 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_du : 0.0;
        value[comp] += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_d;
        value[comp] += loc_node < (RSTR_ELEM_SIZE - 1) ? u[loc_node + 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_dl : 0.0;
      }
    }

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) v[ind + comp * RSTR_COMP_STRIDE] += value[comp];
  }
}
#endif

//------------------------------------------------------------------------------

#endif  // CEED_HIP_REF_RESTRICTION_CURL_ORIENTED_H