// Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED: http://github.com/ceed

/// @file
/// Internal header for HIP curl-oriented element restriction kernels
///
/// Curl orientations are stored as a per-element tridiagonal operator: for every
/// E-vector node there are three CeedInt8 entries in `curl_orients`,
/// {sub-diagonal, diagonal, super-diagonal}, weighting the contributions of the
/// previous, current, and next local node within the element.
///
/// The following macros are expected to be defined at JIT time:
///   RSTR_NUM_ELEM, RSTR_ELEM_SIZE, RSTR_NUM_COMP, RSTR_COMP_STRIDE,
///   RSTR_NUM_NODES, USE_DETERMINISTIC
///
/// E-vector layout (from the indexing below): node stride 1, element stride
/// RSTR_ELEM_SIZE, component stride RSTR_ELEM_SIZE * RSTR_NUM_ELEM.
/// L-vector layout: component stride RSTR_COMP_STRIDE.

#include <ceed.h>

//------------------------------------------------------------------------------
// L-vector -> E-vector, curl-oriented
//
// Grid-stride loop over all E-vector nodes. Each thread gathers up to three
// L-vector entries (previous/current/next local node) weighted by the signed
// tridiagonal orientation entries for its node.
//------------------------------------------------------------------------------
extern "C" __global__ void CurlOrientedNoTranspose(const CeedInt *__restrict__ indices, const CeedInt8 *__restrict__ curl_orients,
                                                   const CeedScalar *__restrict__ u, CeedScalar *__restrict__ v) {
  for (CeedInt node = blockIdx.x * blockDim.x + threadIdx.x; node < RSTR_NUM_ELEM * RSTR_ELEM_SIZE; node += blockDim.x * gridDim.x) {
    const CeedInt loc_node = node % RSTR_ELEM_SIZE;
    const CeedInt elem     = node / RSTR_ELEM_SIZE;
    // Neighbor L-vector indices; the dummy 0 at element boundaries is safe
    // because the corresponding term is masked out in the sum below.
    const CeedInt  ind_dl         = loc_node > 0 ? indices[node - 1] : 0;
    const CeedInt  ind_d          = indices[node];
    const CeedInt  ind_du         = loc_node < (RSTR_ELEM_SIZE - 1) ? indices[node + 1] : 0;
    const CeedInt8 curl_orient_dl = curl_orients[3 * node + 0];
    const CeedInt8 curl_orient_d  = curl_orients[3 * node + 1];
    const CeedInt8 curl_orient_du = curl_orients[3 * node + 2];

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) {
      CeedScalar value = 0.0;

      value += loc_node > 0 ? u[ind_dl + comp * RSTR_COMP_STRIDE] * curl_orient_dl : 0.0;
      value += u[ind_d + comp * RSTR_COMP_STRIDE] * curl_orient_d;
      value += loc_node < (RSTR_ELEM_SIZE - 1) ? u[ind_du + comp * RSTR_COMP_STRIDE] * curl_orient_du : 0.0;
      v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = value;
    }
  }
}

//------------------------------------------------------------------------------
// L-vector -> E-vector, unsigned curl-oriented
//
// Same as CurlOrientedNoTranspose but the orientation weights are taken as
// absolute values, discarding the sign information.
//------------------------------------------------------------------------------
extern "C" __global__ void CurlOrientedUnsignedNoTranspose(const CeedInt *__restrict__ indices, const CeedInt8 *__restrict__ curl_orients,
                                                           const CeedScalar *__restrict__ u, CeedScalar *__restrict__ v) {
  for (CeedInt node = blockIdx.x * blockDim.x + threadIdx.x; node < RSTR_NUM_ELEM * RSTR_ELEM_SIZE; node += blockDim.x * gridDim.x) {
    const CeedInt loc_node = node % RSTR_ELEM_SIZE;
    const CeedInt elem     = node / RSTR_ELEM_SIZE;
    // Neighbor L-vector indices; dummy 0 at element boundaries is masked below.
    const CeedInt  ind_dl         = loc_node > 0 ? indices[node - 1] : 0;
    const CeedInt  ind_d          = indices[node];
    const CeedInt  ind_du         = loc_node < (RSTR_ELEM_SIZE - 1) ? indices[node + 1] : 0;
    const CeedInt8 curl_orient_dl = abs(curl_orients[3 * node + 0]);
    const CeedInt8 curl_orient_d  = abs(curl_orients[3 * node + 1]);
    const CeedInt8 curl_orient_du = abs(curl_orients[3 * node + 2]);

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) {
      CeedScalar value = 0.0;

      value += loc_node > 0 ? u[ind_dl + comp * RSTR_COMP_STRIDE] * curl_orient_dl : 0.0;
      value += u[ind_d + comp * RSTR_COMP_STRIDE] * curl_orient_d;
      value += loc_node < (RSTR_ELEM_SIZE - 1) ? u[ind_du + comp * RSTR_COMP_STRIDE] * curl_orient_du : 0.0;
      v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = value;
    }
  }
}

//------------------------------------------------------------------------------
// E-vector -> L-vector, curl-oriented
//
// Applies the transpose of the tridiagonal orientation operator: row `node` of
// the transpose uses the super-diagonal entry of the previous node
// (curl_orients[3 * node - 1]), the diagonal of this node, and the sub-diagonal
// of the next node (curl_orients[3 * node + 3]); boundary rows get weight 0.
// NOTE: the boundary weight is the integer literal 0 (the original used the
// double literal 0.0, which was implicitly narrowed to CeedInt8).
//------------------------------------------------------------------------------
#if !USE_DETERMINISTIC
// Non-deterministic path: scatter with atomicAdd, so L-vector nodes shared by
// several threads accumulate in a data-race-free but unordered fashion.
extern "C" __global__ void CurlOrientedTranspose(const CeedInt *__restrict__ indices, const CeedInt8 *__restrict__ curl_orients,
                                                 const CeedScalar *__restrict__ u, CeedScalar *__restrict__ v) {
  for (CeedInt node = blockIdx.x * blockDim.x + threadIdx.x; node < RSTR_NUM_ELEM * RSTR_ELEM_SIZE; node += blockDim.x * gridDim.x) {
    const CeedInt  ind            = indices[node];
    const CeedInt  loc_node       = node % RSTR_ELEM_SIZE;
    const CeedInt  elem           = node / RSTR_ELEM_SIZE;
    const CeedInt8 curl_orient_du = loc_node > 0 ? curl_orients[3 * node - 1] : 0;
    const CeedInt8 curl_orient_d  = curl_orients[3 * node + 1];
    const CeedInt8 curl_orient_dl = loc_node < (RSTR_ELEM_SIZE - 1) ? curl_orients[3 * node + 3] : 0;

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) {
      CeedScalar value = 0.0;

      value += loc_node > 0 ? u[loc_node - 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_du : 0.0;
      value += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_d;
      value +=
          loc_node < (RSTR_ELEM_SIZE - 1) ? u[loc_node + 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_dl : 0.0;
      atomicAdd(v + ind + comp * RSTR_COMP_STRIDE, value);
    }
  }
}
#else
// Deterministic path: one thread per L-vector node. t_offsets/t_indices give,
// for each L-vector node i, the list of E-vector nodes that map to it, so each
// thread owns its output entry and accumulates with a plain, ordered +=
// (reproducible sums, no atomics).
extern "C" __global__ void CurlOrientedTranspose(const CeedInt *__restrict__ l_vec_indices, const CeedInt *__restrict__ t_indices,
                                                 const CeedInt *__restrict__ t_offsets, const CeedInt8 *__restrict__ curl_orients,
                                                 const CeedScalar *__restrict__ u, CeedScalar *__restrict__ v) {
  CeedScalar value[RSTR_NUM_COMP];

  for (CeedInt i = blockIdx.x * blockDim.x + threadIdx.x; i < RSTR_NUM_NODES; i += blockDim.x * gridDim.x) {
    const CeedInt ind     = l_vec_indices[i];
    const CeedInt range_1 = t_offsets[i];
    const CeedInt range_N = t_offsets[i + 1];

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) value[comp] = 0.0;

    // Gather every E-vector node that contributes to L-vector node `ind`.
    for (CeedInt j = range_1; j < range_N; j++) {
      const CeedInt  t_ind          = t_indices[j];
      const CeedInt  loc_node       = t_ind % RSTR_ELEM_SIZE;
      const CeedInt  elem           = t_ind / RSTR_ELEM_SIZE;
      const CeedInt8 curl_orient_du = loc_node > 0 ? curl_orients[3 * t_ind - 1] : 0;
      const CeedInt8 curl_orient_d  = curl_orients[3 * t_ind + 1];
      const CeedInt8 curl_orient_dl = loc_node < (RSTR_ELEM_SIZE - 1) ? curl_orients[3 * t_ind + 3] : 0;

      for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) {
        value[comp] += loc_node > 0 ? u[loc_node - 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_du : 0.0;
        value[comp] += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_d;
        value[comp] +=
            loc_node < (RSTR_ELEM_SIZE - 1) ? u[loc_node + 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_dl : 0.0;
      }
    }

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) v[ind + comp * RSTR_COMP_STRIDE] += value[comp];
  }
}
#endif

//------------------------------------------------------------------------------
// E-vector -> L-vector, unsigned curl-oriented
//
// Same transpose structure as above but with absolute-value orientation
// weights (sign information discarded).
//------------------------------------------------------------------------------
#if !USE_DETERMINISTIC
// Non-deterministic path: scatter with atomicAdd (see signed variant above).
extern "C" __global__ void CurlOrientedUnsignedTranspose(const CeedInt *__restrict__ indices, const CeedInt8 *__restrict__ curl_orients,
                                                         const CeedScalar *__restrict__ u, CeedScalar *__restrict__ v) {
  for (CeedInt node = blockIdx.x * blockDim.x + threadIdx.x; node < RSTR_NUM_ELEM * RSTR_ELEM_SIZE; node += blockDim.x * gridDim.x) {
    const CeedInt  loc_node       = node % RSTR_ELEM_SIZE;
    const CeedInt  elem           = node / RSTR_ELEM_SIZE;
    const CeedInt  ind            = indices[node];
    const CeedInt8 curl_orient_du = loc_node > 0 ? abs(curl_orients[3 * node - 1]) : 0;
    const CeedInt8 curl_orient_d  = abs(curl_orients[3 * node + 1]);
    const CeedInt8 curl_orient_dl = loc_node < (RSTR_ELEM_SIZE - 1) ? abs(curl_orients[3 * node + 3]) : 0;

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) {
      CeedScalar value = 0.0;

      value += loc_node > 0 ? u[loc_node - 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_du : 0.0;
      value += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_d;
      value +=
          loc_node < (RSTR_ELEM_SIZE - 1) ? u[loc_node + 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_dl : 0.0;
      atomicAdd(v + ind + comp * RSTR_COMP_STRIDE, value);
    }
  }
}
#else
// Deterministic path: one thread per L-vector node, plain ordered accumulation
// via the transpose-index lists (see signed variant above).
extern "C" __global__ void CurlOrientedUnsignedTranspose(const CeedInt *__restrict__ l_vec_indices, const CeedInt *__restrict__ t_indices,
                                                         const CeedInt *__restrict__ t_offsets, const CeedInt8 *__restrict__ curl_orients,
                                                         const CeedScalar *__restrict__ u, CeedScalar *__restrict__ v) {
  CeedScalar value[RSTR_NUM_COMP];

  for (CeedInt i = blockIdx.x * blockDim.x + threadIdx.x; i < RSTR_NUM_NODES; i += blockDim.x * gridDim.x) {
    const CeedInt ind     = l_vec_indices[i];
    const CeedInt range_1 = t_offsets[i];
    const CeedInt range_N = t_offsets[i + 1];

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) value[comp] = 0.0;

    // Gather every E-vector node that contributes to L-vector node `ind`.
    for (CeedInt j = range_1; j < range_N; j++) {
      const CeedInt  t_ind          = t_indices[j];
      const CeedInt  loc_node       = t_ind % RSTR_ELEM_SIZE;
      const CeedInt  elem           = t_ind / RSTR_ELEM_SIZE;
      const CeedInt8 curl_orient_du = loc_node > 0 ? abs(curl_orients[3 * t_ind - 1]) : 0;
      const CeedInt8 curl_orient_d  = abs(curl_orients[3 * t_ind + 1]);
      const CeedInt8 curl_orient_dl = loc_node < (RSTR_ELEM_SIZE - 1) ? abs(curl_orients[3 * t_ind + 3]) : 0;

      for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) {
        value[comp] += loc_node > 0 ? u[loc_node - 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_du : 0.0;
        value[comp] += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_d;
        value[comp] +=
            loc_node < (RSTR_ELEM_SIZE - 1) ? u[loc_node + 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient_dl : 0.0;
      }
    }

    for (CeedInt comp = 0; comp < RSTR_NUM_COMP; comp++) v[ind + comp * RSTR_COMP_STRIDE] += value[comp];
  }
}
#endif