1 // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed.h> 9 #include <ceed/backend.h> 10 #include <ceed/jit-tools.h> 11 #include <hip/hip_runtime.h> 12 13 #include "../hip/ceed-hip-common.h" 14 #include "../hip/ceed-hip-compile.h" 15 #include "ceed-hip-ref.h" 16 17 //------------------------------------------------------------------------------ 18 // Basis apply - tensor 19 //------------------------------------------------------------------------------ 20 static int CeedBasisApplyCore_Hip(CeedBasis basis, bool apply_add, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, 21 CeedVector u, CeedVector v) { 22 Ceed ceed; 23 CeedInt Q_1d, dim; 24 const CeedInt is_transpose = t_mode == CEED_TRANSPOSE; 25 const int max_block_size = 64; 26 const CeedScalar *d_u; 27 CeedScalar *d_v; 28 CeedBasis_Hip *data; 29 30 CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 31 CeedCallBackend(CeedBasisGetData(basis, &data)); 32 33 // Get read/write access to u, v 34 if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 35 else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode"); 36 if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 37 else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 38 39 // Clear v for transpose operation 40 if (is_transpose && !apply_add) { 41 CeedSize length; 42 43 CeedCallBackend(CeedVectorGetLength(v, &length)); 44 CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar))); 45 } 46 CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 47 CeedCallBackend(CeedBasisGetDimension(basis, &dim)); 48 49 // Basis action 50 switch (eval_mode) { 51 case CEED_EVAL_INTERP: { 52 void *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &d_u, &d_v}; 53 const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); 54 55 CeedCallBackend(CeedRunKernel_Hip(ceed, data->Interp, num_elem, block_size, interp_args)); 56 } break; 57 case CEED_EVAL_GRAD: { 58 void *grad_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &data->d_grad_1d, &d_u, &d_v}; 59 const CeedInt block_size = max_block_size; 60 61 CeedCallBackend(CeedRunKernel_Hip(ceed, data->Grad, num_elem, block_size, grad_args)); 62 } break; 63 case CEED_EVAL_WEIGHT: { 64 CeedCheck(data->d_q_weight_1d, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weights_1d not set", CeedEvalModes[eval_mode]); 65 void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight_1d, &d_v}; 66 const int block_size_x = Q_1d; 67 const int block_size_y = dim >= 2 ? Q_1d : 1; 68 69 CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, num_elem, block_size_x, block_size_y, 1, weight_args)); 70 } break; 71 case CEED_EVAL_NONE: /* handled separately below */ 72 break; 73 // LCOV_EXCL_START 74 case CEED_EVAL_DIV: 75 case CEED_EVAL_CURL: 76 return CeedError(ceed, CEED_ERROR_BACKEND, "%s not supported", CeedEvalModes[eval_mode]); 77 // LCOV_EXCL_STOP 78 } 79 80 // Restore vectors, cover CEED_EVAL_NONE 81 CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 82 if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); 83 if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 84 return CEED_ERROR_SUCCESS; 85 } 86 87 static int CeedBasisApply_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) { 88 CeedCallBackend(CeedBasisApplyCore_Hip(basis, false, num_elem, t_mode, eval_mode, u, v)); 89 return CEED_ERROR_SUCCESS; 90 } 91 92 static int CeedBasisApplyAdd_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, 93 CeedVector v) { 94 CeedCallBackend(CeedBasisApplyCore_Hip(basis, true, num_elem, t_mode, eval_mode, u, v)); 95 return CEED_ERROR_SUCCESS; 96 } 97 98 //------------------------------------------------------------------------------ 99 // Basis apply - tensor AtPoints 100 //------------------------------------------------------------------------------ 101 static int CeedBasisApplyAtPointsCore_Hip(CeedBasis basis, bool apply_add, const CeedInt num_elem, const CeedInt *num_points, 102 CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) { 103 Ceed ceed; 104 CeedInt Q_1d, dim, max_num_points = num_points[0]; 105 const CeedInt is_transpose = t_mode == CEED_TRANSPOSE; 106 const int max_block_size = 32; 107 const CeedScalar *d_x, *d_u; 108 CeedScalar *d_v; 109 CeedBasis_Hip *data; 110 111 CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 112 CeedCallBackend(CeedBasisGetData(basis, &data)); 113 CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 114 CeedCallBackend(CeedBasisGetDimension(basis, &dim)); 115 116 // Check uniform number of points per elem 117 for (CeedInt i = 1; i < num_elem; i++) { 118 CeedCheck(max_num_points == num_points[i], ceed, CEED_ERROR_BACKEND, 119 "BasisApplyAtPoints only supported for the same number of points in each element"); 120 } 121 122 // Weight handled separately 123 if (eval_mode == CEED_EVAL_WEIGHT) { 124 CeedCall(CeedVectorSetValue(v, 1.0)); 125 return CEED_ERROR_SUCCESS; 126 } 127 128 // Build kernels if needed 129 if (data->num_points != max_num_points) { 130 CeedInt P_1d; 131 132 CeedCallBackend(CeedBasisGetNumNodes1D(basis, &P_1d)); 133 data->num_points = max_num_points; 134 135 // -- Create interp matrix to Chebyshev coefficients 136 if (!data->d_chebyshev_interp_1d) { 137 CeedSize interp_bytes; 138 CeedScalar *chebyshev_interp_1d; 139 140 interp_bytes = P_1d * Q_1d * sizeof(CeedScalar); 141 CeedCallBackend(CeedCalloc(P_1d * Q_1d, &chebyshev_interp_1d)); 142 CeedCall(CeedBasisGetChebyshevInterp1D(basis, chebyshev_interp_1d)); 143 CeedCallHip(ceed, hipMalloc((void **)&data->d_chebyshev_interp_1d, interp_bytes)); 144 CeedCallHip(ceed, hipMemcpy(data->d_chebyshev_interp_1d, chebyshev_interp_1d, interp_bytes, hipMemcpyHostToDevice)); 145 CeedCallBackend(CeedFree(&chebyshev_interp_1d)); 146 } 147 148 // -- Compile kernels 149 char *basis_kernel_source; 150 const char *basis_kernel_path; 151 CeedInt num_comp; 152 153 if (data->moduleAtPoints) CeedCallHip(ceed, hipModuleUnload(data->moduleAtPoints)); 154 CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 155 CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-tensor-at-points.h", &basis_kernel_path)); 156 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 157 CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); 158 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 159 CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->moduleAtPoints, 9, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN", 160 Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, 161 "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS", 162 max_num_points, "POINTS_BUFF_LEN", CeedIntPow(Q_1d, dim - 1))); 163 CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPoints)); 164 CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "GradAtPoints", &data->GradAtPoints)); 165 CeedCallBackend(CeedFree(&basis_kernel_path)); 166 CeedCallBackend(CeedFree(&basis_kernel_source)); 167 } 168 169 // Get read/write access to u, v 170 CeedCallBackend(CeedVectorGetArrayRead(x_ref, CEED_MEM_DEVICE, &d_x)); 171 if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 172 else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode"); 173 if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 174 else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 175 176 // Clear v for transpose operation 177 if (is_transpose && !apply_add) { 178 CeedSize length; 179 180 CeedCallBackend(CeedVectorGetLength(v, &length)); 181 CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar))); 182 } 183 184 // Basis action 185 switch (eval_mode) { 186 case CEED_EVAL_INTERP: { 187 void *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_chebyshev_interp_1d, &d_x, &d_u, &d_v}; 188 const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); 189 190 CeedCallBackend(CeedRunKernel_Hip(ceed, data->InterpAtPoints, num_elem, block_size, interp_args)); 191 } break; 192 case CEED_EVAL_GRAD: { 193 void *grad_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_chebyshev_interp_1d, &d_x, &d_u, &d_v}; 194 const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); 195 196 CeedCallBackend(CeedRunKernel_Hip(ceed, data->GradAtPoints, num_elem, block_size, grad_args)); 197 } break; 198 case CEED_EVAL_WEIGHT: 199 case CEED_EVAL_NONE: /* handled separately below */ 200 break; 201 // LCOV_EXCL_START 202 case CEED_EVAL_DIV: 203 case CEED_EVAL_CURL: 204 return CeedError(ceed, CEED_ERROR_BACKEND, "%s not supported", CeedEvalModes[eval_mode]); 205 // LCOV_EXCL_STOP 206 } 207 208 // Restore vectors, cover CEED_EVAL_NONE 209 CeedCallBackend(CeedVectorRestoreArrayRead(x_ref, &d_x)); 210 CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 211 if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); 212 if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 213 return CEED_ERROR_SUCCESS; 214 } 215 216 static int CeedBasisApplyAtPoints_Hip(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode, 217 CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) { 218 CeedCallBackend(CeedBasisApplyAtPointsCore_Hip(basis, false, num_elem, num_points, t_mode, eval_mode, x_ref, u, v)); 219 return CEED_ERROR_SUCCESS; 220 } 221 222 static int CeedBasisApplyAddAtPoints_Hip(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode, 223 CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) { 224 CeedCallBackend(CeedBasisApplyAtPointsCore_Hip(basis, true, num_elem, num_points, t_mode, eval_mode, x_ref, u, v)); 225 return CEED_ERROR_SUCCESS; 226 } 227 228 //------------------------------------------------------------------------------ 229 // Basis apply - non-tensor 230 //------------------------------------------------------------------------------ 231 static int CeedBasisApplyNonTensorCore_Hip(CeedBasis basis, bool apply_add, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, 232 CeedVector u, CeedVector v) { 233 Ceed ceed; 234 CeedInt num_nodes, num_qpts; 235 const CeedInt is_transpose = t_mode == CEED_TRANSPOSE; 236 const int elems_per_block = 1; 237 const int grid = CeedDivUpInt(num_elem, elems_per_block); 238 const CeedScalar *d_u; 239 CeedScalar *d_v; 240 CeedBasisNonTensor_Hip *data; 241 242 CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 243 CeedCallBackend(CeedBasisGetData(basis, &data)); 244 CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 245 CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes)); 246 247 // Get read/write access to u, v 248 if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 249 else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode"); 250 if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 251 else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 252 253 // Clear v for transpose operation 254 if (is_transpose && !apply_add) { 255 CeedSize length; 256 257 CeedCallBackend(CeedVectorGetLength(v, &length)); 258 CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar))); 259 } 260 261 // Apply basis operation 262 switch (eval_mode) { 263 case CEED_EVAL_INTERP: { 264 void *interp_args[] = {(void *)&num_elem, &data->d_interp, &d_u, &d_v}; 265 const int block_size_x = is_transpose ? num_nodes : num_qpts; 266 267 if (is_transpose) { 268 CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->InterpTranspose, grid, block_size_x, 1, elems_per_block, interp_args)); 269 } else { 270 CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Interp, grid, block_size_x, 1, elems_per_block, interp_args)); 271 } 272 } break; 273 case CEED_EVAL_GRAD: { 274 void *grad_args[] = {(void *)&num_elem, &data->d_grad, &d_u, &d_v}; 275 const int block_size_x = is_transpose ? num_nodes : num_qpts; 276 277 if (is_transpose) { 278 CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, grad_args)); 279 } else { 280 CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, grad_args)); 281 } 282 } break; 283 case CEED_EVAL_DIV: { 284 void *div_args[] = {(void *)&num_elem, &data->d_div, &d_u, &d_v}; 285 const int block_size_x = is_transpose ? num_nodes : num_qpts; 286 287 if (is_transpose) { 288 CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, div_args)); 289 } else { 290 CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, div_args)); 291 } 292 } break; 293 case CEED_EVAL_CURL: { 294 void *curl_args[] = {(void *)&num_elem, &data->d_curl, &d_u, &d_v}; 295 const int block_size_x = is_transpose ? num_nodes : num_qpts; 296 297 if (is_transpose) { 298 CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, curl_args)); 299 } else { 300 CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, curl_args)); 301 } 302 } break; 303 case CEED_EVAL_WEIGHT: { 304 CeedCheck(data->d_q_weight, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weights not set", CeedEvalModes[eval_mode]); 305 void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight, &d_v}; 306 307 CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid, num_qpts, 1, elems_per_block, weight_args)); 308 } break; 309 case CEED_EVAL_NONE: /* handled separately below */ 310 break; 311 } 312 313 // Restore vectors, cover CEED_EVAL_NONE 314 CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 315 if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); 316 if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 317 return CEED_ERROR_SUCCESS; 318 } 319 320 static int CeedBasisApplyNonTensor_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, 321 CeedVector v) { 322 CeedCallBackend(CeedBasisApplyNonTensorCore_Hip(basis, false, num_elem, t_mode, eval_mode, u, v)); 323 return CEED_ERROR_SUCCESS; 324 } 325 326 static int CeedBasisApplyAddNonTensor_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, 327 CeedVector v) { 328 CeedCallBackend(CeedBasisApplyNonTensorCore_Hip(basis, true, num_elem, t_mode, eval_mode, u, v)); 329 return CEED_ERROR_SUCCESS; 330 } 331 332 //------------------------------------------------------------------------------ 333 // Destroy tensor basis 334 //------------------------------------------------------------------------------ 335 static int CeedBasisDestroy_Hip(CeedBasis basis) { 336 Ceed ceed; 337 CeedBasis_Hip *data; 338 339 CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 340 CeedCallBackend(CeedBasisGetData(basis, &data)); 341 CeedCallHip(ceed, hipModuleUnload(data->module)); 342 if (data->moduleAtPoints) CeedCallHip(ceed, hipModuleUnload(data->moduleAtPoints)); 343 if (data->d_q_weight_1d) CeedCallHip(ceed, hipFree(data->d_q_weight_1d)); 344 CeedCallHip(ceed, hipFree(data->d_interp_1d)); 345 CeedCallHip(ceed, hipFree(data->d_grad_1d)); 346 CeedCallHip(ceed, hipFree(data->d_chebyshev_interp_1d)); 347 CeedCallBackend(CeedFree(&data)); 348 return CEED_ERROR_SUCCESS; 349 } 350 351 //------------------------------------------------------------------------------ 352 // Destroy non-tensor basis 353 //------------------------------------------------------------------------------ 354 static int CeedBasisDestroyNonTensor_Hip(CeedBasis basis) { 355 Ceed ceed; 356 CeedBasisNonTensor_Hip *data; 357 358 CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 359 CeedCallBackend(CeedBasisGetData(basis, &data)); 360 CeedCallHip(ceed, hipModuleUnload(data->module)); 361 if (data->d_q_weight) CeedCallHip(ceed, hipFree(data->d_q_weight)); 362 CeedCallHip(ceed, hipFree(data->d_interp)); 363 CeedCallHip(ceed, hipFree(data->d_grad)); 364 CeedCallHip(ceed, hipFree(data->d_div)); 365 CeedCallHip(ceed, hipFree(data->d_curl)); 366 CeedCallBackend(CeedFree(&data)); 367 return CEED_ERROR_SUCCESS; 368 } 369 370 //------------------------------------------------------------------------------ 371 // Create tensor 372 //------------------------------------------------------------------------------ 373 int CeedBasisCreateTensorH1_Hip(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d, 374 const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis) { 375 Ceed ceed; 376 char *basis_kernel_source; 377 const char *basis_kernel_path; 378 CeedInt num_comp; 379 const CeedInt q_bytes = Q_1d * sizeof(CeedScalar); 380 const CeedInt interp_bytes = q_bytes * P_1d; 381 CeedBasis_Hip *data; 382 383 CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 384 CeedCallBackend(CeedCalloc(1, &data)); 385 386 // Copy data to GPU 387 if (q_weight_1d) { 388 CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight_1d, q_bytes)); 389 CeedCallHip(ceed, hipMemcpy(data->d_q_weight_1d, q_weight_1d, q_bytes, hipMemcpyHostToDevice)); 390 } 391 CeedCallHip(ceed, hipMalloc((void **)&data->d_interp_1d, interp_bytes)); 392 CeedCallHip(ceed, hipMemcpy(data->d_interp_1d, interp_1d, interp_bytes, hipMemcpyHostToDevice)); 393 CeedCallHip(ceed, hipMalloc((void **)&data->d_grad_1d, interp_bytes)); 394 CeedCallHip(ceed, hipMemcpy(data->d_grad_1d, grad_1d, interp_bytes, hipMemcpyHostToDevice)); 395 396 // Compile basis kernels 397 CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 398 CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-tensor.h", &basis_kernel_path)); 399 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 400 CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); 401 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 402 CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 7, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN", 403 Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, 404 "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim))); 405 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 406 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Grad", &data->Grad)); 407 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 408 CeedCallBackend(CeedFree(&basis_kernel_path)); 409 CeedCallBackend(CeedFree(&basis_kernel_source)); 410 411 CeedCallBackend(CeedBasisSetData(basis, data)); 412 413 // Register backend functions 414 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApply_Hip)); 415 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAdd_Hip)); 416 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAtPoints", CeedBasisApplyAtPoints_Hip)); 417 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAddAtPoints", CeedBasisApplyAddAtPoints_Hip)); 418 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroy_Hip)); 419 return CEED_ERROR_SUCCESS; 420 } 421 422 //------------------------------------------------------------------------------ 423 // Create non-tensor H^1 424 //------------------------------------------------------------------------------ 425 int CeedBasisCreateH1_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *grad, 426 const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { 427 Ceed ceed; 428 char *basis_kernel_source; 429 const char *basis_kernel_path; 430 CeedInt num_comp, q_comp_interp, q_comp_grad; 431 const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); 432 CeedBasisNonTensor_Hip *data; 433 434 CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 435 CeedCallBackend(CeedCalloc(1, &data)); 436 437 // Copy basis data to GPU 438 CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp)); 439 CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp_grad)); 440 if (q_weight) { 441 CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes)); 442 CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice)); 443 } 444 if (interp) { 445 const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp; 446 447 CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes)); 448 CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice)); 449 } 450 if (grad) { 451 const CeedInt grad_bytes = q_bytes * num_nodes * q_comp_grad; 452 453 CeedCallHip(ceed, hipMalloc((void **)&data->d_grad, grad_bytes)); 454 CeedCallHip(ceed, hipMemcpy(data->d_grad, grad, grad_bytes, hipMemcpyHostToDevice)); 455 } 456 457 // Compile basis kernels 458 CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 459 CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-nontensor.h", &basis_kernel_path)); 460 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 461 CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); 462 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 463 CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", 464 q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_grad, "BASIS_NUM_COMP", num_comp)); 465 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 466 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose)); 467 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv)); 468 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); 469 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 470 CeedCallBackend(CeedFree(&basis_kernel_path)); 471 CeedCallBackend(CeedFree(&basis_kernel_source)); 472 473 CeedCallBackend(CeedBasisSetData(basis, data)); 474 475 // Register backend functions 476 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip)); 477 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Hip)); 478 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip)); 479 return CEED_ERROR_SUCCESS; 480 } 481 482 //------------------------------------------------------------------------------ 483 // Create non-tensor H(div) 484 //------------------------------------------------------------------------------ 485 int CeedBasisCreateHdiv_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *div, 486 const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { 487 Ceed ceed; 488 char *basis_kernel_source; 489 const char *basis_kernel_path; 490 CeedInt num_comp, q_comp_interp, q_comp_div; 491 const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); 492 CeedBasisNonTensor_Hip *data; 493 494 CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 495 CeedCallBackend(CeedCalloc(1, &data)); 496 497 // Copy basis data to GPU 498 CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp)); 499 CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp_div)); 500 if (q_weight) { 501 CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes)); 502 CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice)); 503 } 504 if (interp) { 505 const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp; 506 507 CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes)); 508 CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice)); 509 } 510 if (div) { 511 const CeedInt div_bytes = q_bytes * num_nodes * q_comp_div; 512 513 CeedCallHip(ceed, hipMalloc((void **)&data->d_div, div_bytes)); 514 CeedCallHip(ceed, hipMemcpy(data->d_div, div, div_bytes, hipMemcpyHostToDevice)); 515 } 516 517 // Compile basis kernels 518 CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 519 CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-nontensor.h", &basis_kernel_path)); 520 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 521 CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); 522 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 523 CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", 524 q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_div, "BASIS_NUM_COMP", num_comp)); 525 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 526 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose)); 527 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv)); 528 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); 529 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 530 CeedCallBackend(CeedFree(&basis_kernel_path)); 531 CeedCallBackend(CeedFree(&basis_kernel_source)); 532 533 CeedCallBackend(CeedBasisSetData(basis, data)); 534 535 // Register backend functions 536 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip)); 537 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Hip)); 538 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip)); 539 return CEED_ERROR_SUCCESS; 540 } 541 542 //------------------------------------------------------------------------------ 543 // Create non-tensor H(curl) 544 //------------------------------------------------------------------------------ 545 int CeedBasisCreateHcurl_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 546 const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { 547 Ceed ceed; 548 char *basis_kernel_source; 549 const char *basis_kernel_path; 550 CeedInt num_comp, q_comp_interp, q_comp_curl; 551 const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); 552 CeedBasisNonTensor_Hip *data; 553 554 CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 555 CeedCallBackend(CeedCalloc(1, &data)); 556 557 // Copy basis data to GPU 558 CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp)); 559 CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp_curl)); 560 if (q_weight) { 561 CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes)); 562 CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice)); 563 } 564 if (interp) { 565 const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp; 566 567 CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes)); 568 CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice)); 569 } 570 if (curl) { 571 const CeedInt curl_bytes = q_bytes * num_nodes * q_comp_curl; 572 573 CeedCallHip(ceed, hipMalloc((void **)&data->d_curl, curl_bytes)); 574 CeedCallHip(ceed, hipMemcpy(data->d_curl, curl, curl_bytes, hipMemcpyHostToDevice)); 575 } 576 577 // Compile basis kernels 578 CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 579 CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-nontensor.h", &basis_kernel_path)); 580 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 581 CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); 582 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 583 CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", 584 q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_curl, "BASIS_NUM_COMP", num_comp)); 585 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 586 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose)); 587 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv)); 588 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); 589 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 590 CeedCallBackend(CeedFree(&basis_kernel_path)); 591 CeedCallBackend(CeedFree(&basis_kernel_source)); 592 593 CeedCallBackend(CeedBasisSetData(basis, data)); 594 595 // Register backend functions 596 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip)); 597 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Hip)); 598 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip)); 599 return CEED_ERROR_SUCCESS; 600 } 601 602 //------------------------------------------------------------------------------ 603