1 // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed.h> 9 #include <ceed/backend.h> 10 #include <ceed/jit-tools.h> 11 #include <string.h> 12 #include <hip/hip_runtime.h> 13 14 #include "../hip/ceed-hip-common.h" 15 #include "../hip/ceed-hip-compile.h" 16 #include "ceed-hip-ref.h" 17 18 //------------------------------------------------------------------------------ 19 // Basis apply - tensor 20 //------------------------------------------------------------------------------ 21 static int CeedBasisApplyCore_Hip(CeedBasis basis, bool apply_add, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, 22 CeedVector u, CeedVector v) { 23 Ceed ceed; 24 CeedInt Q_1d, dim; 25 const CeedInt is_transpose = t_mode == CEED_TRANSPOSE; 26 const int max_block_size = 64; 27 const CeedScalar *d_u; 28 CeedScalar *d_v; 29 CeedBasis_Hip *data; 30 31 CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 32 CeedCallBackend(CeedBasisGetData(basis, &data)); 33 34 // Get read/write access to u, v 35 if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 36 else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode"); 37 if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 38 else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 39 40 // Clear v for transpose operation 41 if (is_transpose && !apply_add) { 42 CeedInt num_comp, q_comp, num_nodes, num_qpts; 43 CeedSize length; 44 45 CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 46 CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp)); 47 CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes)); 48 CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 49 length = (CeedSize)num_elem * (CeedSize)num_comp * (t_mode == CEED_TRANSPOSE ? (CeedSize)num_nodes : ((CeedSize)num_qpts * (CeedSize)q_comp)); 50 CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar))); 51 } 52 CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 53 CeedCallBackend(CeedBasisGetDimension(basis, &dim)); 54 55 // Basis action 56 switch (eval_mode) { 57 case CEED_EVAL_INTERP: { 58 void *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &d_u, &d_v}; 59 const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); 60 61 CeedCallBackend(CeedRunKernel_Hip(ceed, data->Interp, num_elem, block_size, interp_args)); 62 } break; 63 case CEED_EVAL_GRAD: { 64 void *grad_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &data->d_grad_1d, &d_u, &d_v}; 65 const CeedInt block_size = max_block_size; 66 67 CeedCallBackend(CeedRunKernel_Hip(ceed, data->Grad, num_elem, block_size, grad_args)); 68 } break; 69 case CEED_EVAL_WEIGHT: { 70 CeedCheck(data->d_q_weight_1d, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weights_1d not set", CeedEvalModes[eval_mode]); 71 void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight_1d, &d_v}; 72 const int block_size_x = Q_1d; 73 const int block_size_y = dim >= 2 ? Q_1d : 1; 74 75 CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, num_elem, block_size_x, block_size_y, 1, weight_args)); 76 } break; 77 case CEED_EVAL_NONE: /* handled separately below */ 78 break; 79 // LCOV_EXCL_START 80 case CEED_EVAL_DIV: 81 case CEED_EVAL_CURL: 82 return CeedError(ceed, CEED_ERROR_BACKEND, "%s not supported", CeedEvalModes[eval_mode]); 83 // LCOV_EXCL_STOP 84 } 85 86 // Restore vectors, cover CEED_EVAL_NONE 87 CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 88 if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); 89 if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 90 CeedCallBackend(CeedDestroy(&ceed)); 91 return CEED_ERROR_SUCCESS; 92 } 93 94 static int CeedBasisApply_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) { 95 CeedCallBackend(CeedBasisApplyCore_Hip(basis, false, num_elem, t_mode, eval_mode, u, v)); 96 return CEED_ERROR_SUCCESS; 97 } 98 99 static int CeedBasisApplyAdd_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, 100 CeedVector v) { 101 CeedCallBackend(CeedBasisApplyCore_Hip(basis, true, num_elem, t_mode, eval_mode, u, v)); 102 return CEED_ERROR_SUCCESS; 103 } 104 105 //------------------------------------------------------------------------------ 106 // Basis apply - tensor AtPoints 107 //------------------------------------------------------------------------------ 108 static int CeedBasisApplyAtPointsCore_Hip(CeedBasis basis, bool apply_add, const CeedInt num_elem, const CeedInt *num_points, 109 CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) { 110 Ceed ceed; 111 CeedInt Q_1d, dim, max_num_points = num_points[0]; 112 const CeedInt is_transpose = t_mode == CEED_TRANSPOSE; 113 const int max_block_size = 32; 114 const CeedScalar *d_x, *d_u; 115 CeedScalar *d_v; 116 CeedBasis_Hip *data; 117 118 CeedCallBackend(CeedBasisGetData(basis, &data)); 119 CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 120 CeedCallBackend(CeedBasisGetDimension(basis, &dim)); 121 122 // Weight handled separately 123 if (eval_mode == CEED_EVAL_WEIGHT) { 124 CeedCallBackend(CeedVectorSetValue(v, 1.0)); 125 return CEED_ERROR_SUCCESS; 126 } 127 128 CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 129 130 // Check padded to uniform number of points per elem 131 for (CeedInt i = 1; i < num_elem; i++) max_num_points = CeedIntMax(max_num_points, num_points[i]); 132 { 133 CeedInt num_comp, q_comp; 134 CeedSize len, len_required; 135 136 CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 137 CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp)); 138 CeedCallBackend(CeedVectorGetLength(is_transpose ? u : v, &len)); 139 len_required = (CeedSize)num_comp * (CeedSize)q_comp * (CeedSize)num_elem * (CeedSize)max_num_points; 140 CeedCheck(len >= len_required, ceed, CEED_ERROR_BACKEND, 141 "Vector at points must be padded to the same number of points in each element for BasisApplyAtPoints on GPU backends." 142 " Found %" CeedSize_FMT ", Required %" CeedSize_FMT, 143 len, len_required); 144 } 145 146 // Move num_points array to device 147 if (is_transpose) { 148 const CeedInt num_bytes = num_elem * sizeof(CeedInt); 149 150 if (num_elem != data->num_elem_at_points) { 151 data->num_elem_at_points = num_elem; 152 153 if (data->d_points_per_elem) CeedCallHip(ceed, hipFree(data->d_points_per_elem)); 154 CeedCallHip(ceed, hipMalloc((void **)&data->d_points_per_elem, num_bytes)); 155 CeedCallBackend(CeedFree(&data->h_points_per_elem)); 156 CeedCallBackend(CeedCalloc(num_elem, &data->h_points_per_elem)); 157 } 158 if (memcmp(data->h_points_per_elem, num_points, num_bytes)) { 159 memcpy(data->h_points_per_elem, num_points, num_bytes); 160 CeedCallHip(ceed, hipMemcpy(data->d_points_per_elem, num_points, num_bytes, hipMemcpyHostToDevice)); 161 } 162 } 163 164 // Build kernels if needed 165 if (data->num_points != max_num_points) { 166 CeedInt P_1d; 167 168 CeedCallBackend(CeedBasisGetNumNodes1D(basis, &P_1d)); 169 data->num_points = max_num_points; 170 171 // -- Create interp matrix to Chebyshev coefficients 172 if (!data->d_chebyshev_interp_1d) { 173 CeedSize interp_bytes; 174 CeedScalar *chebyshev_interp_1d; 175 176 interp_bytes = P_1d * Q_1d * sizeof(CeedScalar); 177 CeedCallBackend(CeedCalloc(P_1d * Q_1d, &chebyshev_interp_1d)); 178 CeedCallBackend(CeedBasisGetChebyshevInterp1D(basis, chebyshev_interp_1d)); 179 CeedCallHip(ceed, hipMalloc((void **)&data->d_chebyshev_interp_1d, interp_bytes)); 180 CeedCallHip(ceed, hipMemcpy(data->d_chebyshev_interp_1d, chebyshev_interp_1d, interp_bytes, hipMemcpyHostToDevice)); 181 CeedCallBackend(CeedFree(&chebyshev_interp_1d)); 182 } 183 184 // -- Compile kernels 185 const char basis_kernel_source[] = "// AtPoints basis source\n#include <ceed/jit-source/hip/hip-ref-basis-tensor-at-points.h>\n"; 186 CeedInt num_comp; 187 188 if (data->moduleAtPoints) CeedCallHip(ceed, hipModuleUnload(data->moduleAtPoints)); 189 CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 190 CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->moduleAtPoints, 9, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN", 191 Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, 192 "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS", 193 max_num_points, "POINTS_BUFF_LEN", CeedIntPow(Q_1d, dim - 1))); 194 CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPoints)); 195 CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "GradAtPoints", &data->GradAtPoints)); 196 } 197 198 // Get read/write access to u, v 199 CeedCallBackend(CeedVectorGetArrayRead(x_ref, CEED_MEM_DEVICE, &d_x)); 200 if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 201 else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode"); 202 if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 203 else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 204 205 // Clear v for transpose operation 206 if (is_transpose && !apply_add) { 207 CeedInt num_comp, q_comp, num_nodes; 208 CeedSize length; 209 210 CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 211 CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp)); 212 CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes)); 213 length = 214 (CeedSize)num_elem * (CeedSize)num_comp * (t_mode == CEED_TRANSPOSE ? (CeedSize)num_nodes : ((CeedSize)max_num_points * (CeedSize)q_comp)); 215 CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar))); 216 } 217 218 // Basis action 219 switch (eval_mode) { 220 case CEED_EVAL_INTERP: { 221 void *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v}; 222 const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); 223 224 CeedCallBackend(CeedRunKernel_Hip(ceed, data->InterpAtPoints, num_elem, block_size, interp_args)); 225 } break; 226 case CEED_EVAL_GRAD: { 227 void *grad_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v}; 228 const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); 229 230 CeedCallBackend(CeedRunKernel_Hip(ceed, data->GradAtPoints, num_elem, block_size, grad_args)); 231 } break; 232 case CEED_EVAL_WEIGHT: 233 case CEED_EVAL_NONE: /* handled separately below */ 234 break; 235 // LCOV_EXCL_START 236 case CEED_EVAL_DIV: 237 case CEED_EVAL_CURL: 238 return CeedError(ceed, CEED_ERROR_BACKEND, "%s not supported", CeedEvalModes[eval_mode]); 239 // LCOV_EXCL_STOP 240 } 241 242 // Restore vectors, cover CEED_EVAL_NONE 243 CeedCallBackend(CeedVectorRestoreArrayRead(x_ref, &d_x)); 244 CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 245 if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); 246 if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 247 CeedCallBackend(CeedDestroy(&ceed)); 248 return CEED_ERROR_SUCCESS; 249 } 250 251 static int CeedBasisApplyAtPoints_Hip(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode, 252 CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) { 253 CeedCallBackend(CeedBasisApplyAtPointsCore_Hip(basis, false, num_elem, num_points, t_mode, eval_mode, x_ref, u, v)); 254 return CEED_ERROR_SUCCESS; 255 } 256 257 static int CeedBasisApplyAddAtPoints_Hip(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode, 258 CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) { 259 CeedCallBackend(CeedBasisApplyAtPointsCore_Hip(basis, true, num_elem, num_points, t_mode, eval_mode, x_ref, u, v)); 260 return CEED_ERROR_SUCCESS; 261 } 262 263 //------------------------------------------------------------------------------ 264 // Basis apply - non-tensor 265 //------------------------------------------------------------------------------ 266 static int CeedBasisApplyNonTensorCore_Hip(CeedBasis basis, bool apply_add, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, 267 CeedVector u, CeedVector v) { 268 Ceed ceed; 269 CeedInt num_nodes, num_qpts; 270 const CeedInt is_transpose = t_mode == CEED_TRANSPOSE; 271 const int elems_per_block = 1; 272 const int grid = CeedDivUpInt(num_elem, elems_per_block); 273 const CeedScalar *d_u; 274 CeedScalar *d_v; 275 CeedBasisNonTensor_Hip *data; 276 277 CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 278 CeedCallBackend(CeedBasisGetData(basis, &data)); 279 CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 280 CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes)); 281 282 // Get read/write access to u, v 283 if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 284 else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode"); 285 if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 286 else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 287 288 // Clear v for transpose operation 289 if (is_transpose && !apply_add) { 290 CeedSize length; 291 292 CeedCallBackend(CeedVectorGetLength(v, &length)); 293 CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar))); 294 } 295 296 // Apply basis operation 297 switch (eval_mode) { 298 case CEED_EVAL_INTERP: { 299 void *interp_args[] = {(void *)&num_elem, &data->d_interp, &d_u, &d_v}; 300 const int block_size_x = is_transpose ? num_nodes : num_qpts; 301 302 if (is_transpose) { 303 CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->InterpTranspose, grid, block_size_x, 1, elems_per_block, interp_args)); 304 } else { 305 CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Interp, grid, block_size_x, 1, elems_per_block, interp_args)); 306 } 307 } break; 308 case CEED_EVAL_GRAD: { 309 void *grad_args[] = {(void *)&num_elem, &data->d_grad, &d_u, &d_v}; 310 const int block_size_x = is_transpose ? num_nodes : num_qpts; 311 312 if (is_transpose) { 313 CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, grad_args)); 314 } else { 315 CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, grad_args)); 316 } 317 } break; 318 case CEED_EVAL_DIV: { 319 void *div_args[] = {(void *)&num_elem, &data->d_div, &d_u, &d_v}; 320 const int block_size_x = is_transpose ? num_nodes : num_qpts; 321 322 if (is_transpose) { 323 CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, div_args)); 324 } else { 325 CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, div_args)); 326 } 327 } break; 328 case CEED_EVAL_CURL: { 329 void *curl_args[] = {(void *)&num_elem, &data->d_curl, &d_u, &d_v}; 330 const int block_size_x = is_transpose ? num_nodes : num_qpts; 331 332 if (is_transpose) { 333 CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, curl_args)); 334 } else { 335 CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, curl_args)); 336 } 337 } break; 338 case CEED_EVAL_WEIGHT: { 339 CeedCheck(data->d_q_weight, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weights not set", CeedEvalModes[eval_mode]); 340 void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight, &d_v}; 341 342 CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid, num_qpts, 1, elems_per_block, weight_args)); 343 } break; 344 case CEED_EVAL_NONE: /* handled separately below */ 345 break; 346 } 347 348 // Restore vectors, cover CEED_EVAL_NONE 349 CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 350 if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); 351 if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 352 CeedCallBackend(CeedDestroy(&ceed)); 353 return CEED_ERROR_SUCCESS; 354 } 355 356 static int CeedBasisApplyNonTensor_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, 357 CeedVector v) { 358 CeedCallBackend(CeedBasisApplyNonTensorCore_Hip(basis, false, num_elem, t_mode, eval_mode, u, v)); 359 return CEED_ERROR_SUCCESS; 360 } 361 362 static int CeedBasisApplyAddNonTensor_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, 363 CeedVector v) { 364 CeedCallBackend(CeedBasisApplyNonTensorCore_Hip(basis, true, num_elem, t_mode, eval_mode, u, v)); 365 return CEED_ERROR_SUCCESS; 366 } 367 368 //------------------------------------------------------------------------------ 369 // Destroy tensor basis 370 //------------------------------------------------------------------------------ 371 static int CeedBasisDestroy_Hip(CeedBasis basis) { 372 Ceed ceed; 373 CeedBasis_Hip *data; 374 375 CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 376 CeedCallBackend(CeedBasisGetData(basis, &data)); 377 CeedCallHip(ceed, hipModuleUnload(data->module)); 378 if (data->moduleAtPoints) CeedCallHip(ceed, hipModuleUnload(data->moduleAtPoints)); 379 if (data->d_q_weight_1d) CeedCallHip(ceed, hipFree(data->d_q_weight_1d)); 380 CeedCallBackend(CeedFree(&data->h_points_per_elem)); 381 if (data->d_points_per_elem) CeedCallHip(ceed, hipFree(data->d_points_per_elem)); 382 CeedCallHip(ceed, hipFree(data->d_interp_1d)); 383 CeedCallHip(ceed, hipFree(data->d_grad_1d)); 384 CeedCallHip(ceed, hipFree(data->d_chebyshev_interp_1d)); 385 CeedCallBackend(CeedFree(&data)); 386 CeedCallBackend(CeedDestroy(&ceed)); 387 return CEED_ERROR_SUCCESS; 388 } 389 390 //------------------------------------------------------------------------------ 391 // Destroy non-tensor basis 392 //------------------------------------------------------------------------------ 393 static int CeedBasisDestroyNonTensor_Hip(CeedBasis basis) { 394 Ceed ceed; 395 CeedBasisNonTensor_Hip *data; 396 397 CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 398 CeedCallBackend(CeedBasisGetData(basis, &data)); 399 CeedCallHip(ceed, hipModuleUnload(data->module)); 400 if (data->d_q_weight) CeedCallHip(ceed, hipFree(data->d_q_weight)); 401 CeedCallHip(ceed, hipFree(data->d_interp)); 402 CeedCallHip(ceed, hipFree(data->d_grad)); 403 CeedCallHip(ceed, hipFree(data->d_div)); 404 CeedCallHip(ceed, hipFree(data->d_curl)); 405 CeedCallBackend(CeedFree(&data)); 406 CeedCallBackend(CeedDestroy(&ceed)); 407 return CEED_ERROR_SUCCESS; 408 } 409 410 //------------------------------------------------------------------------------ 411 // Create tensor 412 //------------------------------------------------------------------------------ 413 int CeedBasisCreateTensorH1_Hip(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d, 414 const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis) { 415 Ceed ceed; 416 CeedInt num_comp; 417 const CeedInt q_bytes = Q_1d * sizeof(CeedScalar); 418 const CeedInt interp_bytes = q_bytes * P_1d; 419 CeedBasis_Hip *data; 420 421 CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 422 CeedCallBackend(CeedCalloc(1, &data)); 423 424 // Copy data to GPU 425 if (q_weight_1d) { 426 CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight_1d, q_bytes)); 427 CeedCallHip(ceed, hipMemcpy(data->d_q_weight_1d, q_weight_1d, q_bytes, hipMemcpyHostToDevice)); 428 } 429 CeedCallHip(ceed, hipMalloc((void **)&data->d_interp_1d, interp_bytes)); 430 CeedCallHip(ceed, hipMemcpy(data->d_interp_1d, interp_1d, interp_bytes, hipMemcpyHostToDevice)); 431 CeedCallHip(ceed, hipMalloc((void **)&data->d_grad_1d, interp_bytes)); 432 CeedCallHip(ceed, hipMemcpy(data->d_grad_1d, grad_1d, interp_bytes, hipMemcpyHostToDevice)); 433 434 // Compile basis kernels 435 const char basis_kernel_source[] = "// Tensor basis source\n#include <ceed/jit-source/hip/hip-ref-basis-tensor.h>\n"; 436 437 CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 438 CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 7, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN", 439 Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, 440 "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim))); 441 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 442 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Grad", &data->Grad)); 443 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 444 445 CeedCallBackend(CeedBasisSetData(basis, data)); 446 447 // Register backend functions 448 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApply_Hip)); 449 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAdd_Hip)); 450 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAtPoints", CeedBasisApplyAtPoints_Hip)); 451 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAddAtPoints", CeedBasisApplyAddAtPoints_Hip)); 452 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroy_Hip)); 453 CeedCallBackend(CeedDestroy(&ceed)); 454 return CEED_ERROR_SUCCESS; 455 } 456 457 //------------------------------------------------------------------------------ 458 // Create non-tensor H^1 459 //------------------------------------------------------------------------------ 460 int CeedBasisCreateH1_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *grad, 461 const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { 462 Ceed ceed; 463 CeedInt num_comp, q_comp_interp, q_comp_grad; 464 const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); 465 CeedBasisNonTensor_Hip *data; 466 467 CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 468 CeedCallBackend(CeedCalloc(1, &data)); 469 470 // Copy basis data to GPU 471 CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp)); 472 CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp_grad)); 473 if (q_weight) { 474 CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes)); 475 CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice)); 476 } 477 if (interp) { 478 const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp; 479 480 CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes)); 481 CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice)); 482 } 483 if (grad) { 484 const CeedInt grad_bytes = q_bytes * num_nodes * q_comp_grad; 485 486 CeedCallHip(ceed, hipMalloc((void **)&data->d_grad, grad_bytes)); 487 CeedCallHip(ceed, hipMemcpy(data->d_grad, grad, grad_bytes, hipMemcpyHostToDevice)); 488 } 489 490 // Compile basis kernels 491 const char basis_kernel_source[] = "// Nontensor basis source\n#include <ceed/jit-source/hip/hip-ref-basis-nontensor.h>\n"; 492 493 CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 494 CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", 495 q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_grad, "BASIS_NUM_COMP", num_comp)); 496 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 497 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose)); 498 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv)); 499 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); 500 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 501 502 CeedCallBackend(CeedBasisSetData(basis, data)); 503 504 // Register backend functions 505 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip)); 506 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Hip)); 507 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip)); 508 CeedCallBackend(CeedDestroy(&ceed)); 509 return CEED_ERROR_SUCCESS; 510 } 511 512 //------------------------------------------------------------------------------ 513 // Create non-tensor H(div) 514 //------------------------------------------------------------------------------ 515 int CeedBasisCreateHdiv_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *div, 516 const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { 517 Ceed ceed; 518 CeedInt num_comp, q_comp_interp, q_comp_div; 519 const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); 520 CeedBasisNonTensor_Hip *data; 521 522 CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 523 CeedCallBackend(CeedCalloc(1, &data)); 524 525 // Copy basis data to GPU 526 CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp)); 527 CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp_div)); 528 if (q_weight) { 529 CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes)); 530 CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice)); 531 } 532 if (interp) { 533 const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp; 534 535 CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes)); 536 CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice)); 537 } 538 if (div) { 539 const CeedInt div_bytes = q_bytes * num_nodes * q_comp_div; 540 541 CeedCallHip(ceed, hipMalloc((void **)&data->d_div, div_bytes)); 542 CeedCallHip(ceed, hipMemcpy(data->d_div, div, div_bytes, hipMemcpyHostToDevice)); 543 } 544 545 // Compile basis kernels 546 const char basis_kernel_source[] = "// Nontensor basis source\n#include <ceed/jit-source/hip/hip-ref-basis-nontensor.h>\n"; 547 548 CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 549 CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", 550 q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_div, "BASIS_NUM_COMP", num_comp)); 551 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 552 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose)); 553 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv)); 554 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); 555 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 556 557 CeedCallBackend(CeedBasisSetData(basis, data)); 558 559 // Register backend functions 560 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip)); 561 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Hip)); 562 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip)); 563 CeedCallBackend(CeedDestroy(&ceed)); 564 return CEED_ERROR_SUCCESS; 565 } 566 567 //------------------------------------------------------------------------------ 568 // Create non-tensor H(curl) 569 //------------------------------------------------------------------------------ 570 int CeedBasisCreateHcurl_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 571 const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { 572 Ceed ceed; 573 CeedInt num_comp, q_comp_interp, q_comp_curl; 574 const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); 575 CeedBasisNonTensor_Hip *data; 576 577 CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 578 CeedCallBackend(CeedCalloc(1, &data)); 579 580 // Copy basis data to GPU 581 CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp)); 582 CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp_curl)); 583 if (q_weight) { 584 CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes)); 585 CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice)); 586 } 587 if (interp) { 588 const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp; 589 590 CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes)); 591 CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice)); 592 } 593 if (curl) { 594 const CeedInt curl_bytes = q_bytes * num_nodes * q_comp_curl; 595 596 CeedCallHip(ceed, hipMalloc((void **)&data->d_curl, curl_bytes)); 597 CeedCallHip(ceed, hipMemcpy(data->d_curl, curl, curl_bytes, hipMemcpyHostToDevice)); 598 } 599 600 // Compile basis kernels 601 const char basis_kernel_source[] = "// Nontensor basis source\n#include <ceed/jit-source/hip/hip-ref-basis-nontensor.h>\n"; 602 603 CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 604 CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", 605 q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_curl, "BASIS_NUM_COMP", num_comp)); 606 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 607 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose)); 608 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv)); 609 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); 610 CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 611 612 CeedCallBackend(CeedBasisSetData(basis, data)); 613 614 // Register backend functions 615 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip)); 616 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Hip)); 617 CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip)); 618 CeedCallBackend(CeedDestroy(&ceed)); 619 return CEED_ERROR_SUCCESS; 620 } 621 622 //------------------------------------------------------------------------------ 623