1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 #include <ceed/ceed.h> 9 #include <ceed/backend.h> 10 #include <ceed/jit-tools.h> 11 #include <hip/hip_runtime.h> 12 #include "ceed-hip-ref.h" 13 #include "../hip/ceed-hip-compile.h" 14 15 //------------------------------------------------------------------------------ 16 // Basis apply - tensor 17 //------------------------------------------------------------------------------ 18 int CeedBasisApply_Hip(CeedBasis basis, const CeedInt num_elem, 19 CeedTransposeMode t_mode, 20 CeedEvalMode eval_mode, CeedVector u, CeedVector v) { 21 int ierr; 22 Ceed ceed; 23 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 24 Ceed_Hip *ceed_Hip; 25 ierr = CeedGetData(ceed, &ceed_Hip); CeedChkBackend(ierr); 26 CeedBasis_Hip *data; 27 ierr = CeedBasisGetData(basis, &data); CeedChkBackend(ierr); 28 const CeedInt transpose = t_mode == CEED_TRANSPOSE; 29 const int max_block_size = 64; 30 31 // Read vectors 32 const CeedScalar *d_u; 33 CeedScalar *d_v; 34 if (eval_mode != CEED_EVAL_WEIGHT) { 35 ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u); CeedChkBackend(ierr); 36 } 37 ierr = CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); 38 39 // Clear v for transpose operation 40 if (t_mode == CEED_TRANSPOSE) { 41 CeedSize length; 42 ierr = CeedVectorGetLength(v, &length); CeedChkBackend(ierr); 43 ierr = hipMemset(d_v, 0, length * sizeof(CeedScalar)); 44 CeedChk_Hip(ceed, ierr); 45 } 46 47 // Basis action 48 switch (eval_mode) { 49 case CEED_EVAL_INTERP: { 50 void *interp_args[] = {(void *) &num_elem, (void *) &transpose, 51 &data->d_interp_1d, &d_u, &d_v 52 }; 53 CeedInt Q_1d, dim; 54 ierr = CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d); CeedChkBackend(ierr); 55 ierr = CeedBasisGetDimension(basis, &dim); CeedChkBackend(ierr); 56 CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); 57 58 ierr = CeedRunKernelHip(ceed, data->Interp, num_elem, block_size, interp_args); 59 CeedChkBackend(ierr); 60 } break; 61 case CEED_EVAL_GRAD: { 62 void *grad_args[] = {(void *) &num_elem, (void *) &transpose, &data->d_interp_1d, 63 &data->d_grad_1d, &d_u, &d_v 64 }; 65 CeedInt block_size = max_block_size; 66 67 ierr = CeedRunKernelHip(ceed, data->Grad, num_elem, block_size, grad_args); 68 CeedChkBackend(ierr); 69 } break; 70 case CEED_EVAL_WEIGHT: { 71 void *weight_args[] = {(void *) &num_elem, (void *) &data->d_q_weight_1d, &d_v}; 72 const int block_size = 64; 73 int grid_size = num_elem / block_size; 74 if (block_size * grid_size < num_elem) 75 grid_size += 1; 76 77 ierr = CeedRunKernelHip(ceed, data->Weight, grid_size, block_size, 78 weight_args); CeedChkBackend(ierr); 79 } break; 80 // LCOV_EXCL_START 81 // Evaluate the divergence to/from the quadrature points 82 case CEED_EVAL_DIV: 83 return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported"); 84 // Evaluate the curl to/from the quadrature points 85 case CEED_EVAL_CURL: 86 return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported"); 87 // Take no action, BasisApply should not have been called 88 case CEED_EVAL_NONE: 89 return CeedError(ceed, CEED_ERROR_BACKEND, 90 "CEED_EVAL_NONE does not make sense in this context"); 91 // LCOV_EXCL_STOP 92 } 93 94 // Restore vectors 95 if (eval_mode != CEED_EVAL_WEIGHT) { 96 ierr = CeedVectorRestoreArrayRead(u, &d_u); CeedChkBackend(ierr); 97 } 98 ierr = CeedVectorRestoreArray(v, &d_v); CeedChkBackend(ierr); 99 return CEED_ERROR_SUCCESS; 100 } 101 102 //------------------------------------------------------------------------------ 103 // Basis apply - non-tensor 104 //------------------------------------------------------------------------------ 105 int CeedBasisApplyNonTensor_Hip(CeedBasis basis, const CeedInt num_elem, 106 CeedTransposeMode t_mode, CeedEvalMode eval_mode, 107 CeedVector u, CeedVector v) { 108 int ierr; 109 Ceed ceed; 110 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 111 Ceed_Hip *ceed_Hip; 112 ierr = CeedGetData(ceed, &ceed_Hip); CeedChkBackend(ierr); 113 CeedBasisNonTensor_Hip *data; 114 ierr = CeedBasisGetData(basis, &data); CeedChkBackend(ierr); 115 CeedInt num_nodes, num_qpts; 116 ierr = CeedBasisGetNumQuadraturePoints(basis, &num_qpts); CeedChkBackend(ierr); 117 ierr = CeedBasisGetNumNodes(basis, &num_nodes); CeedChkBackend(ierr); 118 const CeedInt transpose = t_mode == CEED_TRANSPOSE; 119 int elemsPerBlock = 1; 120 int grid = num_elem/elemsPerBlock+(( 121 num_elem/elemsPerBlock*elemsPerBlock<num_elem)?1:0); 122 123 // Read vectors 124 const CeedScalar *d_u; 125 CeedScalar *d_v; 126 if (eval_mode != CEED_EVAL_WEIGHT) { 127 ierr = CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u); CeedChkBackend(ierr); 128 } 129 ierr = CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v); CeedChkBackend(ierr); 130 131 // Clear v for transpose operation 132 if (t_mode == CEED_TRANSPOSE) { 133 CeedSize length; 134 ierr = CeedVectorGetLength(v, &length); CeedChkBackend(ierr); 135 ierr = hipMemset(d_v, 0, length * sizeof(CeedScalar)); 136 CeedChk_Hip(ceed, ierr); 137 } 138 139 // Apply basis operation 140 switch (eval_mode) { 141 case CEED_EVAL_INTERP: { 142 void *interp_args[] = {(void *) &num_elem, (void *) &transpose, 143 &data->d_interp, &d_u, &d_v 144 }; 145 if (transpose) { 146 ierr = CeedRunKernelDimHip(ceed, data->Interp, grid, num_nodes, 1, 147 elemsPerBlock, interp_args); CeedChkBackend(ierr); 148 } else { 149 ierr = CeedRunKernelDimHip(ceed, data->Interp, grid, num_qpts, 1, 150 elemsPerBlock, interp_args); CeedChkBackend(ierr); 151 } 152 } break; 153 case CEED_EVAL_GRAD: { 154 void *grad_args[] = {(void *) &num_elem, (void *) &transpose, &data->d_grad, 155 &d_u, &d_v 156 }; 157 if (transpose) { 158 ierr = CeedRunKernelDimHip(ceed, data->Grad, grid, num_nodes, 1, 159 elemsPerBlock, grad_args); CeedChkBackend(ierr); 160 } else { 161 ierr = CeedRunKernelDimHip(ceed, data->Grad, grid, num_qpts, 1, 162 elemsPerBlock, grad_args); CeedChkBackend(ierr); 163 } 164 } break; 165 case CEED_EVAL_WEIGHT: { 166 void *weight_args[] = {(void *) &num_elem, (void *) &data->d_q_weight, &d_v}; 167 ierr = CeedRunKernelDimHip(ceed, data->Weight, grid, num_qpts, 1, 168 elemsPerBlock, weight_args); CeedChkBackend(ierr); 169 } break; 170 // LCOV_EXCL_START 171 // Evaluate the divergence to/from the quadrature points 172 case CEED_EVAL_DIV: 173 return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_DIV not supported"); 174 // Evaluate the curl to/from the quadrature points 175 case CEED_EVAL_CURL: 176 return CeedError(ceed, CEED_ERROR_BACKEND, "CEED_EVAL_CURL not supported"); 177 // Take no action, BasisApply should not have been called 178 case CEED_EVAL_NONE: 179 return CeedError(ceed, CEED_ERROR_BACKEND, 180 "CEED_EVAL_NONE does not make sense in this context"); 181 // LCOV_EXCL_STOP 182 } 183 184 // Restore vectors 185 if (eval_mode != CEED_EVAL_WEIGHT) { 186 ierr = CeedVectorRestoreArrayRead(u, &d_u); CeedChkBackend(ierr); 187 } 188 ierr = CeedVectorRestoreArray(v, &d_v); CeedChkBackend(ierr); 189 return CEED_ERROR_SUCCESS; 190 } 191 192 //------------------------------------------------------------------------------ 193 // Destroy tensor basis 194 //------------------------------------------------------------------------------ 195 static int CeedBasisDestroy_Hip(CeedBasis basis) { 196 int ierr; 197 Ceed ceed; 198 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 199 200 CeedBasis_Hip *data; 201 ierr = CeedBasisGetData(basis, &data); CeedChkBackend(ierr); 202 203 CeedChk_Hip(ceed, hipModuleUnload(data->module)); 204 205 ierr = hipFree(data->d_q_weight_1d); CeedChk_Hip(ceed, ierr); 206 ierr = hipFree(data->d_interp_1d); CeedChk_Hip(ceed, ierr); 207 ierr = hipFree(data->d_grad_1d); CeedChk_Hip(ceed, ierr); 208 ierr = CeedFree(&data); CeedChkBackend(ierr); 209 210 return CEED_ERROR_SUCCESS; 211 } 212 213 //------------------------------------------------------------------------------ 214 // Destroy non-tensor basis 215 //------------------------------------------------------------------------------ 216 static int CeedBasisDestroyNonTensor_Hip(CeedBasis basis) { 217 int ierr; 218 Ceed ceed; 219 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 220 221 CeedBasisNonTensor_Hip *data; 222 ierr = CeedBasisGetData(basis, &data); CeedChkBackend(ierr); 223 224 CeedChk_Hip(ceed, hipModuleUnload(data->module)); 225 226 ierr = hipFree(data->d_q_weight); CeedChk_Hip(ceed, ierr); 227 ierr = hipFree(data->d_interp); CeedChk_Hip(ceed, ierr); 228 ierr = hipFree(data->d_grad); CeedChk_Hip(ceed, ierr); 229 ierr = CeedFree(&data); CeedChkBackend(ierr); 230 231 return CEED_ERROR_SUCCESS; 232 } 233 234 //------------------------------------------------------------------------------ 235 // Create tensor 236 //------------------------------------------------------------------------------ 237 int CeedBasisCreateTensorH1_Hip(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, 238 const CeedScalar *interp_1d, 239 const CeedScalar *grad_1d, 240 const CeedScalar *qref1d, 241 const CeedScalar *q_weight_1d, 242 CeedBasis basis) { 243 int ierr; 244 Ceed ceed; 245 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 246 CeedBasis_Hip *data; 247 ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); 248 249 // Copy data to GPU 250 const CeedInt q_bytes = Q_1d * sizeof(CeedScalar); 251 ierr = hipMalloc((void **)&data->d_q_weight_1d, q_bytes); 252 CeedChk_Hip(ceed, ierr); 253 ierr = hipMemcpy(data->d_q_weight_1d, q_weight_1d, q_bytes, 254 hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 255 256 const CeedInt interp_bytes = q_bytes * P_1d; 257 ierr = hipMalloc((void **)&data->d_interp_1d, interp_bytes); 258 CeedChk_Hip(ceed, ierr); 259 ierr = hipMemcpy(data->d_interp_1d, interp_1d, interp_bytes, 260 hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 261 262 ierr = hipMalloc((void **)&data->d_grad_1d, interp_bytes); 263 CeedChk_Hip(ceed, ierr); 264 ierr = hipMemcpy(data->d_grad_1d, grad_1d, interp_bytes, 265 hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 266 267 // Complie basis kernels 268 CeedInt ncomp; 269 ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 270 char *basis_kernel_path, *basis_kernel_source; 271 ierr = CeedGetJitAbsolutePath(ceed, 272 "ceed/jit-source/hip/hip-ref-basis-tensor.h", 273 &basis_kernel_path); CeedChkBackend(ierr); 274 CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n"); 275 ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source); 276 CeedChkBackend(ierr); 277 CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source Complete! -----\n"); 278 ierr = CeedCompileHip(ceed, basis_kernel_source, &data->module, 7, 279 "BASIS_Q_1D", Q_1d, 280 "BASIS_P_1D", P_1d, 281 "BASIS_BUF_LEN", ncomp * CeedIntPow(Q_1d > P_1d ? 282 Q_1d : P_1d, dim), 283 "BASIS_DIM", dim, 284 "BASIS_NUM_COMP", ncomp, 285 "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), 286 "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim) 287 ); CeedChkBackend(ierr); 288 ierr = CeedGetKernelHip(ceed, data->module, "Interp", &data->Interp); 289 CeedChkBackend(ierr); 290 ierr = CeedGetKernelHip(ceed, data->module, "Grad", &data->Grad); 291 CeedChkBackend(ierr); 292 ierr = CeedGetKernelHip(ceed, data->module, "Weight", &data->Weight); 293 CeedChkBackend(ierr); 294 ierr = CeedFree(&basis_kernel_path); CeedChkBackend(ierr); 295 ierr = CeedFree(&basis_kernel_source); CeedChkBackend(ierr); 296 297 ierr = CeedBasisSetData(basis, data); CeedChkBackend(ierr); 298 299 ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 300 CeedBasisApply_Hip); CeedChkBackend(ierr); 301 ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 302 CeedBasisDestroy_Hip); CeedChkBackend(ierr); 303 return CEED_ERROR_SUCCESS; 304 } 305 306 //------------------------------------------------------------------------------ 307 // Create non-tensor 308 //------------------------------------------------------------------------------ 309 int CeedBasisCreateH1_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, 310 CeedInt num_qpts, const CeedScalar *interp, 311 const CeedScalar *grad, const CeedScalar *qref, 312 const CeedScalar *q_weight, CeedBasis basis) { 313 int ierr; 314 Ceed ceed; 315 ierr = CeedBasisGetCeed(basis, &ceed); CeedChkBackend(ierr); 316 CeedBasisNonTensor_Hip *data; 317 ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); 318 319 // Copy basis data to GPU 320 const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); 321 ierr = hipMalloc((void **)&data->d_q_weight, q_bytes); CeedChk_Hip(ceed, ierr); 322 ierr = hipMemcpy(data->d_q_weight, q_weight, q_bytes, 323 hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 324 325 const CeedInt interp_bytes = q_bytes * num_nodes; 326 ierr = hipMalloc((void **)&data->d_interp, interp_bytes); 327 CeedChk_Hip(ceed, ierr); 328 ierr = hipMemcpy(data->d_interp, interp, interp_bytes, 329 hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 330 331 const CeedInt grad_bytes = q_bytes * num_nodes * dim; 332 ierr = hipMalloc((void **)&data->d_grad, grad_bytes); CeedChk_Hip(ceed, ierr); 333 ierr = hipMemcpy(data->d_grad, grad, grad_bytes, 334 hipMemcpyHostToDevice); CeedChk_Hip(ceed, ierr); 335 336 // Compile basis kernels 337 CeedInt ncomp; 338 ierr = CeedBasisGetNumComponents(basis, &ncomp); CeedChkBackend(ierr); 339 char *basis_kernel_path, *basis_kernel_source; 340 ierr = CeedGetJitAbsolutePath(ceed, 341 "ceed/jit-source/hip/hip-ref-basis-nontensor.h", 342 &basis_kernel_path); CeedChkBackend(ierr); 343 CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source -----\n"); 344 ierr = CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source); 345 CeedChkBackend(ierr); 346 CeedDebug256(ceed, 2, "----- Loading Basis Kernel Source Complete! -----\n"); 347 ierr = CeedCompileHip(ceed, basis_kernel_source, &data->module, 4, 348 "BASIS_Q", num_qpts, 349 "BASIS_P", num_nodes, 350 "BASIS_DIM", dim, 351 "BASIS_NUM_COMP", ncomp 352 ); CeedChk_Hip(ceed, ierr); 353 ierr = CeedGetKernelHip(ceed, data->module, "Interp", &data->Interp); 354 CeedChk_Hip(ceed, ierr); 355 ierr = CeedGetKernelHip(ceed, data->module, "Grad", &data->Grad); 356 CeedChk_Hip(ceed, ierr); 357 ierr = CeedGetKernelHip(ceed, data->module, "Weight", &data->Weight); 358 CeedChk_Hip(ceed, ierr); 359 ierr = CeedFree(&basis_kernel_path); CeedChkBackend(ierr); 360 ierr = CeedFree(&basis_kernel_source); CeedChkBackend(ierr); 361 362 ierr = CeedBasisSetData(basis, data); CeedChkBackend(ierr); 363 364 // Register backend functions 365 ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Apply", 366 CeedBasisApplyNonTensor_Hip); CeedChkBackend(ierr); 367 ierr = CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", 368 CeedBasisDestroyNonTensor_Hip); CeedChkBackend(ierr); 369 return CEED_ERROR_SUCCESS; 370 } 371 //------------------------------------------------------------------------------ 372