1 #pragma once 2 3 #include <petscdevice.h> 4 #include <petscpkg_version.h> 5 6 /* MANSEC = Sys */ 7 /* SUBMANSEC = Device */ 8 9 #if defined(__HCC__) || (defined(__clang__) && defined(__HIP__)) 10 #define PETSC_USING_HCC 1 11 #endif 12 13 #if PetscDefined(HAVE_HIP) 14 #include <hip/hip_runtime.h> 15 16 #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0) 17 18 // cupmScalarPtrCast() returns hip{Float,Double}Complex while hipBLAS uses hipBlas{Float,Double}Complex, causing many VecCUPM errors like 19 // error: no matching function for call to 'cupmBlasXdot'. 20 // Before rocm-6.0, one can define ROCM_MATHLIBS_API_USE_HIP_COMPLEX to force rocm to 'typedef hipDoubleComplex hipBlasDoubleComplex' for example. 21 // Since then, ROCM_MATHLIBS_API_USE_HIP_COMPLEX is deprecated, and one can define HIPBLAS_V2 to use version 2 of hipBLAS that directly use hipDoubleComplex etc. 22 // Per AMD, HIPBLAS_V2 will be removed in the future so that hipBLAS only provides updated APIs (but not yet in 6.2.2 as of Sep. 27, 2024). 23 // 24 // see https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.0.0/functions.html#complex-datatypes 25 // and https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.2.2/functions.html#hipblas-v2-and-deprecations 26 #if PETSC_PKG_HIP_VERSION_GE(6, 0, 0) 27 #define HIPBLAS_V2 28 #else 29 #define ROCM_MATHLIBS_API_USE_HIP_COMPLEX 30 #endif 31 #include <hipblas/hipblas.h> 32 #include <hipsparse/hipsparse.h> 33 #else 34 #include <hipblas.h> 35 #include <hipsparse.h> 36 #endif 37 38 #if PETSC_PKG_HIP_VERSION_LT(5, 4, 0) 39 #define HIPSPARSE_ORDER_COL HIPSPARSE_ORDER_COLUMN 40 #endif 41 42 #if defined(__HIP_PLATFORM_NVCC__) 43 #include <cusolverDn.h> 44 #else // __HIP_PLATFORM_HCC__ 45 #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0) 46 #include <hipsolver/hipsolver.h> 47 #else 48 #include <hipsolver.h> 49 #endif 50 #endif // __HIP_PLATFORM_NVCC__ 51 #include <hip/hip_complex.h> // for hipComplex, hipDoubleComplex 52 53 // REMOVE ME 54 #define WaitForHIP() hipDeviceSynchronize() 55 56 /* hipBLAS, hipSPARSE and hipSolver does not have hip*GetErrorName(). We create one on our own. */ 57 PETSC_EXTERN const char *PetscHIPBLASGetErrorName(hipblasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */ 58 PETSC_EXTERN const char *PetscHIPSPARSEGetErrorName(hipsparseStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSPARSE macro */ 59 PETSC_EXTERN const char *PetscHIPSolverGetErrorName(hipsolverStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSOLVER macro */ 60 61 #define PetscCallHIP(...) \ 62 do { \ 63 const hipError_t _p_hip_err__ = __VA_ARGS__; \ 64 if (PetscUnlikely(_p_hip_err__ != hipSuccess)) { \ 65 const char *name = hipGetErrorName(_p_hip_err__); \ 66 const char *descr = hipGetErrorString(_p_hip_err__); \ 67 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hip error %d (%s) : %s", (PetscErrorCode)_p_hip_err__, name, descr); \ 68 } \ 69 } while (0) 70 #define CHKERRHIP(...) PetscCallHIP(__VA_ARGS__) 71 72 #define PetscHIPCheckLaunch \ 73 do { \ 74 /* Check synchronous errors, i.e. pre-launch */ \ 75 PetscCallHIP(hipGetLastError()); \ 76 /* Check asynchronous errors, i.e. kernel failed (ULF) */ \ 77 PetscCallHIP(hipDeviceSynchronize()); \ 78 } while (0) 79 80 #define PetscCallHIPBLAS(...) \ 81 do { \ 82 const hipblasStatus_t _p_hipblas_stat__ = __VA_ARGS__; \ 83 if (PetscUnlikely(_p_hipblas_stat__ != HIPBLAS_STATUS_SUCCESS)) { \ 84 const char *name = PetscHIPBLASGetErrorName(_p_hipblas_stat__); \ 85 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipBLAS error %d (%s)", (PetscErrorCode)_p_hipblas_stat__, name); \ 86 } \ 87 } while (0) 88 #define CHKERRHIPBLAS(...) PetscCallHIPBLAS(__VA_ARGS__) 89 90 #if PETSC_PKG_HIP_VERSION_GE(4, 5, 0) 91 /* HIPSPARSE & HIPSOLVER have better functionality with ROCm-4.5 or newer */ 92 #define PetscCallHIPSPARSE(...) \ 93 do { \ 94 const hipsparseStatus_t _p_hipsparse_stat__ = __VA_ARGS__; \ 95 if (PetscUnlikely(_p_hipsparse_stat__ != HIPSPARSE_STATUS_SUCCESS)) { \ 96 const char *name = PetscHIPSPARSEGetErrorName(_p_hipsparse_stat__); \ 97 PetscCheck((_p_hipsparse_stat__ != HIPSPARSE_STATUS_NOT_INITIALIZED) && (_p_hipsparse_stat__ != HIPSPARSE_STATUS_ALLOC_FAILED), PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, "hipSPARSE errorcode %d (%s): Reports not initialized or alloc failed; this indicates the GPU has run out resources", (int)_p_hipsparse_stat__, name); \ 98 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSPARSE errorcode %d (%s)", (int)_p_hipsparse_stat__, name); \ 99 } \ 100 } while (0) 101 #define CHKERRHIPSPARSE(...) PetscCallHIPSPARSE(__VA_ARGS__) 102 103 #define PetscCallHIPSOLVER(...) \ 104 do { \ 105 const hipsolverStatus_t _p_hipsolver_stat__ = __VA_ARGS__; \ 106 if (PetscUnlikely(_p_hipsolver_stat__ != HIPSOLVER_STATUS_SUCCESS)) { \ 107 const char *name = PetscHIPSolverGetErrorName(_p_hipsolver_stat__); \ 108 if (((_p_hipsolver_stat__ == HIPSOLVER_STATUS_NOT_INITIALIZED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_ALLOC_FAILED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_HIP)) { \ 109 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \ 110 "hipSolver error %d (%s). " \ 111 "This indicates the GPU may have run out resources", \ 112 (PetscErrorCode)_p_hipsolver_stat__, name); \ 113 } else { \ 114 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSolver error %d (%s)", (PetscErrorCode)_p_hipsolver_stat__, name); \ 115 } \ 116 } \ 117 } while (0) 118 #define CHKERRHIPSOLVER(...) PetscCallHIPSOLVER(__VA_ARGS__) 119 120 #else /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */ 121 /* hipSolver does not exist yet so we work around it 122 rocSOLVER users rocBLAS for the handle 123 * */ 124 #if defined(__HIP_PLATFORM_NVCC__) 125 #include <cusolverDn.h> 126 typedef cusolverDnHandle_t hipsolverHandle_t; 127 typedef cusolverStatus_t hipsolverStatus_t; 128 129 /* Alias hipsolverDestroy to cusolverDnDestroy */ 130 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle) 131 { 132 return cusolverDnDestroy(hipsolverhandle); 133 } 134 135 /* Alias hipsolverCreate to cusolverDnCreate */ 136 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) 137 { 138 return cusolverDnCreate(hipsolverhandle); 139 } 140 141 /* Alias hipsolverGetStream to cusolverDnGetStream */ 142 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) 143 { 144 return cusolverDnGetStream(handle, stream); 145 } 146 147 /* Alias hipsolverSetStream to cusolverDnSetStream */ 148 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) 149 { 150 return cusolveDnSetStream(handle, stream); 151 } 152 #else /* __HIP_PLATFORM_HCC__ */ 153 #include <rocsolver.h> 154 #include <rocblas.h> 155 typedef rocblas_handle hipsolverHandle_t; 156 typedef rocblas_status hipsolverStatus_t; 157 158 /* Alias hipsolverDestroy to rocblas_destroy_handle */ 159 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t hipsolverhandle) 160 { 161 return rocblas_destroy_handle(hipsolverhandle); 162 } 163 164 /* Alias hipsolverCreate to rocblas_destroy_handle */ 165 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) 166 { 167 return rocblas_create_handle(hipsolverhandle); 168 } 169 170 // Alias hipsolverGetStream to rocblas_get_stream 171 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) 172 { 173 return rocblas_get_stream(handle, stream); 174 } 175 176 // Alias hipsolverSetStream to rocblas_set_stream 177 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) 178 { 179 return rocblas_set_stream(handle, stream); 180 } 181 #endif // __HIP_PLATFORM_NVCC__ 182 #endif /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */ 183 // REMOVE ME 184 PETSC_EXTERN hipStream_t PetscDefaultHipStream; // The default stream used by PETSc 185 PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t *); 186 PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t *); 187 PETSC_EXTERN PetscErrorCode PetscGetCurrentHIPStream(hipStream_t *); 188 189 #endif // PETSC_HAVE_HIP 190 191 // these can also be defined in petscdevice_cuda.h so we undef and define them *only* if the 192 // current compiler is HCC. In this case if petscdevice_cuda.h is included first, the macros 193 // would already be defined, but they would be empty since we cannot be using NVCC at the same 194 // time. 195 #if PetscDefined(USING_HCC) 196 #undef PETSC_HOST_DECL 197 #undef PETSC_DEVICE_DECL 198 #undef PETSC_KERNEL_DECL 199 #undef PETSC_SHAREDMEM_DECL 200 #undef PETSC_FORCEINLINE 201 #undef PETSC_CONSTMEM_DECL 202 203 #define PETSC_HOST_DECL __host__ 204 #define PETSC_DEVICE_DECL __device__ 205 #define PETSC_KERNEL_DECL __global__ 206 #define PETSC_SHAREDMEM_DECL __shared__ 207 #define PETSC_FORCEINLINE __forceinline__ 208 #define PETSC_CONSTMEM_DECL __constant__ 209 #endif 210 211 #if !defined(PETSC_HOST_DECL) // use HOST_DECL as canary 212 #define PETSC_HOST_DECL 213 #define PETSC_DEVICE_DECL 214 #define PETSC_KERNEL_DECL 215 #define PETSC_SHAREDMEM_DECL 216 #define PETSC_FORCEINLINE inline 217 #define PETSC_CONSTMEM_DECL 218 #endif 219 220 #if !defined(PETSC_DEVICE_DEFINED_DECLS_PRIVATE) 221 #define PETSC_DEVICE_DEFINED_DECLS_PRIVATE 222 #define PETSC_HOSTDEVICE_DECL PETSC_HOST_DECL PETSC_DEVICE_DECL 223 #define PETSC_DEVICE_INLINE_DECL PETSC_DEVICE_DECL PETSC_FORCEINLINE 224 #define PETSC_HOSTDEVICE_INLINE_DECL PETSC_HOSTDEVICE_DECL PETSC_FORCEINLINE 225 #endif 226