1 #pragma once 2 3 #include <petscdevice.h> 4 #include <petscpkg_version.h> 5 6 #if defined(__HCC__) || (defined(__clang__) && defined(__HIP__)) 7 #define PETSC_USING_HCC 1 8 #endif 9 10 #if PetscDefined(HAVE_HIP) 11 #include <hip/hip_runtime.h> 12 13 #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0) 14 15 // cupmScalarPtrCast() returns hip{Float,Double}Complex while hipBLAS uses hipBlas{Float,Double}Complex, causing many VecCUPM errors like 16 // error: no matching function for call to 'cupmBlasXdot'. 17 // Before rocm-6.0, one can define ROCM_MATHLIBS_API_USE_HIP_COMPLEX to force rocm to 'typedef hipDoubleComplex hipBlasDoubleComplex' for example. 18 // Since then, ROCM_MATHLIBS_API_USE_HIP_COMPLEX is deprecated, and one can define HIPBLAS_V2 to use version 2 of hipBLAS that directly use hipDoubleComplex etc. 19 // Per AMD, HIPBLAS_V2 will be removed in the future so that hipBLAS only provides updated APIs (but not yet in 6.2.2 as of Sep. 27, 2024). 20 // 21 // see https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.0.0/functions.html#complex-datatypes 22 // and https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.2.2/functions.html#hipblas-v2-and-deprecations 23 #if PETSC_PKG_HIP_VERSION_GE(6, 0, 0) 24 #define HIPBLAS_V2 25 #else 26 #define ROCM_MATHLIBS_API_USE_HIP_COMPLEX 27 #endif 28 #include <hipblas/hipblas.h> 29 #include <hipsparse/hipsparse.h> 30 #else 31 #include <hipblas.h> 32 #include <hipsparse.h> 33 #endif 34 35 #if PETSC_PKG_HIP_VERSION_LT(5, 4, 0) 36 #define HIPSPARSE_ORDER_COL HIPSPARSE_ORDER_COLUMN 37 #endif 38 39 #if defined(__HIP_PLATFORM_NVCC__) 40 #include <cusolverDn.h> 41 #else // __HIP_PLATFORM_HCC__ 42 #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0) 43 #include <hipsolver/hipsolver.h> 44 #else 45 #include <hipsolver.h> 46 #endif 47 #endif // __HIP_PLATFORM_NVCC__ 48 #include <hip/hip_complex.h> // for hipComplex, hipDoubleComplex 49 50 // REMOVE ME 51 #define WaitForHIP() hipDeviceSynchronize() 52 53 /* hipBLAS, hipSPARSE and hipSolver does not have hip*GetErrorName(). We create one on our own. */ 54 PETSC_EXTERN const char *PetscHIPBLASGetErrorName(hipblasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */ 55 PETSC_EXTERN const char *PetscHIPSPARSEGetErrorName(hipsparseStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSPARSE macro */ 56 PETSC_EXTERN const char *PetscHIPSolverGetErrorName(hipsolverStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSOLVER macro */ 57 58 #define PetscCallHIP(...) \ 59 do { \ 60 const hipError_t _p_hip_err__ = __VA_ARGS__; \ 61 if (PetscUnlikely(_p_hip_err__ != hipSuccess)) { \ 62 const char *name = hipGetErrorName(_p_hip_err__); \ 63 const char *descr = hipGetErrorString(_p_hip_err__); \ 64 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hip error %d (%s) : %s", (PetscErrorCode)_p_hip_err__, name, descr); \ 65 } \ 66 } while (0) 67 #define CHKERRHIP(...) PetscCallHIP(__VA_ARGS__) 68 69 #define PetscHIPCheckLaunch \ 70 do { \ 71 /* Check synchronous errors, i.e. pre-launch */ \ 72 PetscCallHIP(hipGetLastError()); \ 73 /* Check asynchronous errors, i.e. kernel failed (ULF) */ \ 74 PetscCallHIP(hipDeviceSynchronize()); \ 75 } while (0) 76 77 #define PetscCallHIPBLAS(...) \ 78 do { \ 79 const hipblasStatus_t _p_hipblas_stat__ = __VA_ARGS__; \ 80 if (PetscUnlikely(_p_hipblas_stat__ != HIPBLAS_STATUS_SUCCESS)) { \ 81 const char *name = PetscHIPBLASGetErrorName(_p_hipblas_stat__); \ 82 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipBLAS error %d (%s)", (PetscErrorCode)_p_hipblas_stat__, name); \ 83 } \ 84 } while (0) 85 #define CHKERRHIPBLAS(...) PetscCallHIPBLAS(__VA_ARGS__) 86 87 #if PETSC_PKG_HIP_VERSION_GE(4, 5, 0) 88 /* HIPSPARSE & HIPSOLVER have better functionality with ROCm-4.5 or newer */ 89 #define PetscCallHIPSPARSE(...) \ 90 do { \ 91 const hipsparseStatus_t _p_hipsparse_stat__ = __VA_ARGS__; \ 92 if (PetscUnlikely(_p_hipsparse_stat__ != HIPSPARSE_STATUS_SUCCESS)) { \ 93 const char *name = PetscHIPSPARSEGetErrorName(_p_hipsparse_stat__); \ 94 PetscCheck((_p_hipsparse_stat__ != HIPSPARSE_STATUS_NOT_INITIALIZED) && (_p_hipsparse_stat__ != HIPSPARSE_STATUS_ALLOC_FAILED), PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, "hipSPARSE errorcode %d (%s): Reports not initialized or alloc failed; this indicates the GPU has run out resources", (int)_p_hipsparse_stat__, name); \ 95 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSPARSE errorcode %d (%s)", (int)_p_hipsparse_stat__, name); \ 96 } \ 97 } while (0) 98 #define CHKERRHIPSPARSE(...) PetscCallHIPSPARSE(__VA_ARGS__) 99 100 #define PetscCallHIPSOLVER(...) \ 101 do { \ 102 const hipsolverStatus_t _p_hipsolver_stat__ = __VA_ARGS__; \ 103 if (PetscUnlikely(_p_hipsolver_stat__ != HIPSOLVER_STATUS_SUCCESS)) { \ 104 const char *name = PetscHIPSolverGetErrorName(_p_hipsolver_stat__); \ 105 if (((_p_hipsolver_stat__ == HIPSOLVER_STATUS_NOT_INITIALIZED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_ALLOC_FAILED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_HIP)) { \ 106 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \ 107 "hipSolver error %d (%s). " \ 108 "This indicates the GPU may have run out resources", \ 109 (PetscErrorCode)_p_hipsolver_stat__, name); \ 110 } else { \ 111 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSolver error %d (%s)", (PetscErrorCode)_p_hipsolver_stat__, name); \ 112 } \ 113 } \ 114 } while (0) 115 #define CHKERRHIPSOLVER(...) PetscCallHIPSOLVER(__VA_ARGS__) 116 117 #else /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */ 118 /* hipSolver does not exist yet so we work around it 119 rocSOLVER users rocBLAS for the handle 120 * */ 121 #if defined(__HIP_PLATFORM_NVCC__) 122 #include <cusolverDn.h> 123 typedef cusolverDnHandle_t hipsolverHandle_t; 124 typedef cusolverStatus_t hipsolverStatus_t; 125 126 /* Alias hipsolverDestroy to cusolverDnDestroy */ 127 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle) 128 { 129 return cusolverDnDestroy(hipsolverhandle); 130 } 131 132 /* Alias hipsolverCreate to cusolverDnCreate */ 133 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) 134 { 135 return cusolverDnCreate(hipsolverhandle); 136 } 137 138 /* Alias hipsolverGetStream to cusolverDnGetStream */ 139 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) 140 { 141 return cusolverDnGetStream(handle, stream); 142 } 143 144 /* Alias hipsolverSetStream to cusolverDnSetStream */ 145 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) 146 { 147 return cusolveDnSetStream(handle, stream); 148 } 149 #else /* __HIP_PLATFORM_HCC__ */ 150 #include <rocsolver.h> 151 #include <rocblas.h> 152 typedef rocblas_handle hipsolverHandle_t; 153 typedef rocblas_status hipsolverStatus_t; 154 155 /* Alias hipsolverDestroy to rocblas_destroy_handle */ 156 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t hipsolverhandle) 157 { 158 return rocblas_destroy_handle(hipsolverhandle); 159 } 160 161 /* Alias hipsolverCreate to rocblas_destroy_handle */ 162 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) 163 { 164 return rocblas_create_handle(hipsolverhandle); 165 } 166 167 // Alias hipsolverGetStream to rocblas_get_stream 168 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) 169 { 170 return rocblas_get_stream(handle, stream); 171 } 172 173 // Alias hipsolverSetStream to rocblas_set_stream 174 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) 175 { 176 return rocblas_set_stream(handle, stream); 177 } 178 #endif // __HIP_PLATFORM_NVCC__ 179 #endif /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */ 180 // REMOVE ME 181 PETSC_EXTERN hipStream_t PetscDefaultHipStream; // The default stream used by PETSc 182 PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t *); 183 PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t *); 184 PETSC_EXTERN PetscErrorCode PetscGetCurrentHIPStream(hipStream_t *); 185 186 #endif // PETSC_HAVE_HIP 187 188 // these can also be defined in petscdevice_cuda.h so we undef and define them *only* if the 189 // current compiler is HCC. In this case if petscdevice_cuda.h is included first, the macros 190 // would already be defined, but they would be empty since we cannot be using NVCC at the same 191 // time. 192 #if PetscDefined(USING_HCC) 193 #undef PETSC_HOST_DECL 194 #undef PETSC_DEVICE_DECL 195 #undef PETSC_KERNEL_DECL 196 #undef PETSC_SHAREDMEM_DECL 197 #undef PETSC_FORCEINLINE 198 #undef PETSC_CONSTMEM_DECL 199 200 #define PETSC_HOST_DECL __host__ 201 #define PETSC_DEVICE_DECL __device__ 202 #define PETSC_KERNEL_DECL __global__ 203 #define PETSC_SHAREDMEM_DECL __shared__ 204 #define PETSC_FORCEINLINE __forceinline__ 205 #define PETSC_CONSTMEM_DECL __constant__ 206 #endif 207 208 #ifndef PETSC_HOST_DECL // use HOST_DECL as canary 209 #define PETSC_HOST_DECL 210 #define PETSC_DEVICE_DECL 211 #define PETSC_KERNEL_DECL 212 #define PETSC_SHAREDMEM_DECL 213 #define PETSC_FORCEINLINE inline 214 #define PETSC_CONSTMEM_DECL 215 #endif 216 217 #ifndef PETSC_DEVICE_DEFINED_DECLS_PRIVATE 218 #define PETSC_DEVICE_DEFINED_DECLS_PRIVATE 219 #define PETSC_HOSTDEVICE_DECL PETSC_HOST_DECL PETSC_DEVICE_DECL 220 #define PETSC_DEVICE_INLINE_DECL PETSC_DEVICE_DECL PETSC_FORCEINLINE 221 #define PETSC_HOSTDEVICE_INLINE_DECL PETSC_HOSTDEVICE_DECL PETSC_FORCEINLINE 222 #endif 223