1 #ifndef PETSCDEVICE_HIP_H 2 #define PETSCDEVICE_HIP_H 3 4 #include <petscdevice.h> 5 #include <petscpkg_version.h> 6 7 #if defined(__HCC__) || (defined(__clang__) && defined(__HIP__)) 8 #define PETSC_USING_HCC 1 9 #endif 10 11 #if PetscDefined(HAVE_HIP) 12 #include <hip/hip_runtime.h> 13 14 #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0) 15 #include <hipblas/hipblas.h> 16 #include <hipsparse/hipsparse.h> 17 #else 18 #include <hipblas.h> 19 #include <hipsparse.h> 20 #endif 21 22 #if PETSC_PKG_HIP_VERSION_LT(5, 4, 0) 23 #define HIPSPARSE_ORDER_COL HIPSPARSE_ORDER_COLUMN 24 #endif 25 26 #if defined(__HIP_PLATFORM_NVCC__) 27 #include <cusolverDn.h> 28 #else // __HIP_PLATFORM_HCC__ 29 #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0) 30 #include <hipsolver/hipsolver.h> 31 #else 32 #include <hipsolver.h> 33 #endif 34 #endif // __HIP_PLATFORM_NVCC__ 35 #include <hip/hip_complex.h> // for hipComplex, hipDoubleComplex 36 37 // REMOVE ME 38 #define WaitForHIP() hipDeviceSynchronize() 39 40 /* hipBLAS, hipSPARSE and hipSolver does not have hip*GetErrorName(). We create one on our own. */ 41 PETSC_EXTERN const char *PetscHIPBLASGetErrorName(hipblasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */ 42 PETSC_EXTERN const char *PetscHIPSPARSEGetErrorName(hipsparseStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSPARSE macro */ 43 PETSC_EXTERN const char *PetscHIPSolverGetErrorName(hipsolverStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSOLVER macro */ 44 45 #define PetscCallHIP(...) \ 46 do { \ 47 const hipError_t _p_hip_err__ = __VA_ARGS__; \ 48 if (PetscUnlikely(_p_hip_err__ != hipSuccess)) { \ 49 const char *name = hipGetErrorName(_p_hip_err__); \ 50 const char *descr = hipGetErrorString(_p_hip_err__); \ 51 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hip error %d (%s) : %s", (PetscErrorCode)_p_hip_err__, name, descr); \ 52 } \ 53 } while (0) 54 #define CHKERRHIP(...) PetscCallHIP(__VA_ARGS__) 55 56 #define PetscHIPCheckLaunch \ 57 do { \ 58 /* Check synchronous errors, i.e. pre-launch */ \ 59 PetscCallHIP(hipGetLastError()); \ 60 /* Check asynchronous errors, i.e. kernel failed (ULF) */ \ 61 PetscCallHIP(hipDeviceSynchronize()); \ 62 } while (0) 63 64 #define PetscCallHIPBLAS(...) \ 65 do { \ 66 const hipblasStatus_t _p_hipblas_stat__ = __VA_ARGS__; \ 67 if (PetscUnlikely(_p_hipblas_stat__ != HIPBLAS_STATUS_SUCCESS)) { \ 68 const char *name = PetscHIPBLASGetErrorName(_p_hipblas_stat__); \ 69 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipBLAS error %d (%s)", (PetscErrorCode)_p_hipblas_stat__, name); \ 70 } \ 71 } while (0) 72 #define CHKERRHIPBLAS(...) PetscCallHIPBLAS(__VA_ARGS__) 73 74 #if PETSC_PKG_HIP_VERSION_GE(4, 5, 0) 75 /* HIPSPARSE & HIPSOLVER have better functionality with ROCm-4.5 or newer */ 76 #define PetscCallHIPSPARSE(...) \ 77 do { \ 78 const hipsparseStatus_t _p_hipsparse_stat__ = __VA_ARGS__; \ 79 if (PetscUnlikely(_p_hipsparse_stat__ != HIPSPARSE_STATUS_SUCCESS)) { \ 80 const char *name = PetscHIPSPARSEGetErrorName(_p_hipsparse_stat__); \ 81 PetscCheck((_p_hipsparse_stat__ != HIPSPARSE_STATUS_NOT_INITIALIZED) && (_p_hipsparse_stat__ != HIPSPARSE_STATUS_ALLOC_FAILED), PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, "hipSPARSE errorcode %d (%s): Reports not initialized or alloc failed; this indicates the GPU has run out resources", (int)_p_hipsparse_stat__, name); \ 82 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSPARSE errorcode %d (%s)", (int)_p_hipsparse_stat__, name); \ 83 } \ 84 } while (0) 85 #define CHKERRHIPSPARSE(...) PetscCallHIPSPARSE(__VA_ARGS__) 86 87 #define PetscCallHIPSOLVER(...) \ 88 do { \ 89 const hipsolverStatus_t _p_hipsolver_stat__ = __VA_ARGS__; \ 90 if (PetscUnlikely(_p_hipsolver_stat__ != HIPSOLVER_STATUS_SUCCESS)) { \ 91 const char *name = PetscHIPSolverGetErrorName(_p_hipsolver_stat__); \ 92 if (((_p_hipsolver_stat__ == HIPSOLVER_STATUS_NOT_INITIALIZED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_ALLOC_FAILED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_HIP)) { \ 93 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \ 94 "hipSolver error %d (%s). " \ 95 "This indicates the GPU may have run out resources", \ 96 (PetscErrorCode)_p_hipsolver_stat__, name); \ 97 } else { \ 98 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSolver error %d (%s)", (PetscErrorCode)_p_hipsolver_stat__, name); \ 99 } \ 100 } \ 101 } while (0) 102 #define CHKERRHIPSOLVER(...) PetscCallHIPSOLVER(__VA_ARGS__) 103 104 #else /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */ 105 /* hipSolver does not exist yet so we work around it 106 rocSOLVER users rocBLAS for the handle 107 * */ 108 #if defined(__HIP_PLATFORM_NVCC__) 109 #include <cusolverDn.h> 110 typedef cusolverDnHandle_t hipsolverHandle_t; 111 typedef cusolverStatus_t hipsolverStatus_t; 112 113 /* Alias hipsolverDestroy to cusolverDnDestroy */ 114 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle) 115 { 116 return cusolverDnDestroy(hipsolverhandle); 117 } 118 119 /* Alias hipsolverCreate to cusolverDnCreate */ 120 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) 121 { 122 return cusolverDnCreate(hipsolverhandle); 123 } 124 125 /* Alias hipsolverGetStream to cusolverDnGetStream */ 126 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) 127 { 128 return cusolverDnGetStream(handle, stream); 129 } 130 131 /* Alias hipsolverSetStream to cusolverDnSetStream */ 132 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) 133 { 134 return cusolveDnSetStream(handle, stream); 135 } 136 #else /* __HIP_PLATFORM_HCC__ */ 137 #include <rocsolver.h> 138 #include <rocblas.h> 139 typedef rocblas_handle hipsolverHandle_t; 140 typedef rocblas_status hipsolverStatus_t; 141 142 /* Alias hipsolverDestroy to rocblas_destroy_handle */ 143 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t hipsolverhandle) 144 { 145 return rocblas_destroy_handle(hipsolverhandle); 146 } 147 148 /* Alias hipsolverCreate to rocblas_destroy_handle */ 149 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) 150 { 151 return rocblas_create_handle(hipsolverhandle); 152 } 153 154 // Alias hipsolverGetStream to rocblas_get_stream 155 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) 156 { 157 return rocblas_get_stream(handle, stream); 158 } 159 160 // Alias hipsolverSetStream to rocblas_set_stream 161 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) 162 { 163 return rocblas_set_stream(handle, stream); 164 } 165 #endif // __HIP_PLATFORM_NVCC__ 166 #endif /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */ 167 // REMOVE ME 168 PETSC_EXTERN hipStream_t PetscDefaultHipStream; // The default stream used by PETSc 169 PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t *); 170 PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t *); 171 172 #endif // PETSC_HAVE_HIP 173 174 // these can also be defined in petscdevice_cuda.h so we undef and define them *only* if the 175 // current compiler is HCC. In this case if petscdevice_cuda.h is included first, the macros 176 // would already be defined, but they would be empty since we cannot be using NVCC at the same 177 // time. 178 #if PetscDefined(USING_HCC) 179 #undef PETSC_HOST_DECL 180 #undef PETSC_DEVICE_DECL 181 #undef PETSC_KERNEL_DECL 182 #undef PETSC_SHAREDMEM_DECL 183 #undef PETSC_FORCEINLINE 184 #undef PETSC_CONSTMEM_DECL 185 186 #define PETSC_HOST_DECL __host__ 187 #define PETSC_DEVICE_DECL __device__ 188 #define PETSC_KERNEL_DECL __global__ 189 #define PETSC_SHAREDMEM_DECL __shared__ 190 #define PETSC_FORCEINLINE __forceinline__ 191 #define PETSC_CONSTMEM_DECL __constant__ 192 #endif 193 194 #ifndef PETSC_HOST_DECL // use HOST_DECL as canary 195 #define PETSC_HOST_DECL 196 #define PETSC_DEVICE_DECL 197 #define PETSC_KERNEL_DECL 198 #define PETSC_SHAREDMEM_DECL 199 #define PETSC_FORCEINLINE inline 200 #define PETSC_CONSTMEM_DECL 201 #endif 202 203 #ifndef PETSC_DEVICE_DEFINED_DECLS_PRIVATE 204 #define PETSC_DEVICE_DEFINED_DECLS_PRIVATE 205 #define PETSC_HOSTDEVICE_DECL PETSC_HOST_DECL PETSC_DEVICE_DECL 206 #define PETSC_DEVICE_INLINE_DECL PETSC_DEVICE_DECL PETSC_FORCEINLINE 207 #define PETSC_HOSTDEVICE_INLINE_DECL PETSC_HOSTDEVICE_DECL PETSC_FORCEINLINE 208 #endif 209 210 #endif // PETSCDEVICE_HIP_H 211