1 #pragma once 2 3 #include <petscdevice.h> 4 #include <petscpkg_version.h> 5 6 #if defined(__HCC__) || (defined(__clang__) && defined(__HIP__)) 7 #define PETSC_USING_HCC 1 8 #endif 9 10 #if PetscDefined(HAVE_HIP) 11 #include <hip/hip_runtime.h> 12 13 #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0) 14 #include <hipblas/hipblas.h> 15 #include <hipsparse/hipsparse.h> 16 #else 17 #include <hipblas.h> 18 #include <hipsparse.h> 19 #endif 20 21 #if PETSC_PKG_HIP_VERSION_LT(5, 4, 0) 22 #define HIPSPARSE_ORDER_COL HIPSPARSE_ORDER_COLUMN 23 #endif 24 25 #if defined(__HIP_PLATFORM_NVCC__) 26 #include <cusolverDn.h> 27 #else // __HIP_PLATFORM_HCC__ 28 #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0) 29 #include <hipsolver/hipsolver.h> 30 #else 31 #include <hipsolver.h> 32 #endif 33 #endif // __HIP_PLATFORM_NVCC__ 34 #include <hip/hip_complex.h> // for hipComplex, hipDoubleComplex 35 36 // REMOVE ME 37 #define WaitForHIP() hipDeviceSynchronize() 38 39 /* hipBLAS, hipSPARSE and hipSolver does not have hip*GetErrorName(). We create one on our own. */ 40 PETSC_EXTERN const char *PetscHIPBLASGetErrorName(hipblasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */ 41 PETSC_EXTERN const char *PetscHIPSPARSEGetErrorName(hipsparseStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSPARSE macro */ 42 PETSC_EXTERN const char *PetscHIPSolverGetErrorName(hipsolverStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSOLVER macro */ 43 44 #define PetscCallHIP(...) \ 45 do { \ 46 const hipError_t _p_hip_err__ = __VA_ARGS__; \ 47 if (PetscUnlikely(_p_hip_err__ != hipSuccess)) { \ 48 const char *name = hipGetErrorName(_p_hip_err__); \ 49 const char *descr = hipGetErrorString(_p_hip_err__); \ 50 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hip error %d (%s) : %s", (PetscErrorCode)_p_hip_err__, name, descr); \ 51 } \ 52 } while (0) 53 #define CHKERRHIP(...) PetscCallHIP(__VA_ARGS__) 54 55 #define PetscHIPCheckLaunch \ 56 do { \ 57 /* Check synchronous errors, i.e. pre-launch */ \ 58 PetscCallHIP(hipGetLastError()); \ 59 /* Check asynchronous errors, i.e. kernel failed (ULF) */ \ 60 PetscCallHIP(hipDeviceSynchronize()); \ 61 } while (0) 62 63 #define PetscCallHIPBLAS(...) \ 64 do { \ 65 const hipblasStatus_t _p_hipblas_stat__ = __VA_ARGS__; \ 66 if (PetscUnlikely(_p_hipblas_stat__ != HIPBLAS_STATUS_SUCCESS)) { \ 67 const char *name = PetscHIPBLASGetErrorName(_p_hipblas_stat__); \ 68 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipBLAS error %d (%s)", (PetscErrorCode)_p_hipblas_stat__, name); \ 69 } \ 70 } while (0) 71 #define CHKERRHIPBLAS(...) PetscCallHIPBLAS(__VA_ARGS__) 72 73 #if PETSC_PKG_HIP_VERSION_GE(4, 5, 0) 74 /* HIPSPARSE & HIPSOLVER have better functionality with ROCm-4.5 or newer */ 75 #define PetscCallHIPSPARSE(...) \ 76 do { \ 77 const hipsparseStatus_t _p_hipsparse_stat__ = __VA_ARGS__; \ 78 if (PetscUnlikely(_p_hipsparse_stat__ != HIPSPARSE_STATUS_SUCCESS)) { \ 79 const char *name = PetscHIPSPARSEGetErrorName(_p_hipsparse_stat__); \ 80 PetscCheck((_p_hipsparse_stat__ != HIPSPARSE_STATUS_NOT_INITIALIZED) && (_p_hipsparse_stat__ != HIPSPARSE_STATUS_ALLOC_FAILED), PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, "hipSPARSE errorcode %d (%s): Reports not initialized or alloc failed; this indicates the GPU has run out resources", (int)_p_hipsparse_stat__, name); \ 81 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSPARSE errorcode %d (%s)", (int)_p_hipsparse_stat__, name); \ 82 } \ 83 } while (0) 84 #define CHKERRHIPSPARSE(...) PetscCallHIPSPARSE(__VA_ARGS__) 85 86 #define PetscCallHIPSOLVER(...) \ 87 do { \ 88 const hipsolverStatus_t _p_hipsolver_stat__ = __VA_ARGS__; \ 89 if (PetscUnlikely(_p_hipsolver_stat__ != HIPSOLVER_STATUS_SUCCESS)) { \ 90 const char *name = PetscHIPSolverGetErrorName(_p_hipsolver_stat__); \ 91 if (((_p_hipsolver_stat__ == HIPSOLVER_STATUS_NOT_INITIALIZED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_ALLOC_FAILED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_HIP)) { \ 92 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \ 93 "hipSolver error %d (%s). " \ 94 "This indicates the GPU may have run out resources", \ 95 (PetscErrorCode)_p_hipsolver_stat__, name); \ 96 } else { \ 97 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSolver error %d (%s)", (PetscErrorCode)_p_hipsolver_stat__, name); \ 98 } \ 99 } \ 100 } while (0) 101 #define CHKERRHIPSOLVER(...) PetscCallHIPSOLVER(__VA_ARGS__) 102 103 #else /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */ 104 /* hipSolver does not exist yet so we work around it 105 rocSOLVER users rocBLAS for the handle 106 * */ 107 #if defined(__HIP_PLATFORM_NVCC__) 108 #include <cusolverDn.h> 109 typedef cusolverDnHandle_t hipsolverHandle_t; 110 typedef cusolverStatus_t hipsolverStatus_t; 111 112 /* Alias hipsolverDestroy to cusolverDnDestroy */ 113 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle) 114 { 115 return cusolverDnDestroy(hipsolverhandle); 116 } 117 118 /* Alias hipsolverCreate to cusolverDnCreate */ 119 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) 120 { 121 return cusolverDnCreate(hipsolverhandle); 122 } 123 124 /* Alias hipsolverGetStream to cusolverDnGetStream */ 125 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) 126 { 127 return cusolverDnGetStream(handle, stream); 128 } 129 130 /* Alias hipsolverSetStream to cusolverDnSetStream */ 131 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) 132 { 133 return cusolveDnSetStream(handle, stream); 134 } 135 #else /* __HIP_PLATFORM_HCC__ */ 136 #include <rocsolver.h> 137 #include <rocblas.h> 138 typedef rocblas_handle hipsolverHandle_t; 139 typedef rocblas_status hipsolverStatus_t; 140 141 /* Alias hipsolverDestroy to rocblas_destroy_handle */ 142 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t hipsolverhandle) 143 { 144 return rocblas_destroy_handle(hipsolverhandle); 145 } 146 147 /* Alias hipsolverCreate to rocblas_destroy_handle */ 148 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) 149 { 150 return rocblas_create_handle(hipsolverhandle); 151 } 152 153 // Alias hipsolverGetStream to rocblas_get_stream 154 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) 155 { 156 return rocblas_get_stream(handle, stream); 157 } 158 159 // Alias hipsolverSetStream to rocblas_set_stream 160 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) 161 { 162 return rocblas_set_stream(handle, stream); 163 } 164 #endif // __HIP_PLATFORM_NVCC__ 165 #endif /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */ 166 // REMOVE ME 167 PETSC_EXTERN hipStream_t PetscDefaultHipStream; // The default stream used by PETSc 168 PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t *); 169 PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t *); 170 PETSC_EXTERN PetscErrorCode PetscGetCurrentHIPStream(hipStream_t *); 171 172 #endif // PETSC_HAVE_HIP 173 174 // these can also be defined in petscdevice_cuda.h so we undef and define them *only* if the 175 // current compiler is HCC. In this case if petscdevice_cuda.h is included first, the macros 176 // would already be defined, but they would be empty since we cannot be using NVCC at the same 177 // time. 178 #if PetscDefined(USING_HCC) 179 #undef PETSC_HOST_DECL 180 #undef PETSC_DEVICE_DECL 181 #undef PETSC_KERNEL_DECL 182 #undef PETSC_SHAREDMEM_DECL 183 #undef PETSC_FORCEINLINE 184 #undef PETSC_CONSTMEM_DECL 185 186 #define PETSC_HOST_DECL __host__ 187 #define PETSC_DEVICE_DECL __device__ 188 #define PETSC_KERNEL_DECL __global__ 189 #define PETSC_SHAREDMEM_DECL __shared__ 190 #define PETSC_FORCEINLINE __forceinline__ 191 #define PETSC_CONSTMEM_DECL __constant__ 192 #endif 193 194 #ifndef PETSC_HOST_DECL // use HOST_DECL as canary 195 #define PETSC_HOST_DECL 196 #define PETSC_DEVICE_DECL 197 #define PETSC_KERNEL_DECL 198 #define PETSC_SHAREDMEM_DECL 199 #define PETSC_FORCEINLINE inline 200 #define PETSC_CONSTMEM_DECL 201 #endif 202 203 #ifndef PETSC_DEVICE_DEFINED_DECLS_PRIVATE 204 #define PETSC_DEVICE_DEFINED_DECLS_PRIVATE 205 #define PETSC_HOSTDEVICE_DECL PETSC_HOST_DECL PETSC_DEVICE_DECL 206 #define PETSC_DEVICE_INLINE_DECL PETSC_DEVICE_DECL PETSC_FORCEINLINE 207 #define PETSC_HOSTDEVICE_INLINE_DECL PETSC_HOSTDEVICE_DECL PETSC_FORCEINLINE 208 #endif 209