xref: /petsc/include/petscdevice_hip.h (revision fbf9dbe564678ed6eff1806adbc4c4f01b9743f4)
1 #ifndef PETSCDEVICE_HIP_H
2 #define PETSCDEVICE_HIP_H
3 
4 #include <petscdevice.h>
5 #include <petscpkg_version.h>
6 
7 #if defined(__HCC__) || (defined(__clang__) && defined(__HIP__))
8   #define PETSC_USING_HCC 1
9 #endif
10 
11 #if PetscDefined(HAVE_HIP)
12   #include <hip/hip_runtime.h>
13 
14   #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0)
15     #include <hipblas/hipblas.h>
16     #include <hipsparse/hipsparse.h>
17   #else
18     #include <hipblas.h>
19     #include <hipsparse.h>
20   #endif
21 
22   #if PETSC_PKG_HIP_VERSION_LT(5, 4, 0)
23     #define HIPSPARSE_ORDER_COL HIPSPARSE_ORDER_COLUMN
24   #endif
25 
26   #if defined(__HIP_PLATFORM_NVCC__)
27     #include <cusolverDn.h>
28   #else // __HIP_PLATFORM_HCC__
29     #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0)
30       #include <hipsolver/hipsolver.h>
31     #else
32       #include <hipsolver.h>
33     #endif
34   #endif                       // __HIP_PLATFORM_NVCC__
35   #include <hip/hip_complex.h> // for hipComplex, hipDoubleComplex
36 
37   // REMOVE ME
38   #define WaitForHIP() hipDeviceSynchronize()
39 
40 /* hipBLAS, hipSPARSE and hipSolver does not have hip*GetErrorName(). We create one on our own. */
41 PETSC_EXTERN const char *PetscHIPBLASGetErrorName(hipblasStatus_t);     /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */
42 PETSC_EXTERN const char *PetscHIPSPARSEGetErrorName(hipsparseStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSPARSE macro */
43 PETSC_EXTERN const char *PetscHIPSolverGetErrorName(hipsolverStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSOLVER macro */
44 
45   #define PetscCallHIP(...) \
46     do { \
47       const hipError_t _p_hip_err__ = __VA_ARGS__; \
48       if (PetscUnlikely(_p_hip_err__ != hipSuccess)) { \
49         const char *name  = hipGetErrorName(_p_hip_err__); \
50         const char *descr = hipGetErrorString(_p_hip_err__); \
51         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hip error %d (%s) : %s", (PetscErrorCode)_p_hip_err__, name, descr); \
52       } \
53     } while (0)
54   #define CHKERRHIP(...) PetscCallHIP(__VA_ARGS__)
55 
56   #define PetscHIPCheckLaunch \
57     do { \
58       /* Check synchronous errors, i.e. pre-launch */ \
59       PetscCallHIP(hipGetLastError()); \
60       /* Check asynchronous errors, i.e. kernel failed (ULF) */ \
61       PetscCallHIP(hipDeviceSynchronize()); \
62     } while (0)
63 
64   #define PetscCallHIPBLAS(...) \
65     do { \
66       const hipblasStatus_t _p_hipblas_stat__ = __VA_ARGS__; \
67       if (PetscUnlikely(_p_hipblas_stat__ != HIPBLAS_STATUS_SUCCESS)) { \
68         const char *name = PetscHIPBLASGetErrorName(_p_hipblas_stat__); \
69         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipBLAS error %d (%s)", (PetscErrorCode)_p_hipblas_stat__, name); \
70       } \
71     } while (0)
72   #define CHKERRHIPBLAS(...) PetscCallHIPBLAS(__VA_ARGS__)
73 
74   #if PETSC_PKG_HIP_VERSION_GE(4, 5, 0)
75     /* HIPSPARSE & HIPSOLVER have better functionality with ROCm-4.5 or newer */
76     #define PetscCallHIPSPARSE(...) \
77       do { \
78         const hipsparseStatus_t _p_hipsparse_stat__ = __VA_ARGS__; \
79         if (PetscUnlikely(_p_hipsparse_stat__ != HIPSPARSE_STATUS_SUCCESS)) { \
80           const char *name = PetscHIPSPARSEGetErrorName(_p_hipsparse_stat__); \
81           PetscCheck((_p_hipsparse_stat__ != HIPSPARSE_STATUS_NOT_INITIALIZED) && (_p_hipsparse_stat__ != HIPSPARSE_STATUS_ALLOC_FAILED), PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, "hipSPARSE errorcode %d (%s): Reports not initialized or alloc failed; this indicates the GPU has run out resources", (int)_p_hipsparse_stat__, name); \
82           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSPARSE errorcode %d (%s)", (int)_p_hipsparse_stat__, name); \
83         } \
84       } while (0)
85     #define CHKERRHIPSPARSE(...) PetscCallHIPSPARSE(__VA_ARGS__)
86 
87     #define PetscCallHIPSOLVER(...) \
88       do { \
89         const hipsolverStatus_t _p_hipsolver_stat__ = __VA_ARGS__; \
90         if (PetscUnlikely(_p_hipsolver_stat__ != HIPSOLVER_STATUS_SUCCESS)) { \
91           const char *name = PetscHIPSolverGetErrorName(_p_hipsolver_stat__); \
92           if (((_p_hipsolver_stat__ == HIPSOLVER_STATUS_NOT_INITIALIZED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_ALLOC_FAILED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_HIP)) { \
93             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
94                     "hipSolver error %d (%s). " \
95                     "This indicates the GPU may have run out resources", \
96                     (PetscErrorCode)_p_hipsolver_stat__, name); \
97           } else { \
98             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSolver error %d (%s)", (PetscErrorCode)_p_hipsolver_stat__, name); \
99           } \
100         } \
101       } while (0)
102     #define CHKERRHIPSOLVER(...) PetscCallHIPSOLVER(__VA_ARGS__)
103 
104   #else /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */
105     /* hipSolver does not exist yet so we work around it
106   rocSOLVER users rocBLAS for the handle
107   * */
108     #if defined(__HIP_PLATFORM_NVCC__)
109       #include <cusolverDn.h>
110 typedef cusolverDnHandle_t hipsolverHandle_t;
111 typedef cusolverStatus_t   hipsolverStatus_t;
112 
113 /* Alias hipsolverDestroy to cusolverDnDestroy */
114 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle)
115 {
116   return cusolverDnDestroy(hipsolverhandle);
117 }
118 
119 /* Alias hipsolverCreate to cusolverDnCreate */
120 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
121 {
122   return cusolverDnCreate(hipsolverhandle);
123 }
124 
125 /* Alias hipsolverGetStream to cusolverDnGetStream */
126 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
127 {
128   return cusolverDnGetStream(handle, stream);
129 }
130 
131 /* Alias hipsolverSetStream to cusolverDnSetStream */
132 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
133 {
134   return cusolveDnSetStream(handle, stream);
135 }
136     #else /* __HIP_PLATFORM_HCC__ */
137       #include <rocsolver.h>
138       #include <rocblas.h>
139 typedef rocblas_handle hipsolverHandle_t;
140 typedef rocblas_status hipsolverStatus_t;
141 
142 /* Alias hipsolverDestroy to rocblas_destroy_handle */
143 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t hipsolverhandle)
144 {
145   return rocblas_destroy_handle(hipsolverhandle);
146 }
147 
148 /* Alias hipsolverCreate to rocblas_destroy_handle */
149 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
150 {
151   return rocblas_create_handle(hipsolverhandle);
152 }
153 
154 // Alias hipsolverGetStream to rocblas_get_stream
155 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
156 {
157   return rocblas_get_stream(handle, stream);
158 }
159 
160 // Alias hipsolverSetStream to rocblas_set_stream
161 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
162 {
163   return rocblas_set_stream(handle, stream);
164 }
165     #endif // __HIP_PLATFORM_NVCC__
166   #endif   /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */
167 // REMOVE ME
168 PETSC_EXTERN hipStream_t    PetscDefaultHipStream; // The default stream used by PETSc
169 PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t *);
170 PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t *);
171 
172 #endif // PETSC_HAVE_HIP
173 
174 // these can also be defined in petscdevice_cuda.h so we undef and define them *only* if the
175 // current compiler is HCC. In this case if petscdevice_cuda.h is included first, the macros
176 // would already be defined, but they would be empty since we cannot be using NVCC at the same
177 // time.
178 #if PetscDefined(USING_HCC)
179   #undef PETSC_HOST_DECL
180   #undef PETSC_DEVICE_DECL
181   #undef PETSC_KERNEL_DECL
182   #undef PETSC_SHAREDMEM_DECL
183   #undef PETSC_FORCEINLINE
184   #undef PETSC_CONSTMEM_DECL
185 
186   #define PETSC_HOST_DECL      __host__
187   #define PETSC_DEVICE_DECL    __device__
188   #define PETSC_KERNEL_DECL    __global__
189   #define PETSC_SHAREDMEM_DECL __shared__
190   #define PETSC_FORCEINLINE    __forceinline__
191   #define PETSC_CONSTMEM_DECL  __constant__
192 #endif
193 
194 #ifndef PETSC_HOST_DECL // use HOST_DECL as canary
195   #define PETSC_HOST_DECL
196   #define PETSC_DEVICE_DECL
197   #define PETSC_KERNEL_DECL
198   #define PETSC_SHAREDMEM_DECL
199   #define PETSC_FORCEINLINE inline
200   #define PETSC_CONSTMEM_DECL
201 #endif
202 
203 #ifndef PETSC_DEVICE_DEFINED_DECLS_PRIVATE
204   #define PETSC_DEVICE_DEFINED_DECLS_PRIVATE
205   #define PETSC_HOSTDEVICE_DECL        PETSC_HOST_DECL PETSC_DEVICE_DECL
206   #define PETSC_DEVICE_INLINE_DECL     PETSC_DEVICE_DECL PETSC_FORCEINLINE
207   #define PETSC_HOSTDEVICE_INLINE_DECL PETSC_HOSTDEVICE_DECL PETSC_FORCEINLINE
208 #endif
209 
210 #endif // PETSCDEVICE_HIP_H
211