xref: /petsc/include/petscdevice_hip.h (revision 84467f862f2de26368b63ea79dccd665bcda30cd)
1 #pragma once
2 
3 #include <petscdevice.h>
4 #include <petscpkg_version.h>
5 
6 #if defined(__HCC__) || (defined(__clang__) && defined(__HIP__))
7   #define PETSC_USING_HCC 1
8 #endif
9 
10 #if PetscDefined(HAVE_HIP)
11   #include <hip/hip_runtime.h>
12 
13   #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0)
14 
15     // cupmScalarPtrCast() returns hip{Float,Double}Complex while hipBLAS uses hipBlas{Float,Double}Complex, causing many VecCUPM errors like
16     // error: no matching function for call to 'cupmBlasXdot'.
17     // Before rocm-6.0, one can define ROCM_MATHLIBS_API_USE_HIP_COMPLEX to force rocm to 'typedef hipDoubleComplex hipBlasDoubleComplex' for example.
18     // Since then, ROCM_MATHLIBS_API_USE_HIP_COMPLEX is deprecated, and one can define HIPBLAS_V2 to use version 2 of hipBLAS that directly use hipDoubleComplex etc.
19     // Per AMD, HIPBLAS_V2 will be removed in the future so that hipBLAS only provides updated APIs (but not yet in 6.2.2 as of Sep. 27, 2024).
20     //
21     // see https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.0.0/functions.html#complex-datatypes
22     // and https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.2.2/functions.html#hipblas-v2-and-deprecations
23     #if PETSC_PKG_HIP_VERSION_GE(6, 0, 0)
24       #define HIPBLAS_V2
25     #else
26       #define ROCM_MATHLIBS_API_USE_HIP_COMPLEX
27     #endif
28     #include <hipblas/hipblas.h>
29     #include <hipsparse/hipsparse.h>
30   #else
31     #include <hipblas.h>
32     #include <hipsparse.h>
33   #endif
34 
35   #if PETSC_PKG_HIP_VERSION_LT(5, 4, 0)
36     #define HIPSPARSE_ORDER_COL HIPSPARSE_ORDER_COLUMN
37   #endif
38 
39   #if defined(__HIP_PLATFORM_NVCC__)
40     #include <cusolverDn.h>
41   #else // __HIP_PLATFORM_HCC__
42     #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0)
43       #include <hipsolver/hipsolver.h>
44     #else
45       #include <hipsolver.h>
46     #endif
47   #endif                       // __HIP_PLATFORM_NVCC__
48   #include <hip/hip_complex.h> // for hipComplex, hipDoubleComplex
49 
50   // REMOVE ME
51   #define WaitForHIP() hipDeviceSynchronize()
52 
53 /* hipBLAS, hipSPARSE and hipSolver does not have hip*GetErrorName(). We create one on our own. */
54 PETSC_EXTERN const char *PetscHIPBLASGetErrorName(hipblasStatus_t);     /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */
55 PETSC_EXTERN const char *PetscHIPSPARSEGetErrorName(hipsparseStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSPARSE macro */
56 PETSC_EXTERN const char *PetscHIPSolverGetErrorName(hipsolverStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSOLVER macro */
57 
58   #define PetscCallHIP(...) \
59     do { \
60       const hipError_t _p_hip_err__ = __VA_ARGS__; \
61       if (PetscUnlikely(_p_hip_err__ != hipSuccess)) { \
62         const char *name  = hipGetErrorName(_p_hip_err__); \
63         const char *descr = hipGetErrorString(_p_hip_err__); \
64         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hip error %d (%s) : %s", (PetscErrorCode)_p_hip_err__, name, descr); \
65       } \
66     } while (0)
67   #define CHKERRHIP(...) PetscCallHIP(__VA_ARGS__)
68 
69   #define PetscHIPCheckLaunch \
70     do { \
71       /* Check synchronous errors, i.e. pre-launch */ \
72       PetscCallHIP(hipGetLastError()); \
73       /* Check asynchronous errors, i.e. kernel failed (ULF) */ \
74       PetscCallHIP(hipDeviceSynchronize()); \
75     } while (0)
76 
77   #define PetscCallHIPBLAS(...) \
78     do { \
79       const hipblasStatus_t _p_hipblas_stat__ = __VA_ARGS__; \
80       if (PetscUnlikely(_p_hipblas_stat__ != HIPBLAS_STATUS_SUCCESS)) { \
81         const char *name = PetscHIPBLASGetErrorName(_p_hipblas_stat__); \
82         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipBLAS error %d (%s)", (PetscErrorCode)_p_hipblas_stat__, name); \
83       } \
84     } while (0)
85   #define CHKERRHIPBLAS(...) PetscCallHIPBLAS(__VA_ARGS__)
86 
87   #if PETSC_PKG_HIP_VERSION_GE(4, 5, 0)
88     /* HIPSPARSE & HIPSOLVER have better functionality with ROCm-4.5 or newer */
89     #define PetscCallHIPSPARSE(...) \
90       do { \
91         const hipsparseStatus_t _p_hipsparse_stat__ = __VA_ARGS__; \
92         if (PetscUnlikely(_p_hipsparse_stat__ != HIPSPARSE_STATUS_SUCCESS)) { \
93           const char *name = PetscHIPSPARSEGetErrorName(_p_hipsparse_stat__); \
94           PetscCheck((_p_hipsparse_stat__ != HIPSPARSE_STATUS_NOT_INITIALIZED) && (_p_hipsparse_stat__ != HIPSPARSE_STATUS_ALLOC_FAILED), PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, "hipSPARSE errorcode %d (%s): Reports not initialized or alloc failed; this indicates the GPU has run out resources", (int)_p_hipsparse_stat__, name); \
95           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSPARSE errorcode %d (%s)", (int)_p_hipsparse_stat__, name); \
96         } \
97       } while (0)
98     #define CHKERRHIPSPARSE(...) PetscCallHIPSPARSE(__VA_ARGS__)
99 
100     #define PetscCallHIPSOLVER(...) \
101       do { \
102         const hipsolverStatus_t _p_hipsolver_stat__ = __VA_ARGS__; \
103         if (PetscUnlikely(_p_hipsolver_stat__ != HIPSOLVER_STATUS_SUCCESS)) { \
104           const char *name = PetscHIPSolverGetErrorName(_p_hipsolver_stat__); \
105           if (((_p_hipsolver_stat__ == HIPSOLVER_STATUS_NOT_INITIALIZED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_ALLOC_FAILED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_HIP)) { \
106             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
107                     "hipSolver error %d (%s). " \
108                     "This indicates the GPU may have run out resources", \
109                     (PetscErrorCode)_p_hipsolver_stat__, name); \
110           } else { \
111             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSolver error %d (%s)", (PetscErrorCode)_p_hipsolver_stat__, name); \
112           } \
113         } \
114       } while (0)
115     #define CHKERRHIPSOLVER(...) PetscCallHIPSOLVER(__VA_ARGS__)
116 
117   #else /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */
118     /* hipSolver does not exist yet so we work around it
119   rocSOLVER users rocBLAS for the handle
120   * */
121     #if defined(__HIP_PLATFORM_NVCC__)
122       #include <cusolverDn.h>
123 typedef cusolverDnHandle_t hipsolverHandle_t;
124 typedef cusolverStatus_t   hipsolverStatus_t;
125 
126 /* Alias hipsolverDestroy to cusolverDnDestroy */
127 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle)
128 {
129   return cusolverDnDestroy(hipsolverhandle);
130 }
131 
132 /* Alias hipsolverCreate to cusolverDnCreate */
133 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
134 {
135   return cusolverDnCreate(hipsolverhandle);
136 }
137 
138 /* Alias hipsolverGetStream to cusolverDnGetStream */
139 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
140 {
141   return cusolverDnGetStream(handle, stream);
142 }
143 
144 /* Alias hipsolverSetStream to cusolverDnSetStream */
145 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
146 {
147   return cusolveDnSetStream(handle, stream);
148 }
149     #else /* __HIP_PLATFORM_HCC__ */
150       #include <rocsolver.h>
151       #include <rocblas.h>
152 typedef rocblas_handle hipsolverHandle_t;
153 typedef rocblas_status hipsolverStatus_t;
154 
155 /* Alias hipsolverDestroy to rocblas_destroy_handle */
156 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t hipsolverhandle)
157 {
158   return rocblas_destroy_handle(hipsolverhandle);
159 }
160 
161 /* Alias hipsolverCreate to rocblas_destroy_handle */
162 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
163 {
164   return rocblas_create_handle(hipsolverhandle);
165 }
166 
167 // Alias hipsolverGetStream to rocblas_get_stream
168 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
169 {
170   return rocblas_get_stream(handle, stream);
171 }
172 
173 // Alias hipsolverSetStream to rocblas_set_stream
174 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
175 {
176   return rocblas_set_stream(handle, stream);
177 }
178     #endif // __HIP_PLATFORM_NVCC__
179   #endif   /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */
180 // REMOVE ME
181 PETSC_EXTERN hipStream_t    PetscDefaultHipStream; // The default stream used by PETSc
182 PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t *);
183 PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t *);
184 PETSC_EXTERN PetscErrorCode PetscGetCurrentHIPStream(hipStream_t *);
185 
186 #endif // PETSC_HAVE_HIP
187 
188 // these can also be defined in petscdevice_cuda.h so we undef and define them *only* if the
189 // current compiler is HCC. In this case if petscdevice_cuda.h is included first, the macros
190 // would already be defined, but they would be empty since we cannot be using NVCC at the same
191 // time.
192 #if PetscDefined(USING_HCC)
193   #undef PETSC_HOST_DECL
194   #undef PETSC_DEVICE_DECL
195   #undef PETSC_KERNEL_DECL
196   #undef PETSC_SHAREDMEM_DECL
197   #undef PETSC_FORCEINLINE
198   #undef PETSC_CONSTMEM_DECL
199 
200   #define PETSC_HOST_DECL      __host__
201   #define PETSC_DEVICE_DECL    __device__
202   #define PETSC_KERNEL_DECL    __global__
203   #define PETSC_SHAREDMEM_DECL __shared__
204   #define PETSC_FORCEINLINE    __forceinline__
205   #define PETSC_CONSTMEM_DECL  __constant__
206 #endif
207 
208 #ifndef PETSC_HOST_DECL // use HOST_DECL as canary
209   #define PETSC_HOST_DECL
210   #define PETSC_DEVICE_DECL
211   #define PETSC_KERNEL_DECL
212   #define PETSC_SHAREDMEM_DECL
213   #define PETSC_FORCEINLINE inline
214   #define PETSC_CONSTMEM_DECL
215 #endif
216 
217 #ifndef PETSC_DEVICE_DEFINED_DECLS_PRIVATE
218   #define PETSC_DEVICE_DEFINED_DECLS_PRIVATE
219   #define PETSC_HOSTDEVICE_DECL        PETSC_HOST_DECL PETSC_DEVICE_DECL
220   #define PETSC_DEVICE_INLINE_DECL     PETSC_DEVICE_DECL PETSC_FORCEINLINE
221   #define PETSC_HOSTDEVICE_INLINE_DECL PETSC_HOSTDEVICE_DECL PETSC_FORCEINLINE
222 #endif
223