xref: /petsc/include/petscdevice_hip.h (revision 9d47de495d3c23378050c1b4a410c12a375cb6c6)
1 #pragma once
2 
3 #include <petscdevice.h>
4 #include <petscpkg_version.h>
5 
6 /* MANSEC = Sys */
7 /* SUBMANSEC = Device */
8 
9 #if defined(__HCC__) || (defined(__clang__) && defined(__HIP__))
10   #define PETSC_USING_HCC 1
11 #endif
12 
13 #if PetscDefined(HAVE_HIP)
14   #include <hip/hip_runtime.h>
15 
16   #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0)
17 
18     // cupmScalarPtrCast() returns hip{Float,Double}Complex while hipBLAS uses hipBlas{Float,Double}Complex, causing many VecCUPM errors like
19     // error: no matching function for call to 'cupmBlasXdot'.
20     // Before rocm-6.0, one can define ROCM_MATHLIBS_API_USE_HIP_COMPLEX to force rocm to 'typedef hipDoubleComplex hipBlasDoubleComplex' for example.
21     // Since then, ROCM_MATHLIBS_API_USE_HIP_COMPLEX is deprecated, and one can define HIPBLAS_V2 to use version 2 of hipBLAS that directly use hipDoubleComplex etc.
22     // Per AMD, HIPBLAS_V2 will be removed in the future so that hipBLAS only provides updated APIs (but not yet in 6.2.2 as of Sep. 27, 2024).
23     //
24     // see https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.0.0/functions.html#complex-datatypes
25     // and https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.2.2/functions.html#hipblas-v2-and-deprecations
26     #if PETSC_PKG_HIP_VERSION_GE(6, 0, 0)
27       #define HIPBLAS_V2
28     #else
29       #define ROCM_MATHLIBS_API_USE_HIP_COMPLEX
30     #endif
31     #include <hipblas/hipblas.h>
32     #include <hipsparse/hipsparse.h>
33   #else
34     #include <hipblas.h>
35     #include <hipsparse.h>
36   #endif
37 
38   #if PETSC_PKG_HIP_VERSION_LT(5, 4, 0)
39     #define HIPSPARSE_ORDER_COL HIPSPARSE_ORDER_COLUMN
40   #endif
41 
42   #if defined(__HIP_PLATFORM_NVCC__)
43     #include <cusolverDn.h>
44   #else // __HIP_PLATFORM_HCC__
45     #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0)
46       #include <hipsolver/hipsolver.h>
47     #else
48       #include <hipsolver.h>
49     #endif
50   #endif                       // __HIP_PLATFORM_NVCC__
51   #include <hip/hip_complex.h> // for hipComplex, hipDoubleComplex
52 
53   // REMOVE ME
54   #define WaitForHIP() hipDeviceSynchronize()
55 
56 /* hipBLAS, hipSPARSE and hipSolver does not have hip*GetErrorName(). We create one on our own. */
57 PETSC_EXTERN const char *PetscHIPBLASGetErrorName(hipblasStatus_t);     /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */
58 PETSC_EXTERN const char *PetscHIPSPARSEGetErrorName(hipsparseStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSPARSE macro */
59 PETSC_EXTERN const char *PetscHIPSolverGetErrorName(hipsolverStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSOLVER macro */
60 
61   #define PetscCallHIP(...) \
62     do { \
63       const hipError_t _p_hip_err__ = __VA_ARGS__; \
64       if (PetscUnlikely(_p_hip_err__ != hipSuccess)) { \
65         const char *name  = hipGetErrorName(_p_hip_err__); \
66         const char *descr = hipGetErrorString(_p_hip_err__); \
67         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hip error %d (%s) : %s", (PetscErrorCode)_p_hip_err__, name, descr); \
68       } \
69     } while (0)
70   #define CHKERRHIP(...) PetscCallHIP(__VA_ARGS__)
71 
72   #define PetscHIPCheckLaunch \
73     do { \
74       /* Check synchronous errors, i.e. pre-launch */ \
75       PetscCallHIP(hipGetLastError()); \
76       /* Check asynchronous errors, i.e. kernel failed (ULF) */ \
77       PetscCallHIP(hipDeviceSynchronize()); \
78     } while (0)
79 
80   #define PetscCallHIPBLAS(...) \
81     do { \
82       const hipblasStatus_t _p_hipblas_stat__ = __VA_ARGS__; \
83       if (PetscUnlikely(_p_hipblas_stat__ != HIPBLAS_STATUS_SUCCESS)) { \
84         const char *name = PetscHIPBLASGetErrorName(_p_hipblas_stat__); \
85         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipBLAS error %d (%s)", (PetscErrorCode)_p_hipblas_stat__, name); \
86       } \
87     } while (0)
88   #define CHKERRHIPBLAS(...) PetscCallHIPBLAS(__VA_ARGS__)
89 
90   #if PETSC_PKG_HIP_VERSION_GE(4, 5, 0)
91     /* HIPSPARSE & HIPSOLVER have better functionality with ROCm-4.5 or newer */
92     #define PetscCallHIPSPARSE(...) \
93       do { \
94         const hipsparseStatus_t _p_hipsparse_stat__ = __VA_ARGS__; \
95         if (PetscUnlikely(_p_hipsparse_stat__ != HIPSPARSE_STATUS_SUCCESS)) { \
96           const char *name = PetscHIPSPARSEGetErrorName(_p_hipsparse_stat__); \
97           PetscCheck((_p_hipsparse_stat__ != HIPSPARSE_STATUS_NOT_INITIALIZED) && (_p_hipsparse_stat__ != HIPSPARSE_STATUS_ALLOC_FAILED), PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, "hipSPARSE errorcode %d (%s): Reports not initialized or alloc failed; this indicates the GPU has run out resources", (int)_p_hipsparse_stat__, name); \
98           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSPARSE errorcode %d (%s)", (int)_p_hipsparse_stat__, name); \
99         } \
100       } while (0)
101     #define CHKERRHIPSPARSE(...) PetscCallHIPSPARSE(__VA_ARGS__)
102 
103     #define PetscCallHIPSOLVER(...) \
104       do { \
105         const hipsolverStatus_t _p_hipsolver_stat__ = __VA_ARGS__; \
106         if (PetscUnlikely(_p_hipsolver_stat__ != HIPSOLVER_STATUS_SUCCESS)) { \
107           const char *name = PetscHIPSolverGetErrorName(_p_hipsolver_stat__); \
108           if (((_p_hipsolver_stat__ == HIPSOLVER_STATUS_NOT_INITIALIZED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_ALLOC_FAILED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_HIP)) { \
109             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
110                     "hipSolver error %d (%s). " \
111                     "This indicates the GPU may have run out resources", \
112                     (PetscErrorCode)_p_hipsolver_stat__, name); \
113           } else { \
114             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSolver error %d (%s)", (PetscErrorCode)_p_hipsolver_stat__, name); \
115           } \
116         } \
117       } while (0)
118     #define CHKERRHIPSOLVER(...) PetscCallHIPSOLVER(__VA_ARGS__)
119 
120   #else /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */
121     /* hipSolver does not exist yet so we work around it
122   rocSOLVER users rocBLAS for the handle
123   * */
124     #if defined(__HIP_PLATFORM_NVCC__)
125       #include <cusolverDn.h>
126 typedef cusolverDnHandle_t hipsolverHandle_t;
127 typedef cusolverStatus_t   hipsolverStatus_t;
128 
129 /* Alias hipsolverDestroy to cusolverDnDestroy */
hipsolverDestroy(hipsolverHandle_t * hipsolverhandle)130 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle)
131 {
132   return cusolverDnDestroy(hipsolverhandle);
133 }
134 
135 /* Alias hipsolverCreate to cusolverDnCreate */
hipsolverCreate(hipsolverHandle_t * hipsolverhandle)136 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
137 {
138   return cusolverDnCreate(hipsolverhandle);
139 }
140 
141 /* Alias hipsolverGetStream to cusolverDnGetStream */
hipsolverGetStream(hipsolverHandle_t handle,hipStream_t * stream)142 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
143 {
144   return cusolverDnGetStream(handle, stream);
145 }
146 
147 /* Alias hipsolverSetStream to cusolverDnSetStream */
hipsolverSetStream(hipsolverHandle_t handle,hipStream_t stream)148 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
149 {
150   return cusolveDnSetStream(handle, stream);
151 }
152     #else /* __HIP_PLATFORM_HCC__ */
153       #include <rocsolver.h>
154       #include <rocblas.h>
155 typedef rocblas_handle hipsolverHandle_t;
156 typedef rocblas_status hipsolverStatus_t;
157 
158 /* Alias hipsolverDestroy to rocblas_destroy_handle */
hipsolverDestroy(hipsolverHandle_t hipsolverhandle)159 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t hipsolverhandle)
160 {
161   return rocblas_destroy_handle(hipsolverhandle);
162 }
163 
164 /* Alias hipsolverCreate to rocblas_destroy_handle */
hipsolverCreate(hipsolverHandle_t * hipsolverhandle)165 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
166 {
167   return rocblas_create_handle(hipsolverhandle);
168 }
169 
170 // Alias hipsolverGetStream to rocblas_get_stream
hipsolverGetStream(hipsolverHandle_t handle,hipStream_t * stream)171 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
172 {
173   return rocblas_get_stream(handle, stream);
174 }
175 
176 // Alias hipsolverSetStream to rocblas_set_stream
hipsolverSetStream(hipsolverHandle_t handle,hipStream_t stream)177 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
178 {
179   return rocblas_set_stream(handle, stream);
180 }
181     #endif // __HIP_PLATFORM_NVCC__
182   #endif   /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */
183 // REMOVE ME
184 PETSC_EXTERN hipStream_t    PetscDefaultHipStream; // The default stream used by PETSc
185 PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t *);
186 PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t *);
187 PETSC_EXTERN PetscErrorCode PetscGetCurrentHIPStream(hipStream_t *);
188 
189 #endif // PETSC_HAVE_HIP
190 
191 // these can also be defined in petscdevice_cuda.h so we undef and define them *only* if the
192 // current compiler is HCC. In this case if petscdevice_cuda.h is included first, the macros
193 // would already be defined, but they would be empty since we cannot be using NVCC at the same
194 // time.
195 #if PetscDefined(USING_HCC)
196   #undef PETSC_HOST_DECL
197   #undef PETSC_DEVICE_DECL
198   #undef PETSC_KERNEL_DECL
199   #undef PETSC_SHAREDMEM_DECL
200   #undef PETSC_FORCEINLINE
201   #undef PETSC_CONSTMEM_DECL
202 
203   #define PETSC_HOST_DECL      __host__
204   #define PETSC_DEVICE_DECL    __device__
205   #define PETSC_KERNEL_DECL    __global__
206   #define PETSC_SHAREDMEM_DECL __shared__
207   #define PETSC_FORCEINLINE    __forceinline__
208   #define PETSC_CONSTMEM_DECL  __constant__
209 #endif
210 
211 #if !defined(PETSC_HOST_DECL) // use HOST_DECL as canary
212   #define PETSC_HOST_DECL
213   #define PETSC_DEVICE_DECL
214   #define PETSC_KERNEL_DECL
215   #define PETSC_SHAREDMEM_DECL
216   #define PETSC_FORCEINLINE inline
217   #define PETSC_CONSTMEM_DECL
218 #endif
219 
220 #if !defined(PETSC_DEVICE_DEFINED_DECLS_PRIVATE)
221   #define PETSC_DEVICE_DEFINED_DECLS_PRIVATE
222   #define PETSC_HOSTDEVICE_DECL        PETSC_HOST_DECL PETSC_DEVICE_DECL
223   #define PETSC_DEVICE_INLINE_DECL     PETSC_DEVICE_DECL PETSC_FORCEINLINE
224   #define PETSC_HOSTDEVICE_INLINE_DECL PETSC_HOSTDEVICE_DECL PETSC_FORCEINLINE
225 #endif
226