1a4963045SJacob Faibussowitsch #pragma once
20e6b6b59SJacob Faibussowitsch
30e6b6b59SJacob Faibussowitsch #include <petscdevice.h>
40e6b6b59SJacob Faibussowitsch #include <petscpkg_version.h>
50e6b6b59SJacob Faibussowitsch
6ce78bad3SBarry Smith /* MANSEC = Sys */
7ce78bad3SBarry Smith /* SUBMANSEC = Device */
8ce78bad3SBarry Smith
90e6b6b59SJacob Faibussowitsch #if defined(__HCC__) || (defined(__clang__) && defined(__HIP__))
100e6b6b59SJacob Faibussowitsch #define PETSC_USING_HCC 1
110e6b6b59SJacob Faibussowitsch #endif
120e6b6b59SJacob Faibussowitsch
130e6b6b59SJacob Faibussowitsch #if PetscDefined(HAVE_HIP)
140e6b6b59SJacob Faibussowitsch #include <hip/hip_runtime.h>
150e6b6b59SJacob Faibussowitsch
160e6b6b59SJacob Faibussowitsch #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0)
17731341e5SJunchao Zhang
18731341e5SJunchao Zhang // cupmScalarPtrCast() returns hip{Float,Double}Complex while hipBLAS uses hipBlas{Float,Double}Complex, causing many VecCUPM errors like
19731341e5SJunchao Zhang // error: no matching function for call to 'cupmBlasXdot'.
20731341e5SJunchao Zhang // Before rocm-6.0, one can define ROCM_MATHLIBS_API_USE_HIP_COMPLEX to force rocm to 'typedef hipDoubleComplex hipBlasDoubleComplex' for example.
21731341e5SJunchao Zhang // Since then, ROCM_MATHLIBS_API_USE_HIP_COMPLEX is deprecated, and one can define HIPBLAS_V2 to use version 2 of hipBLAS that directly use hipDoubleComplex etc.
22731341e5SJunchao Zhang // Per AMD, HIPBLAS_V2 will be removed in the future so that hipBLAS only provides updated APIs (but not yet in 6.2.2 as of Sep. 27, 2024).
23731341e5SJunchao Zhang //
24731341e5SJunchao Zhang // see https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.0.0/functions.html#complex-datatypes
25731341e5SJunchao Zhang // and https://rocm.docs.amd.com/projects/hipBLAS/en/docs-6.2.2/functions.html#hipblas-v2-and-deprecations
26731341e5SJunchao Zhang #if PETSC_PKG_HIP_VERSION_GE(6, 0, 0)
27731341e5SJunchao Zhang #define HIPBLAS_V2
28731341e5SJunchao Zhang #else
29731341e5SJunchao Zhang #define ROCM_MATHLIBS_API_USE_HIP_COMPLEX
30731341e5SJunchao Zhang #endif
310e6b6b59SJacob Faibussowitsch #include <hipblas/hipblas.h>
3247d993e7Ssuyashtn #include <hipsparse/hipsparse.h>
330e6b6b59SJacob Faibussowitsch #else
340e6b6b59SJacob Faibussowitsch #include <hipblas.h>
3547d993e7Ssuyashtn #include <hipsparse.h>
360e6b6b59SJacob Faibussowitsch #endif
370e6b6b59SJacob Faibussowitsch
38c0d63f2fSJustin Chang #if PETSC_PKG_HIP_VERSION_LT(5, 4, 0)
39c0d63f2fSJustin Chang #define HIPSPARSE_ORDER_COL HIPSPARSE_ORDER_COLUMN
40c0d63f2fSJustin Chang #endif
41c0d63f2fSJustin Chang
420e6b6b59SJacob Faibussowitsch #if defined(__HIP_PLATFORM_NVCC__)
430e6b6b59SJacob Faibussowitsch #include <cusolverDn.h>
440e6b6b59SJacob Faibussowitsch #else // __HIP_PLATFORM_HCC__
450e6b6b59SJacob Faibussowitsch #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0)
4647d993e7Ssuyashtn #include <hipsolver/hipsolver.h>
470e6b6b59SJacob Faibussowitsch #else
4847d993e7Ssuyashtn #include <hipsolver.h>
490e6b6b59SJacob Faibussowitsch #endif
500e6b6b59SJacob Faibussowitsch #endif // __HIP_PLATFORM_NVCC__
510e6b6b59SJacob Faibussowitsch #include <hip/hip_complex.h> // for hipComplex, hipDoubleComplex
520e6b6b59SJacob Faibussowitsch
530e6b6b59SJacob Faibussowitsch // REMOVE ME
540e6b6b59SJacob Faibussowitsch #define WaitForHIP() hipDeviceSynchronize()
550e6b6b59SJacob Faibussowitsch
5647d993e7Ssuyashtn /* hipBLAS, hipSPARSE and hipSolver does not have hip*GetErrorName(). We create one on our own. */
570e6b6b59SJacob Faibussowitsch PETSC_EXTERN const char *PetscHIPBLASGetErrorName(hipblasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */
5847d993e7Ssuyashtn PETSC_EXTERN const char *PetscHIPSPARSEGetErrorName(hipsparseStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSPARSE macro */
5947d993e7Ssuyashtn PETSC_EXTERN const char *PetscHIPSolverGetErrorName(hipsolverStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPSOLVER macro */
600e6b6b59SJacob Faibussowitsch
610e6b6b59SJacob Faibussowitsch #define PetscCallHIP(...) \
620e6b6b59SJacob Faibussowitsch do { \
630e6b6b59SJacob Faibussowitsch const hipError_t _p_hip_err__ = __VA_ARGS__; \
640e6b6b59SJacob Faibussowitsch if (PetscUnlikely(_p_hip_err__ != hipSuccess)) { \
650e6b6b59SJacob Faibussowitsch const char *name = hipGetErrorName(_p_hip_err__); \
660e6b6b59SJacob Faibussowitsch const char *descr = hipGetErrorString(_p_hip_err__); \
670e6b6b59SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hip error %d (%s) : %s", (PetscErrorCode)_p_hip_err__, name, descr); \
680e6b6b59SJacob Faibussowitsch } \
690e6b6b59SJacob Faibussowitsch } while (0)
700e6b6b59SJacob Faibussowitsch #define CHKERRHIP(...) PetscCallHIP(__VA_ARGS__)
710e6b6b59SJacob Faibussowitsch
7247d993e7Ssuyashtn #define PetscHIPCheckLaunch \
7347d993e7Ssuyashtn do { \
7447d993e7Ssuyashtn /* Check synchronous errors, i.e. pre-launch */ \
7547d993e7Ssuyashtn PetscCallHIP(hipGetLastError()); \
7647d993e7Ssuyashtn /* Check asynchronous errors, i.e. kernel failed (ULF) */ \
7747d993e7Ssuyashtn PetscCallHIP(hipDeviceSynchronize()); \
7847d993e7Ssuyashtn } while (0)
7947d993e7Ssuyashtn
800e6b6b59SJacob Faibussowitsch #define PetscCallHIPBLAS(...) \
810e6b6b59SJacob Faibussowitsch do { \
820e6b6b59SJacob Faibussowitsch const hipblasStatus_t _p_hipblas_stat__ = __VA_ARGS__; \
830e6b6b59SJacob Faibussowitsch if (PetscUnlikely(_p_hipblas_stat__ != HIPBLAS_STATUS_SUCCESS)) { \
840e6b6b59SJacob Faibussowitsch const char *name = PetscHIPBLASGetErrorName(_p_hipblas_stat__); \
850e6b6b59SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipBLAS error %d (%s)", (PetscErrorCode)_p_hipblas_stat__, name); \
860e6b6b59SJacob Faibussowitsch } \
870e6b6b59SJacob Faibussowitsch } while (0)
880e6b6b59SJacob Faibussowitsch #define CHKERRHIPBLAS(...) PetscCallHIPBLAS(__VA_ARGS__)
890e6b6b59SJacob Faibussowitsch
9047d993e7Ssuyashtn #if PETSC_PKG_HIP_VERSION_GE(4, 5, 0)
9147d993e7Ssuyashtn /* HIPSPARSE & HIPSOLVER have better functionality with ROCm-4.5 or newer */
9247d993e7Ssuyashtn #define PetscCallHIPSPARSE(...) \
9347d993e7Ssuyashtn do { \
9447d993e7Ssuyashtn const hipsparseStatus_t _p_hipsparse_stat__ = __VA_ARGS__; \
9547d993e7Ssuyashtn if (PetscUnlikely(_p_hipsparse_stat__ != HIPSPARSE_STATUS_SUCCESS)) { \
9647d993e7Ssuyashtn const char *name = PetscHIPSPARSEGetErrorName(_p_hipsparse_stat__); \
9747d993e7Ssuyashtn PetscCheck((_p_hipsparse_stat__ != HIPSPARSE_STATUS_NOT_INITIALIZED) && (_p_hipsparse_stat__ != HIPSPARSE_STATUS_ALLOC_FAILED), PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, "hipSPARSE errorcode %d (%s): Reports not initialized or alloc failed; this indicates the GPU has run out resources", (int)_p_hipsparse_stat__, name); \
9847d993e7Ssuyashtn SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSPARSE errorcode %d (%s)", (int)_p_hipsparse_stat__, name); \
9947d993e7Ssuyashtn } \
10047d993e7Ssuyashtn } while (0)
10147d993e7Ssuyashtn #define CHKERRHIPSPARSE(...) PetscCallHIPSPARSE(__VA_ARGS__)
10247d993e7Ssuyashtn
1030e6b6b59SJacob Faibussowitsch #define PetscCallHIPSOLVER(...) \
1040e6b6b59SJacob Faibussowitsch do { \
1050e6b6b59SJacob Faibussowitsch const hipsolverStatus_t _p_hipsolver_stat__ = __VA_ARGS__; \
10647d993e7Ssuyashtn if (PetscUnlikely(_p_hipsolver_stat__ != HIPSOLVER_STATUS_SUCCESS)) { \
10747d993e7Ssuyashtn const char *name = PetscHIPSolverGetErrorName(_p_hipsolver_stat__); \
10847d993e7Ssuyashtn if (((_p_hipsolver_stat__ == HIPSOLVER_STATUS_NOT_INITIALIZED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_ALLOC_FAILED) || (_p_hipsolver_stat__ == HIPSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_HIP)) { \
10947d993e7Ssuyashtn SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
11047d993e7Ssuyashtn "hipSolver error %d (%s). " \
11147d993e7Ssuyashtn "This indicates the GPU may have run out resources", \
11247d993e7Ssuyashtn (PetscErrorCode)_p_hipsolver_stat__, name); \
11347d993e7Ssuyashtn } else { \
11447d993e7Ssuyashtn SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipSolver error %d (%s)", (PetscErrorCode)_p_hipsolver_stat__, name); \
11547d993e7Ssuyashtn } \
11647d993e7Ssuyashtn } \
1170e6b6b59SJacob Faibussowitsch } while (0)
1180e6b6b59SJacob Faibussowitsch #define CHKERRHIPSOLVER(...) PetscCallHIPSOLVER(__VA_ARGS__)
1190e6b6b59SJacob Faibussowitsch
12047d993e7Ssuyashtn #else /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */
1210e6b6b59SJacob Faibussowitsch /* hipSolver does not exist yet so we work around it
1220e6b6b59SJacob Faibussowitsch rocSOLVER users rocBLAS for the handle
1230e6b6b59SJacob Faibussowitsch * */
1240e6b6b59SJacob Faibussowitsch #if defined(__HIP_PLATFORM_NVCC__)
12547d993e7Ssuyashtn #include <cusolverDn.h>
1260e6b6b59SJacob Faibussowitsch typedef cusolverDnHandle_t hipsolverHandle_t;
1270e6b6b59SJacob Faibussowitsch typedef cusolverStatus_t hipsolverStatus_t;
1280e6b6b59SJacob Faibussowitsch
1290e6b6b59SJacob Faibussowitsch /* Alias hipsolverDestroy to cusolverDnDestroy */
hipsolverDestroy(hipsolverHandle_t * hipsolverhandle)130d71ae5a4SJacob Faibussowitsch static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle)
131d71ae5a4SJacob Faibussowitsch {
1320e6b6b59SJacob Faibussowitsch return cusolverDnDestroy(hipsolverhandle);
1330e6b6b59SJacob Faibussowitsch }
1340e6b6b59SJacob Faibussowitsch
1350e6b6b59SJacob Faibussowitsch /* Alias hipsolverCreate to cusolverDnCreate */
hipsolverCreate(hipsolverHandle_t * hipsolverhandle)136d71ae5a4SJacob Faibussowitsch static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
137d71ae5a4SJacob Faibussowitsch {
1380e6b6b59SJacob Faibussowitsch return cusolverDnCreate(hipsolverhandle);
1390e6b6b59SJacob Faibussowitsch }
1400e6b6b59SJacob Faibussowitsch
1410e6b6b59SJacob Faibussowitsch /* Alias hipsolverGetStream to cusolverDnGetStream */
hipsolverGetStream(hipsolverHandle_t handle,hipStream_t * stream)142d71ae5a4SJacob Faibussowitsch static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
143d71ae5a4SJacob Faibussowitsch {
1440e6b6b59SJacob Faibussowitsch return cusolverDnGetStream(handle, stream);
1450e6b6b59SJacob Faibussowitsch }
1460e6b6b59SJacob Faibussowitsch
1470e6b6b59SJacob Faibussowitsch /* Alias hipsolverSetStream to cusolverDnSetStream */
hipsolverSetStream(hipsolverHandle_t handle,hipStream_t stream)148d71ae5a4SJacob Faibussowitsch static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
149d71ae5a4SJacob Faibussowitsch {
1500e6b6b59SJacob Faibussowitsch return cusolveDnSetStream(handle, stream);
1510e6b6b59SJacob Faibussowitsch }
1520e6b6b59SJacob Faibussowitsch #else /* __HIP_PLATFORM_HCC__ */
15347d993e7Ssuyashtn #include <rocsolver.h>
15447d993e7Ssuyashtn #include <rocblas.h>
1550e6b6b59SJacob Faibussowitsch typedef rocblas_handle hipsolverHandle_t;
1560e6b6b59SJacob Faibussowitsch typedef rocblas_status hipsolverStatus_t;
1570e6b6b59SJacob Faibussowitsch
1580e6b6b59SJacob Faibussowitsch /* Alias hipsolverDestroy to rocblas_destroy_handle */
hipsolverDestroy(hipsolverHandle_t hipsolverhandle)159d71ae5a4SJacob Faibussowitsch static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t hipsolverhandle)
160d71ae5a4SJacob Faibussowitsch {
1610e6b6b59SJacob Faibussowitsch return rocblas_destroy_handle(hipsolverhandle);
1620e6b6b59SJacob Faibussowitsch }
1630e6b6b59SJacob Faibussowitsch
1640e6b6b59SJacob Faibussowitsch /* Alias hipsolverCreate to rocblas_destroy_handle */
hipsolverCreate(hipsolverHandle_t * hipsolverhandle)165d71ae5a4SJacob Faibussowitsch static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
166d71ae5a4SJacob Faibussowitsch {
1670e6b6b59SJacob Faibussowitsch return rocblas_create_handle(hipsolverhandle);
1680e6b6b59SJacob Faibussowitsch }
1690e6b6b59SJacob Faibussowitsch
1700e6b6b59SJacob Faibussowitsch // Alias hipsolverGetStream to rocblas_get_stream
hipsolverGetStream(hipsolverHandle_t handle,hipStream_t * stream)171d71ae5a4SJacob Faibussowitsch static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
172d71ae5a4SJacob Faibussowitsch {
1730e6b6b59SJacob Faibussowitsch return rocblas_get_stream(handle, stream);
1740e6b6b59SJacob Faibussowitsch }
1750e6b6b59SJacob Faibussowitsch
1760e6b6b59SJacob Faibussowitsch // Alias hipsolverSetStream to rocblas_set_stream
hipsolverSetStream(hipsolverHandle_t handle,hipStream_t stream)177d71ae5a4SJacob Faibussowitsch static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
178d71ae5a4SJacob Faibussowitsch {
1790e6b6b59SJacob Faibussowitsch return rocblas_set_stream(handle, stream);
1800e6b6b59SJacob Faibussowitsch }
1810e6b6b59SJacob Faibussowitsch #endif // __HIP_PLATFORM_NVCC__
18247d993e7Ssuyashtn #endif /* PETSC_PKG_HIP_VERSION_GE(4,5,0) */
1830e6b6b59SJacob Faibussowitsch // REMOVE ME
1840e6b6b59SJacob Faibussowitsch PETSC_EXTERN hipStream_t PetscDefaultHipStream; // The default stream used by PETSc
1850e6b6b59SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t *);
1860e6b6b59SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t *);
1875c127019SJunchao Zhang PETSC_EXTERN PetscErrorCode PetscGetCurrentHIPStream(hipStream_t *);
1880e6b6b59SJacob Faibussowitsch
1890e6b6b59SJacob Faibussowitsch #endif // PETSC_HAVE_HIP
1900e6b6b59SJacob Faibussowitsch
19115af11aaSJacob Faibussowitsch // these can also be defined in petscdevice_cuda.h so we undef and define them *only* if the
19215af11aaSJacob Faibussowitsch // current compiler is HCC. In this case if petscdevice_cuda.h is included first, the macros
19315af11aaSJacob Faibussowitsch // would already be defined, but they would be empty since we cannot be using NVCC at the same
19415af11aaSJacob Faibussowitsch // time.
1950e6b6b59SJacob Faibussowitsch #if PetscDefined(USING_HCC)
19615af11aaSJacob Faibussowitsch #undef PETSC_HOST_DECL
19715af11aaSJacob Faibussowitsch #undef PETSC_DEVICE_DECL
19815af11aaSJacob Faibussowitsch #undef PETSC_KERNEL_DECL
19915af11aaSJacob Faibussowitsch #undef PETSC_SHAREDMEM_DECL
20015af11aaSJacob Faibussowitsch #undef PETSC_FORCEINLINE
20115af11aaSJacob Faibussowitsch #undef PETSC_CONSTMEM_DECL
20215af11aaSJacob Faibussowitsch
2030e6b6b59SJacob Faibussowitsch #define PETSC_HOST_DECL __host__
2040e6b6b59SJacob Faibussowitsch #define PETSC_DEVICE_DECL __device__
2050e6b6b59SJacob Faibussowitsch #define PETSC_KERNEL_DECL __global__
2060e6b6b59SJacob Faibussowitsch #define PETSC_SHAREDMEM_DECL __shared__
2070e6b6b59SJacob Faibussowitsch #define PETSC_FORCEINLINE __forceinline__
2080e6b6b59SJacob Faibussowitsch #define PETSC_CONSTMEM_DECL __constant__
20915af11aaSJacob Faibussowitsch #endif
21015af11aaSJacob Faibussowitsch
211*beceaeb6SBarry Smith #if !defined(PETSC_HOST_DECL) // use HOST_DECL as canary
2120e6b6b59SJacob Faibussowitsch #define PETSC_HOST_DECL
2130e6b6b59SJacob Faibussowitsch #define PETSC_DEVICE_DECL
2140e6b6b59SJacob Faibussowitsch #define PETSC_KERNEL_DECL
2150e6b6b59SJacob Faibussowitsch #define PETSC_SHAREDMEM_DECL
2160e6b6b59SJacob Faibussowitsch #define PETSC_FORCEINLINE inline
2170e6b6b59SJacob Faibussowitsch #define PETSC_CONSTMEM_DECL
21815af11aaSJacob Faibussowitsch #endif
2190e6b6b59SJacob Faibussowitsch
220*beceaeb6SBarry Smith #if !defined(PETSC_DEVICE_DEFINED_DECLS_PRIVATE)
22115af11aaSJacob Faibussowitsch #define PETSC_DEVICE_DEFINED_DECLS_PRIVATE
2220e6b6b59SJacob Faibussowitsch #define PETSC_HOSTDEVICE_DECL PETSC_HOST_DECL PETSC_DEVICE_DECL
2230e6b6b59SJacob Faibussowitsch #define PETSC_DEVICE_INLINE_DECL PETSC_DEVICE_DECL PETSC_FORCEINLINE
2240e6b6b59SJacob Faibussowitsch #define PETSC_HOSTDEVICE_INLINE_DECL PETSC_HOSTDEVICE_DECL PETSC_FORCEINLINE
22515af11aaSJacob Faibussowitsch #endif
226