xref: /petsc/include/petscdevice_cuda.h (revision 21e3ffae2f3b73c0bd738cf6d0a809700fc04bb0)
1 #ifndef PETSCDEVICE_CUDA_H
2 #define PETSCDEVICE_CUDA_H
3 
4 #include <petscdevice.h>
5 #include <petscpkg_version.h>
6 
7 #if defined(__NVCC__) || defined(__CUDACC__)
8   #define PETSC_USING_NVCC 1
9 #endif
10 
11 #if PetscDefined(HAVE_CUDA)
12   #include <cuda.h>
13   #include <cuda_runtime.h>
14   #include <cublas_v2.h>
15   #include <cusolverDn.h>
16   #include <cusolverSp.h>
17   #include <cufft.h>
18 
19 /* cuBLAS does not have cublasGetErrorName(). We create one on our own. */
20 PETSC_EXTERN const char *PetscCUBLASGetErrorName(cublasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRCUBLAS macro */
21 PETSC_EXTERN const char *PetscCUSolverGetErrorName(cusolverStatus_t);
22 PETSC_EXTERN const char *PetscCUFFTGetErrorName(cufftResult);
23 
24   /* REMOVE ME */
25   #define WaitForCUDA() cudaDeviceSynchronize()
26 
27   /* CUDART_VERSION = 1000 x major + 10 x minor version */
28 
29   /* Could not find exactly which CUDART_VERSION introduced cudaGetErrorName. At least it was in CUDA 8.0 (Sep. 2016) */
30   #if PETSC_PKG_CUDA_VERSION_GE(8, 0, 0)
31     #define PetscCallCUDAVoid(...) \
32       do { \
33         const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
34         PetscCheckAbort(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d (%s) : %s", (PetscErrorCode)_p_cuda_err__, cudaGetErrorName(_p_cuda_err__), cudaGetErrorString(_p_cuda_err__)); \
35       } while (0)
36 
37     #define PetscCallCUDA(...) \
38       do { \
39         const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
40         PetscCheck(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d (%s) : %s", (PetscErrorCode)_p_cuda_err__, cudaGetErrorName(_p_cuda_err__), cudaGetErrorString(_p_cuda_err__)); \
41       } while (0)
42   #else /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
43     #define PetscCallCUDA(...) \
44       do { \
45         const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
46         PetscCheck(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d", (PetscErrorCode)_p_cuda_err__); \
47       } while (0)
48 
49     #define PetscCallCUDAVoid(...) \
50       do { \
51         const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
52         PetscCheckAbort(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d", (PetscErrorCode)_p_cuda_err__); \
53       } while (0)
54   #endif /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
55   #define CHKERRCUDA(...) PetscCallCUDA(__VA_ARGS__)
56 
57   #define PetscCUDACheckLaunch \
58     do { \
59       /* Check synchronous errors, i.e. pre-launch */ \
60       PetscCallCUDA(cudaGetLastError()); \
61       /* Check asynchronous errors, i.e. kernel failed (ULF) */ \
62       PetscCallCUDA(cudaDeviceSynchronize()); \
63     } while (0)
64 
65   #define PetscCallCUBLAS(...) \
66     do { \
67       const cublasStatus_t _p_cublas_stat__ = __VA_ARGS__; \
68       if (PetscUnlikely(_p_cublas_stat__ != CUBLAS_STATUS_SUCCESS)) { \
69         const char *name = PetscCUBLASGetErrorName(_p_cublas_stat__); \
70         if (((_p_cublas_stat__ == CUBLAS_STATUS_NOT_INITIALIZED) || (_p_cublas_stat__ == CUBLAS_STATUS_ALLOC_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
71           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
72                   "cuBLAS error %d (%s). " \
73                   "Reports not initialized or alloc failed; " \
74                   "this indicates the GPU may have run out resources", \
75                   (PetscErrorCode)_p_cublas_stat__, name); \
76         } else { \
77           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuBLAS error %d (%s)", (PetscErrorCode)_p_cublas_stat__, name); \
78         } \
79       } \
80     } while (0)
81   #define CHKERRCUBLAS(...) PetscCallCUBLAS(__VA_ARGS__)
82 
83   #if (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) /* According to cuda/10.1.168 on OLCF Summit */
84     #define PetscCallCUSPARSE(...) \
85       do { \
86         const cusparseStatus_t _p_cusparse_stat__ = __VA_ARGS__; \
87         if (PetscUnlikely(_p_cusparse_stat__)) { \
88           const char *name  = cusparseGetErrorName(_p_cusparse_stat__); \
89           const char *descr = cusparseGetErrorString(_p_cusparse_stat__); \
90           PetscCheck((_p_cusparse_stat__ != CUSPARSE_STATUS_NOT_INITIALIZED) && (_p_cusparse_stat__ != CUSPARSE_STATUS_ALLOC_FAILED), PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
91                      "cuSPARSE errorcode %d (%s) : %s.; " \
92                      "this indicates the GPU has run out resources", \
93                      (int)_p_cusparse_stat__, name, descr); \
94           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSPARSE errorcode %d (%s) : %s", (int)_p_cusparse_stat__, name, descr); \
95         } \
96       } while (0)
97   #else /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */
98     #define PetscCallCUSPARSE(...) \
99       do { \
100         const cusparseStatus_t _p_cusparse_stat__ = __VA_ARGS__; \
101         PetscCheck(_p_cusparse_stat__ == CUSPARSE_STATUS_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSPARSE errorcode %d", (PetscErrorCode)_p_cusparse_stat__); \
102       } while (0)
103   #endif /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */
104   #define CHKERRCUSPARSE(...) PetscCallCUSPARSE(__VA_ARGS__)
105 
106   #define PetscCallCUSOLVER(...) \
107     do { \
108       const cusolverStatus_t _p_cusolver_stat__ = __VA_ARGS__; \
109       if (PetscUnlikely(_p_cusolver_stat__ != CUSOLVER_STATUS_SUCCESS)) { \
110         const char *name = PetscCUSolverGetErrorName(_p_cusolver_stat__); \
111         if (((_p_cusolver_stat__ == CUSOLVER_STATUS_NOT_INITIALIZED) || (_p_cusolver_stat__ == CUSOLVER_STATUS_ALLOC_FAILED) || (_p_cusolver_stat__ == CUSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
112           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
113                   "cuSolver error %d (%s). " \
114                   "This indicates the GPU may have run out resources", \
115                   (PetscErrorCode)_p_cusolver_stat__, name); \
116         } else { \
117           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSolver error %d (%s)", (PetscErrorCode)_p_cusolver_stat__, name); \
118         } \
119       } \
120     } while (0)
121   #define CHKERRCUSOLVER(...) PetscCallCUSOLVER(__VA_ARGS__)
122 
123   #define PetscCallCUFFT(...) \
124     do { \
125       const cufftResult_t _p_cufft_stat__ = __VA_ARGS__; \
126       if (PetscUnlikely(_p_cufft_stat__ != CUFFT_SUCCESS)) { \
127         const char *name = PetscCUFFTGetErrorName(_p_cufft_stat__); \
128         if (((_p_cufft_stat__ == CUFFT_SETUP_FAILED) || (_p_cufft_stat__ == CUFFT_ALLOC_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
129           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
130                   "cuFFT error %d (%s). " \
131                   "Reports not initialized or alloc failed; " \
132                   "this indicates the GPU has run out resources", \
133                   (PetscErrorCode)_p_cufft_stat__, name); \
134         } else { \
135           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuFFT error %d (%s)", (PetscErrorCode)_p_cufft_stat__, name); \
136         } \
137       } \
138     } while (0)
139   #define CHKERRCUFFT(...) PetscCallCUFFT(__VA_ARGS__)
140 
141   #define PetscCallCURAND(...) \
142     do { \
143       const curandStatus_t _p_curand_stat__ = __VA_ARGS__; \
144       if (PetscUnlikely(_p_curand_stat__ != CURAND_STATUS_SUCCESS)) { \
145         if (((_p_curand_stat__ == CURAND_STATUS_INITIALIZATION_FAILED) || (_p_curand_stat__ == CURAND_STATUS_ALLOCATION_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
146           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
147                   "cuRAND error %d. " \
148                   "Reports not initialized or alloc failed; " \
149                   "this indicates the GPU has run out resources", \
150                   (PetscErrorCode)_p_curand_stat__); \
151         } else { \
152           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuRand error %d", (PetscErrorCode)_p_curand_stat__); \
153         } \
154       } \
155     } while (0)
156   #define CHKERRCURAND(...) PetscCallCURAND(__VA_ARGS__)
157 
158 PETSC_EXTERN cudaStream_t   PetscDefaultCudaStream; // The default stream used by PETSc
159 PETSC_EXTERN PetscErrorCode PetscCUBLASGetHandle(cublasHandle_t *);
160 PETSC_EXTERN PetscErrorCode PetscCUSOLVERDnGetHandle(cusolverDnHandle_t *);
161 
162 #endif // PETSC_HAVE_CUDA
163 
164 // these can also be defined in petscdevice_hip.h so we undef and define them *only* if the
165 // current compiler is NVCC. In this case if petscdevice_hip.h is included first, the macros
166 // would already be defined, but they would be empty since we cannot be using HCC at the same
167 // time.
168 #if PetscDefined(USING_NVCC)
169   #undef PETSC_HOST_DECL
170   #undef PETSC_DEVICE_DECL
171   #undef PETSC_KERNEL_DECL
172   #undef PETSC_SHAREDMEM_DECL
173   #undef PETSC_FORCEINLINE
174   #undef PETSC_CONSTMEM_DECL
175 
176   #define PETSC_HOST_DECL      __host__
177   #define PETSC_DEVICE_DECL    __device__
178   #define PETSC_KERNEL_DECL    __global__
179   #define PETSC_SHAREDMEM_DECL __shared__
180   #define PETSC_FORCEINLINE    __forceinline__
181   #define PETSC_CONSTMEM_DECL  __constant__
182 #endif
183 
184 #ifndef PETSC_HOST_DECL // use HOST_DECL as canary
185   #define PETSC_HOST_DECL
186   #define PETSC_DEVICE_DECL
187   #define PETSC_KERNEL_DECL
188   #define PETSC_SHAREDMEM_DECL
189   #define PETSC_FORCEINLINE inline
190   #define PETSC_CONSTMEM_DECL
191 #endif
192 
193 #ifndef PETSC_DEVICE_DEFINED_DECLS_PRIVATE
194   #define PETSC_DEVICE_DEFINED_DECLS_PRIVATE
195   #define PETSC_HOSTDEVICE_DECL        PETSC_HOST_DECL PETSC_DEVICE_DECL
196   #define PETSC_DEVICE_INLINE_DECL     PETSC_DEVICE_DECL PETSC_FORCEINLINE
197   #define PETSC_HOSTDEVICE_INLINE_DECL PETSC_HOSTDEVICE_DECL PETSC_FORCEINLINE
198 #endif
199 
200 #endif // PETSCDEVICE_CUDA_H
201