xref: /petsc/include/petscdevice_cuda.h (revision 750b007cd8d816cecd9de99077bb0a703b4cf61a)
1 #ifndef PETSCDEVICE_CUDA_H
2 #define PETSCDEVICE_CUDA_H
3 
4 #include <petscdevice.h>
5 #include <petscpkg_version.h>
6 
7 #if defined(__NVCC__) || defined(__CUDACC__)
8 #define PETSC_USING_NVCC 1
9 #endif
10 
11 #if PetscDefined(HAVE_CUDA)
12 #include <cuda.h>
13 #include <cuda_runtime.h>
14 #include <cublas_v2.h>
15 #include <cusolverDn.h>
16 #include <cusolverSp.h>
17 #include <cufft.h>
18 
19 /* cuBLAS does not have cublasGetErrorName(). We create one on our own. */
20 PETSC_EXTERN const char *PetscCUBLASGetErrorName(cublasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRCUBLAS macro */
21 PETSC_EXTERN const char *PetscCUSolverGetErrorName(cusolverStatus_t);
22 PETSC_EXTERN const char *PetscCUFFTGetErrorName(cufftResult);
23 
24 /* REMOVE ME */
25 #define WaitForCUDA() cudaDeviceSynchronize()
26 
27 /* CUDART_VERSION = 1000 x major + 10 x minor version */
28 
29 /* Could not find exactly which CUDART_VERSION introduced cudaGetErrorName. At least it was in CUDA 8.0 (Sep. 2016) */
30 #if PETSC_PKG_CUDA_VERSION_GE(8, 0, 0)
31 #define PetscCallCUDAVoid(...) \
32   do { \
33     const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
34     PetscCheckAbort(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d (%s) : %s", (PetscErrorCode)_p_cuda_err__, cudaGetErrorName(_p_cuda_err__), cudaGetErrorString(_p_cuda_err__)); \
35   } while (0)
36 
37 #define PetscCallCUDA(...) \
38   do { \
39     const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
40     PetscCheck(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d (%s) : %s", (PetscErrorCode)_p_cuda_err__, cudaGetErrorName(_p_cuda_err__), cudaGetErrorString(_p_cuda_err__)); \
41   } while (0)
42 #else /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
43 #define PetscCallCUDA(...) \
44   do { \
45     const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
46     PetscCheck(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d", (PetscErrorCode)_p_cuda_err__); \
47   } while (0)
48 
49 #define PetscCallCUDAVoid(...) \
50   do { \
51     const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
52     PetscCheckAbort(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d", (PetscErrorCode)_p_cuda_err__); \
53   } while (0)
54 #endif /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
55 #define CHKERRCUDA(...) PetscCallCUDA(__VA_ARGS__)
56 
57 #define PetscCUDACheckLaunch \
58   do { \
59     /* Check synchronous errors, i.e. pre-launch */ \
60     PetscCallCUDA(cudaGetLastError()); \
61     /* Check asynchronous errors, i.e. kernel failed (ULF) */ \
62     PetscCallCUDA(cudaDeviceSynchronize()); \
63   } while (0)
64 
65 #define PetscCallCUBLAS(...) \
66   do { \
67     const cublasStatus_t _p_cublas_stat__ = __VA_ARGS__; \
68     if (PetscUnlikely(_p_cublas_stat__ != CUBLAS_STATUS_SUCCESS)) { \
69       const char *name = PetscCUBLASGetErrorName(_p_cublas_stat__); \
70       if (((_p_cublas_stat__ == CUBLAS_STATUS_NOT_INITIALIZED) || (_p_cublas_stat__ == CUBLAS_STATUS_ALLOC_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
71         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
72                 "cuBLAS error %d (%s). " \
73                 "Reports not initialized or alloc failed; " \
74                 "this indicates the GPU may have run out resources", \
75                 (PetscErrorCode)_p_cublas_stat__, name); \
76       } else { \
77         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuBLAS error %d (%s)", (PetscErrorCode)_p_cublas_stat__, name); \
78       } \
79     } \
80   } while (0)
81 #define CHKERRCUBLAS(...) PetscCallCUBLAS(__VA_ARGS__)
82 
83 #if (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) /* According to cuda/10.1.168 on OLCF Summit */
84 #define PetscCallCUSPARSE(...) \
85   do { \
86     const cusparseStatus_t _p_cusparse_stat__ = __VA_ARGS__; \
87     if (PetscUnlikely(_p_cusparse_stat__)) { \
88       const char *name  = cusparseGetErrorName(_p_cusparse_stat__); \
89       const char *descr = cusparseGetErrorString(_p_cusparse_stat__); \
90       PetscCheck((_p_cusparse_stat__ != CUSPARSE_STATUS_NOT_INITIALIZED) && (_p_cusparse_stat__ != CUSPARSE_STATUS_ALLOC_FAILED), PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
91                  "cuSPARSE errorcode %d (%s) : %s.; " \
92                  "this indicates the GPU has run out resources", \
93                  (int)_p_cusparse_stat__, name, descr); \
94       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSPARSE errorcode %d (%s) : %s", (int)_p_cusparse_stat__, name, descr); \
95     } \
96   } while (0)
97 #else /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */
98 #define PetscCallCUSPARSE(...) \
99   do { \
100     const cusparseStatus_t _p_cusparse_stat__ = __VA_ARGS__; \
101     PetscCheck(_p_cusparse_stat__ == CUSPARSE_STATUS_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSPARSE errorcode %d", (PetscErrorCode)_p_cusparse_stat__); \
102   } while (0)
103 #endif /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */
104 #define CHKERRCUSPARSE(...) PetscCallCUSPARSE(__VA_ARGS__)
105 
106 #define PetscCallCUSOLVER(...) \
107   do { \
108     const cusolverStatus_t _p_cusolver_stat__ = __VA_ARGS__; \
109     if (PetscUnlikely(_p_cusolver_stat__ != CUSOLVER_STATUS_SUCCESS)) { \
110       const char *name = PetscCUSolverGetErrorName(_p_cusolver_stat__); \
111       if (((_p_cusolver_stat__ == CUSOLVER_STATUS_NOT_INITIALIZED) || (_p_cusolver_stat__ == CUSOLVER_STATUS_ALLOC_FAILED) || (_p_cusolver_stat__ == CUSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
112         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
113                 "cuSolver error %d (%s). " \
114                 "This indicates the GPU may have run out resources", \
115                 (PetscErrorCode)_p_cusolver_stat__, name); \
116       } else { \
117         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSolver error %d (%s)", (PetscErrorCode)_p_cusolver_stat__, name); \
118       } \
119     } \
120   } while (0)
121 #define CHKERRCUSOLVER(...) PetscCallCUSOLVER(__VA_ARGS__)
122 
123 #define PetscCallCUFFT(...) \
124   do { \
125     const cufftResult_t _p_cufft_stat__ = __VA_ARGS__; \
126     if (PetscUnlikely(_p_cufft_stat__ != CUFFT_SUCCESS)) { \
127       const char *name = PetscCUFFTGetErrorName(_p_cufft_stat__); \
128       if (((_p_cufft_stat__ == CUFFT_SETUP_FAILED) || (_p_cufft_stat__ == CUFFT_ALLOC_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
129         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
130                 "cuFFT error %d (%s). " \
131                 "Reports not initialized or alloc failed; " \
132                 "this indicates the GPU has run out resources", \
133                 (PetscErrorCode)_p_cufft_stat__, name); \
134       } else { \
135         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuFFT error %d (%s)", (PetscErrorCode)_p_cufft_stat__, name); \
136       } \
137     } \
138   } while (0)
139 #define CHKERRCUFFT(...) PetscCallCUFFT(__VA_ARGS__)
140 
141 #define PetscCallCURAND(...) \
142   do { \
143     const curandStatus_t _p_curand_stat__ = __VA_ARGS__; \
144     if (PetscUnlikely(_p_curand_stat__ != CURAND_STATUS_SUCCESS)) { \
145       if (((_p_curand_stat__ == CURAND_STATUS_INITIALIZATION_FAILED) || (_p_curand_stat__ == CURAND_STATUS_ALLOCATION_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
146         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
147                 "cuRAND error %d. " \
148                 "Reports not initialized or alloc failed; " \
149                 "this indicates the GPU has run out resources", \
150                 (PetscErrorCode)_p_curand_stat__); \
151       } else { \
152         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuRand error %d", (PetscErrorCode)_p_curand_stat__); \
153       } \
154     } \
155   } while (0)
156 #define CHKERRCURAND(...) PetscCallCURAND(__VA_ARGS__)
157 
158 PETSC_EXTERN cudaStream_t   PetscDefaultCudaStream; // The default stream used by PETSc
159 PETSC_EXTERN PetscErrorCode PetscCUBLASGetHandle(cublasHandle_t *);
160 PETSC_EXTERN PetscErrorCode PetscCUSOLVERDnGetHandle(cusolverDnHandle_t *);
161 
162 #endif // PETSC_HAVE_CUDA
163 
164 // these can also be defined in petscdevice_hip.h
165 #ifndef PETSC_DEVICE_DEFINED_DECLS_PRIVATE
166 #define PETSC_DEVICE_DEFINED_DECLS_PRIVATE
167 #if PetscDefined(USING_NVCC)
168 #define PETSC_HOST_DECL      __host__
169 #define PETSC_DEVICE_DECL    __device__
170 #define PETSC_KERNEL_DECL    __global__
171 #define PETSC_SHAREDMEM_DECL __shared__
172 #define PETSC_FORCEINLINE    __forceinline__
173 #define PETSC_CONSTMEM_DECL  __constant__
174 #else
175 #define PETSC_HOST_DECL
176 #define PETSC_DEVICE_DECL
177 #define PETSC_KERNEL_DECL
178 #define PETSC_SHAREDMEM_DECL
179 #define PETSC_FORCEINLINE inline
180 #define PETSC_CONSTMEM_DECL
181 #endif // PETSC_USING_NVCC
182 
183 #define PETSC_HOSTDEVICE_DECL        PETSC_HOST_DECL PETSC_DEVICE_DECL
184 #define PETSC_DEVICE_INLINE_DECL     PETSC_DEVICE_DECL PETSC_FORCEINLINE
185 #define PETSC_HOSTDEVICE_INLINE_DECL PETSC_HOSTDEVICE_DECL PETSC_FORCEINLINE
186 #endif // PETSC_DEVICE_DEFINED_DECLS_PRIVATE
187 
188 #endif // PETSCDEVICE_CUDA_H
189