xref: /petsc/include/petscdevice.h (revision a69119a591a03a9d906b29c0a4e9802e4d7c9795)
1 #if !defined(PETSCDEVICE_H)
2 #define PETSCDEVICE_H
3 
4 #include <petscsys.h>
5 #include <petscdevicetypes.h>
6 #include <petscpkg_version.h>
7 
8 #if defined(PETSC_HAVE_CUDA)
9 #include <cuda.h>
10 #include <cuda_runtime.h>
11 #include <cublas_v2.h>
12 #include <cusolverDn.h>
13 #include <cusolverSp.h>
14 #include <cufft.h>
15 
16 /* cuBLAS does not have cublasGetErrorName(). We create one on our own. */
17 PETSC_EXTERN const char *PetscCUBLASGetErrorName(cublasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRCUBLAS macro */
18 PETSC_EXTERN const char *PetscCUSolverGetErrorName(cusolverStatus_t);
19 PETSC_EXTERN const char *PetscCUFFTGetErrorName(cufftResult);
20 
21 /* REMOVE ME: legacy convenience alias that expands directly to cudaDeviceSynchronize() */
22 #define WaitForCUDA() cudaDeviceSynchronize()
23 
24 /* CUDART_VERSION = 1000 x major + 10 x minor version */
25 
26 /* Could not find exactly which CUDART_VERSION introduced cudaGetErrorName. At least it was in CUDA 8.0 (Sep. 2016) */
27 #if PETSC_PKG_CUDA_VERSION_GE(8, 0, 0)
28 #define PetscCallCUDA(...) \
29   do { \
30     const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
31     if (PetscUnlikely(_p_cuda_err__ != cudaSuccess)) { \
32       const char *name  = cudaGetErrorName(_p_cuda_err__); \
33       const char *descr = cudaGetErrorString(_p_cuda_err__); \
34       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d (%s) : %s", (PetscErrorCode)_p_cuda_err__, name, descr); \
35     } \
36   } while (0)
37 #else /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
38 #define PetscCallCUDA(...) \
39   do { \
40     const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
41     PetscCheck(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d", (PetscErrorCode)_p_cuda_err__); \
42   } while (0)
43 #endif /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
44 #define CHKERRCUDA(...) PetscCallCUDA(__VA_ARGS__)
45 
46 #if PETSC_PKG_CUDA_VERSION_GE(8, 0, 0)
47 #define PetscCallCUDAVoid(...) \
48   do { \
49     const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
50     PetscCheckAbort(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d (%s) : %s", (PetscErrorCode)_p_cuda_err__, cudaGetErrorName(_p_cuda_err__), cudaGetErrorString(_p_cuda_err__)); \
51   } while (0)
52 #else /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
53 #define PetscCallCUDAVoid(...) \
54   do { \
55     const cudaError_t _p_cuda_err__ = __VA_ARGS__; \
56     PetscCheckAbort(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d", (PetscErrorCode)_p_cuda_err__); \
57   } while (0)
58 #endif /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
59 
/*
  PetscCUDACheckLaunch - two-step check, each step wrapped in PetscCallCUDA():
    1. cudaGetLastError()      - synchronous (pre-launch) errors
    2. cudaDeviceSynchronize() - asynchronous errors, i.e. the kernel itself failed (ULF)
  Note that this is an object-like macro: it is used without parentheses.
*/
#define PetscCUDACheckLaunch \
  do { \
    PetscCallCUDA(cudaGetLastError());      /* step 1: pre-launch errors */ \
    PetscCallCUDA(cudaDeviceSynchronize()); /* step 2: kernel execution errors */ \
  } while (0)
67 
/*
  PetscCallCUBLAS(call) - evaluates a cuBLAS call once and reports any status other than
  CUBLAS_STATUS_SUCCESS through SETERRQ on PETSC_COMM_SELF.

  CUBLAS_STATUS_NOT_INITIALIZED and CUBLAS_STATUS_ALLOC_FAILED are reported as
  PETSC_ERR_GPU_RESOURCE when PetscDeviceInitialized(PETSC_DEVICE_CUDA) is true; every other
  failure is reported as PETSC_ERR_GPU. The status name comes from PetscCUBLASGetErrorName().

  CHKERRCUBLAS(call) is an alias that forwards its arguments to PetscCallCUBLAS().
*/
#define PetscCallCUBLAS(...) \
  do { \
    const cublasStatus_t _p_cublas_stat__ = __VA_ARGS__; \
    if (PetscUnlikely(_p_cublas_stat__ != CUBLAS_STATUS_SUCCESS)) { \
      const char *_p_cublas_name__ = PetscCUBLASGetErrorName(_p_cublas_stat__); \
      /* the device query is only reached for the two resource-related status codes */ \
      const int _p_cublas_oom__ = ((_p_cublas_stat__ == CUBLAS_STATUS_NOT_INITIALIZED) || (_p_cublas_stat__ == CUBLAS_STATUS_ALLOC_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA); \
      if (_p_cublas_oom__) { \
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
                "cuBLAS error %d (%s). " \
                "Reports not initialized or alloc failed; " \
                "this indicates the GPU may have run out resources", \
                (PetscErrorCode)_p_cublas_stat__, _p_cublas_name__); \
      } else { \
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuBLAS error %d (%s)", (PetscErrorCode)_p_cublas_stat__, _p_cublas_name__); \
      } \
    } \
  } while (0)
#define CHKERRCUBLAS(...) PetscCallCUBLAS(__VA_ARGS__)
83 
/*
  PetscCallCUSPARSE(call) - evaluates a cuSPARSE call once and reports a failing status through
  SETERRQ on PETSC_COMM_SELF.

  When the cuSPARSE version test below holds, the message includes cusparseGetErrorName() and
  cusparseGetErrorString(), and CUSPARSE_STATUS_NOT_INITIALIZED / CUSPARSE_STATUS_ALLOC_FAILED are
  reported as PETSC_ERR_GPU_RESOURCE (all other failures as PETSC_ERR_GPU). Otherwise only the
  numeric code is reported, always as PETSC_ERR_GPU.

  CHKERRCUSPARSE(call) is an alias that forwards its arguments to PetscCallCUSPARSE().
*/
#if (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) /* According to cuda/10.1.168 on OLCF Summit */
#define PetscCallCUSPARSE(...) \
  do { \
    const cusparseStatus_t _p_cusparse_stat__ = __VA_ARGS__; \
    if (PetscUnlikely(_p_cusparse_stat__)) { \
      const char *_p_cusparse_name__  = cusparseGetErrorName(_p_cusparse_stat__); \
      const char *_p_cusparse_descr__ = cusparseGetErrorString(_p_cusparse_stat__); \
      if ((_p_cusparse_stat__ == CUSPARSE_STATUS_NOT_INITIALIZED) || (_p_cusparse_stat__ == CUSPARSE_STATUS_ALLOC_FAILED)) { \
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, "cuSPARSE errorcode %d (%s) : %s. Reports not initialized or alloc failed; this indicates the GPU has run out resources", (int)_p_cusparse_stat__, _p_cusparse_name__, _p_cusparse_descr__); \
      } else { \
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSPARSE errorcode %d (%s) : %s", (int)_p_cusparse_stat__, _p_cusparse_name__, _p_cusparse_descr__); \
      } \
    } \
  } while (0)
#else /* older cuSPARSE: numeric code only */
#define PetscCallCUSPARSE(...) \
  do { \
    const cusparseStatus_t _p_cusparse_stat__ = __VA_ARGS__; \
    if (PetscUnlikely(_p_cusparse_stat__ != CUSPARSE_STATUS_SUCCESS)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSPARSE errorcode %d", (PetscErrorCode)_p_cusparse_stat__); \
  } while (0)
#endif /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */
#define CHKERRCUSPARSE(...) PetscCallCUSPARSE(__VA_ARGS__)
103 
/*
  PetscCallCUSOLVER(call) - evaluates a cuSOLVER call once and reports any status other than
  CUSOLVER_STATUS_SUCCESS through SETERRQ on PETSC_COMM_SELF.

  CUSOLVER_STATUS_NOT_INITIALIZED, CUSOLVER_STATUS_ALLOC_FAILED and CUSOLVER_STATUS_INTERNAL_ERROR
  are reported as PETSC_ERR_GPU_RESOURCE when PetscDeviceInitialized(PETSC_DEVICE_CUDA) is true;
  every other failure is reported as PETSC_ERR_GPU. The status name comes from
  PetscCUSolverGetErrorName().

  CHKERRCUSOLVER(call) is an alias that forwards its arguments to PetscCallCUSOLVER().
*/
#define PetscCallCUSOLVER(...) \
  do { \
    const cusolverStatus_t _p_cusolver_stat__ = __VA_ARGS__; \
    if (PetscUnlikely(_p_cusolver_stat__ != CUSOLVER_STATUS_SUCCESS)) { \
      const char *_p_cusolver_name__ = PetscCUSolverGetErrorName(_p_cusolver_stat__); \
      /* the device query is only reached for the three resource-related status codes */ \
      const int _p_cusolver_oom__ = ((_p_cusolver_stat__ == CUSOLVER_STATUS_NOT_INITIALIZED) || (_p_cusolver_stat__ == CUSOLVER_STATUS_ALLOC_FAILED) || (_p_cusolver_stat__ == CUSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA); \
      if (_p_cusolver_oom__) { \
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
                "cuSolver error %d (%s). " \
                "This indicates the GPU may have run out resources", \
                (PetscErrorCode)_p_cusolver_stat__, _p_cusolver_name__); \
      } else { \
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSolver error %d (%s)", (PetscErrorCode)_p_cusolver_stat__, _p_cusolver_name__); \
      } \
    } \
  } while (0)
#define CHKERRCUSOLVER(...) PetscCallCUSOLVER(__VA_ARGS__)
118 
/*
  PetscCallCUFFT(call) - evaluates a cuFFT call once and reports any result other than
  CUFFT_SUCCESS through SETERRQ on PETSC_COMM_SELF.

  CUFFT_SETUP_FAILED and CUFFT_ALLOC_FAILED are reported as PETSC_ERR_GPU_RESOURCE when
  PetscDeviceInitialized(PETSC_DEVICE_CUDA) is true; every other failure is reported as
  PETSC_ERR_GPU. The result name comes from PetscCUFFTGetErrorName().

  The result is stored as a cufftResult (the typedef name, the same type PetscCUFFTGetErrorName()
  is declared with) rather than cufftResult_t: the latter is only an enum tag, which is not a
  usable type name in C without the enum keyword, so the macro would not compile in C sources.

  CHKERRCUFFT(call) is an alias that forwards its arguments to PetscCallCUFFT().
*/
#define PetscCallCUFFT(...) \
  do { \
    const cufftResult _p_cufft_stat__ = __VA_ARGS__; \
    if (PetscUnlikely(_p_cufft_stat__ != CUFFT_SUCCESS)) { \
      const char *name = PetscCUFFTGetErrorName(_p_cufft_stat__); \
      /* the device query is only reached for the two resource-related results */ \
      if (((_p_cufft_stat__ == CUFFT_SETUP_FAILED) || (_p_cufft_stat__ == CUFFT_ALLOC_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
                "cuFFT error %d (%s). " \
                "Reports not initialized or alloc failed; " \
                "this indicates the GPU has run out resources", \
                (PetscErrorCode)_p_cufft_stat__, name); \
      } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuFFT error %d (%s)", (PetscErrorCode)_p_cufft_stat__, name); \
    } \
  } while (0)
#define CHKERRCUFFT(...) PetscCallCUFFT(__VA_ARGS__)
134 
/*
  PetscCallCURAND(call) - evaluates a cuRAND call once and reports any status other than
  CURAND_STATUS_SUCCESS through SETERRQ on PETSC_COMM_SELF (numeric code only).

  CURAND_STATUS_INITIALIZATION_FAILED and CURAND_STATUS_ALLOCATION_FAILED are reported as
  PETSC_ERR_GPU_RESOURCE when PetscDeviceInitialized(PETSC_DEVICE_CUDA) is true; every other
  failure is reported as PETSC_ERR_GPU.

  CHKERRCURAND(call) is an alias that forwards its arguments to PetscCallCURAND().
*/
#define PetscCallCURAND(...) \
  do { \
    const curandStatus_t _p_curand_stat__ = __VA_ARGS__; \
    if (PetscUnlikely(_p_curand_stat__ != CURAND_STATUS_SUCCESS)) { \
      /* generic failure first; the resource-exhaustion report is the remaining case */ \
      if (!(((_p_curand_stat__ == CURAND_STATUS_INITIALIZATION_FAILED) || (_p_curand_stat__ == CURAND_STATUS_ALLOCATION_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA))) { \
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuRand error %d", (PetscErrorCode)_p_curand_stat__); \
      } else { \
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \
                "cuRAND error %d. " \
                "Reports not initialized or alloc failed; " \
                "this indicates the GPU has run out resources", \
                (PetscErrorCode)_p_curand_stat__); \
      } \
    } \
  } while (0)
#define CHKERRCURAND(...) PetscCallCURAND(__VA_ARGS__)
149 
150 PETSC_EXTERN cudaStream_t PetscDefaultCudaStream; /* The default stream used by PETSc */
151 
152 PETSC_EXTERN PetscErrorCode PetscCUBLASGetHandle(cublasHandle_t *);
153 PETSC_EXTERN PetscErrorCode PetscCUSOLVERDnGetHandle(cusolverDnHandle_t *);
154 #endif /* PETSC_HAVE_CUDA */
155 
156 #if defined(PETSC_HAVE_HIP)
157 #include <hip/hip_runtime.h>
158 #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0)
159 #include <hipblas/hipblas.h>
160 #else
161 #include <hipblas.h>
162 #endif
163 #if defined(__HIP_PLATFORM_NVCC__)
164 #include <cusolverDn.h>
165 #else /* __HIP_PLATFORM_HCC__ */
166 #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0)
167 #include <rocsolver/rocsolver.h>
168 #else
169 #include <rocsolver.h>
170 #endif
171 #endif /* __HIP_PLATFORM_NVCC__ */
172 
173 /* REMOVE ME: legacy convenience alias that expands directly to hipDeviceSynchronize() */
174 #define WaitForHIP() hipDeviceSynchronize()
175 
176 /* hipBLAS does not have hipblasGetErrorName(). We create one on our own. */
177 PETSC_EXTERN const char *PetscHIPBLASGetErrorName(hipblasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */
178 
/*
  PetscCallHIP(call) - evaluates a HIP runtime call once and, when the result is not hipSuccess,
  reports PETSC_ERR_GPU on PETSC_COMM_SELF through SETERRQ. The message contains the numeric
  code plus the strings obtained from hipGetErrorName() and hipGetErrorString().

  CHKERRHIP(call) is an alias that forwards its arguments to PetscCallHIP().
*/
#define PetscCallHIP(...) \
  do { \
    const hipError_t _p_hip_err__ = __VA_ARGS__; \
    if (PetscUnlikely(_p_hip_err__ != hipSuccess)) { \
      /* look up the symbolic name first, then the human readable description */ \
      const char *_p_hip_name__  = hipGetErrorName(_p_hip_err__); \
      const char *_p_hip_descr__ = hipGetErrorString(_p_hip_err__); \
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hip error %d (%s) : %s", (PetscErrorCode)_p_hip_err__, _p_hip_name__, _p_hip_descr__); \
    } \
  } while (0)
#define CHKERRHIP(...) PetscCallHIP(__VA_ARGS__)
189 
/*
  PetscCallHIPBLAS(call) - evaluates a hipBLAS call once and, when the status is not
  HIPBLAS_STATUS_SUCCESS, reports PETSC_ERR_GPU on PETSC_COMM_SELF. The message contains the
  numeric code and the name returned by PetscHIPBLASGetErrorName().

  CHKERRHIPBLAS(call) is an alias that forwards its arguments to PetscCallHIPBLAS().
*/
#define PetscCallHIPBLAS(...) \
  do { \
    const hipblasStatus_t _p_hipblas_stat__ = __VA_ARGS__; \
    /* the name lookup is part of the error arguments, so it only runs on failure */ \
    PetscCheck(_p_hipblas_stat__ == HIPBLAS_STATUS_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_GPU, "hipBLAS error %d (%s)", (PetscErrorCode)_p_hipblas_stat__, PetscHIPBLASGetErrorName(_p_hipblas_stat__)); \
  } while (0)
#define CHKERRHIPBLAS(...) PetscCallHIPBLAS(__VA_ARGS__)
199 
/* TODO: SEK:  Need to figure out the hipsolver issues */
/*
  PetscCallHIPSOLVER(call) - evaluates a hipsolver call once and treats any nonzero status as a
  failure, reported as PETSC_ERR_GPU on PETSC_COMM_SELF with the numeric code only.

  CHKERRHIPSOLVER(call) is an alias that forwards its arguments to PetscCallHIPSOLVER().
*/
#define PetscCallHIPSOLVER(...) \
  do { \
    const hipsolverStatus_t _p_hipsolver_stat__ = __VA_ARGS__; \
    if (PetscUnlikely(_p_hipsolver_stat__)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "HIPSOLVER error %d", (PetscErrorCode)_p_hipsolver_stat__); \
  } while (0)
#define CHKERRHIPSOLVER(...) PetscCallHIPSOLVER(__VA_ARGS__)
207 
208 /* hipSolver does not exist yet so we work around it
209  rocSOLVER uses rocBLAS for the handle
210  * */
211 #if defined(__HIP_PLATFORM_NVCC__)
212 typedef cusolverDnHandle_t hipsolverHandle_t;
213 typedef cusolverStatus_t   hipsolverStatus_t;
214 
215 /* Alias hipsolverDestroy to cusolverDnDestroy */
216 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle) {
217   return cusolverDnDestroy(hipsolverhandle);
218 }
219 
220 /* Alias hipsolverCreate to cusolverDnCreate */
221 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) {
222   return cusolverDnCreate(hipsolverhandle);
223 }
224 
225 /* Alias hipsolverGetStream to cusolverDnGetStream */
226 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) {
227   return cusolverDnGetStream(handle, stream);
228 }
229 
230 /* Alias hipsolverSetStream to cusolverDnSetStream */
231 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) {
232   return cusolveDnSetStream(handle, stream);
233 }
234 #else                                           /* __HIP_PLATFORM_HCC__ */
235 typedef rocblas_handle hipsolverHandle_t;
236 typedef rocblas_status hipsolverStatus_t;
237 
238 /* Alias hipsolverDestroy to rocblas_destroy_handle */
239 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t hipsolverhandle) {
240   return rocblas_destroy_handle(hipsolverhandle);
241 }
242 
243 /* Alias hipsolverCreate to rocblas_destroy_handle */
244 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) {
245   return rocblas_create_handle(hipsolverhandle);
246 }
247 
248 /* Alias hipsolverGetStream to rocblas_get_stream */
249 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) {
250   return rocblas_get_stream(handle, stream);
251 }
252 
253 /* Alias hipsolverSetStream to rocblas_set_stream */
254 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) {
255   return rocblas_set_stream(handle, stream);
256 }
257 #endif                                          /* __HIP_PLATFORM_NVCC__ */
258 PETSC_EXTERN hipStream_t PetscDefaultHipStream; /* The default stream used by PETSc */
259 
260 PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t *);
261 PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t *);
262 #endif /* PETSC_HAVE_HIP */
263 
264 /* Cannot use the device context api without C++; without it only the two stub macros in the #else branch below are provided */
265 #if defined(PETSC_HAVE_CXX)
266 PETSC_EXTERN PetscErrorCode PetscDeviceInitializePackage(void);
267 PETSC_EXTERN PetscErrorCode PetscDeviceFinalizePackage(void);
268 
269 /* PetscDevice: functions operating on a PetscDevice or a PetscDeviceType */
270 PETSC_EXTERN PetscErrorCode PetscDeviceInitialize(PetscDeviceType);
271 PETSC_EXTERN PetscBool      PetscDeviceInitialized(PetscDeviceType);
272 PETSC_EXTERN PetscErrorCode PetscDeviceCreate(PetscDeviceType, PetscInt, PetscDevice *);
273 PETSC_EXTERN PetscErrorCode PetscDeviceConfigure(PetscDevice);
274 PETSC_EXTERN PetscErrorCode PetscDeviceView(PetscDevice, PetscViewer);
275 PETSC_EXTERN PetscErrorCode PetscDeviceDestroy(PetscDevice *);
276 PETSC_EXTERN PetscErrorCode PetscDeviceGetDeviceId(PetscDevice, PetscInt *);
277 
278 /* PetscDeviceContext: functions operating on a PetscDeviceContext */
279 PETSC_EXTERN PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *);
280 PETSC_EXTERN PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *);
281 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext, PetscDevice);
282 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext, PetscDevice *);
283 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext, PetscStreamType);
284 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext, PetscStreamType *);
285 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext);
286 PETSC_EXTERN PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext, PetscDeviceContext *);
287 PETSC_EXTERN PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext, PetscBool *);
288 PETSC_EXTERN PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext, PetscDeviceContext);
289 PETSC_EXTERN PetscErrorCode PetscDeviceContextFork(PetscDeviceContext, PetscInt, PetscDeviceContext **);
290 PETSC_EXTERN PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext, PetscInt, PetscDeviceContextJoinMode, PetscDeviceContext **);
291 PETSC_EXTERN PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext);
292 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *);
293 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext);
294 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm, const char[], PetscDeviceContext);
295 #else /* no C++: PetscDeviceInitialize() expands to 0 and PetscDeviceInitialized() to PETSC_FALSE; their arguments are discarded */
296 #define PetscDeviceInitialize(...)  0
297 #define PetscDeviceInitialized(...) PETSC_FALSE
298 #endif /* PETSC_HAVE_CXX */
299 
300 #endif /* PETSCDEVICE_H */
301