xref: /petsc/include/petscdevice.h (revision 3f74a92befc20eaeb39901f2e4f048a9a7b2ca2a)
1 #if !defined(PETSCDEVICE_H)
2 #define PETSCDEVICE_H
3 
4 #include <petscsys.h>
5 #include <petscdevicetypes.h>
6 #include <petscpkg_version.h>
7 
8 #if defined(PETSC_HAVE_CUDA)
9 #include <cuda.h>
10 #include <cuda_runtime.h>
11 #include <cublas_v2.h>
12 #include <cusolverDn.h>
13 #include <cusolverSp.h>
14 #include <cufft.h>
15 
16 /* cuBLAS does not have cublasGetErrorName(). We create one on our own. */
17 PETSC_EXTERN const char* PetscCUBLASGetErrorName(cublasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRCUBLAS macro */
18 PETSC_EXTERN const char* PetscCUSolverGetErrorName(cusolverStatus_t);
19 PETSC_EXTERN const char* PetscCUFFTGetErrorName(cufftResult);
20 
21 /* REMOVE ME */
22 #define WaitForCUDA() cudaDeviceSynchronize()
23 
24 /* CUDART_VERSION = 1000 x major + 10 x minor version */
25 
26 /* Could not find exactly which CUDART_VERSION introduced cudaGetErrorName. At least it was in CUDA 8.0 (Sep. 2016) */
27 #if PETSC_PKG_CUDA_VERSION_GE(8,0,0)
28 #define PetscCallCUDA(...) do {                                         \
29     const cudaError_t _p_cuda_err__ = __VA_ARGS__;                      \
30     if (PetscUnlikely(_p_cuda_err__ != cudaSuccess)) {                  \
31       const char *name  = cudaGetErrorName(_p_cuda_err__);              \
32       const char *descr = cudaGetErrorString(_p_cuda_err__);            \
33       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuda error %d (%s) : %s",  \
34               (PetscErrorCode)_p_cuda_err__,name,descr);                \
35     }                                                                   \
36   } while (0)
37 #else /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
38 #define PetscCallCUDA(...) do {                                                                \
39   const cudaError_t _p_cuda_err__ = __VA_ARGS__;                                               \
40   PetscCheck(_p_cuda_err__ == cudaSuccess,PETSC_COMM_SELF,PETSC_ERR_GPU,"cuda error %d",(PetscErrorCode)_p_cuda_err__); \
41 } while (0)
42 #endif /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
43 #define CHKERRCUDA(...) PetscCallCUDA(__VA_ARGS__)
44 
45 #if PETSC_PKG_CUDA_VERSION_GE(8,0,0)
46 #define PetscCallCUDAVoid(...) do {                                     \
47   const cudaError_t _p_cuda_err__ = __VA_ARGS__;                        \
48   PetscCheckAbort(_p_cuda_err__ == cudaSuccess,PETSC_COMM_SELF,PETSC_ERR_GPU,"cuda error %d (%s) : %s",(PetscErrorCode)_p_cuda_err__,cudaGetErrorName(_p_cuda_err__),cudaGetErrorString(_p_cuda_err__));  \
49 } while (0)
50 #else /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
51 #define PetscCallCUDAVoid(...) do {                                     \
52   const cudaError_t _p_cuda_err__ = __VA_ARGS__;                        \
53   PetscCheckAbort(_p_cuda_err__ == cudaSuccess,PETSC_COMM_SELF,PETSC_ERR_GPU,"cuda error %d",(PetscErrorCode)_p_cuda_err__); \
54 } while (0)
55 #endif /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
56 
/*
  PetscCUDACheckLaunch - Statement macro (no parentheses) that checks for CUDA errors in two steps:
  it first queries cudaGetLastError(), then calls cudaDeviceSynchronize(). Both results go
  through PetscCallCUDA(), so the first failing step raises the PETSc error.
*/
#define PetscCUDACheckLaunch do {                                                      \
    /* Step 1: synchronous (pre-launch) errors */                                      \
    PetscCallCUDA(cudaGetLastError());                                                 \
    /* Step 2: asynchronous errors, i.e. the kernel itself failed */                   \
    PetscCallCUDA(cudaDeviceSynchronize());                                            \
  } while (0)
64 
/*
  PetscCallCUBLAS - Evaluates a cuBLAS call once and raises a PETSc error unless it returned
  CUBLAS_STATUS_SUCCESS.

  A NOT_INITIALIZED or ALLOC_FAILED status seen while PetscDeviceInitialized(PETSC_DEVICE_CUDA)
  holds is reported as PETSC_ERR_GPU_RESOURCE; every other failure is reported as PETSC_ERR_GPU.
  The symbolic name comes from PetscCUBLASGetErrorName().
*/
#define PetscCallCUBLAS(...) do {                                                      \
    const cublasStatus_t _p_cublas_rc__ = __VA_ARGS__;                                 \
    if (PetscUnlikely(_p_cublas_rc__ != CUBLAS_STATUS_SUCCESS)) {                      \
      const char *_p_cublas_name__ = PetscCUBLASGetErrorName(_p_cublas_rc__);          \
      /* passes unless this is one of the two resource-type statuses on an initialized device */ \
      PetscCheck(((_p_cublas_rc__ != CUBLAS_STATUS_NOT_INITIALIZED) &&                 \
                  (_p_cublas_rc__ != CUBLAS_STATUS_ALLOC_FAILED))   ||                 \
                 !PetscDeviceInitialized(PETSC_DEVICE_CUDA),                           \
                 PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,                               \
                 "cuBLAS error %d (%s). "                                              \
                 "Reports not initialized or alloc failed; "                           \
                 "this indicates the GPU may have run out resources",                  \
                 (PetscErrorCode)_p_cublas_rc__,_p_cublas_name__);                     \
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuBLAS error %d (%s)",                    \
              (PetscErrorCode)_p_cublas_rc__,_p_cublas_name__);                        \
    }                                                                                  \
  } while (0)
/* CHKERRCUBLAS is an alternate spelling that forwards to PetscCallCUBLAS */
#define CHKERRCUBLAS(...) PetscCallCUBLAS(__VA_ARGS__)
82 
#if (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) /* According to cuda/10.1.168 on OLCF Summit */
/*
  PetscCallCUSPARSE - Evaluates a cuSPARSE call once and raises a PETSc error for any nonzero status.

  NOT_INITIALIZED and ALLOC_FAILED are reported as PETSC_ERR_GPU_RESOURCE, every other failure
  as PETSC_ERR_GPU. The message includes cusparseGetErrorName() and cusparseGetErrorString().
*/
#define PetscCallCUSPARSE(...) do {                                                    \
    const cusparseStatus_t _p_cusparse_rc__ = __VA_ARGS__;                             \
    if (PetscUnlikely(_p_cusparse_rc__)) {                                             \
      const char *_p_cusparse_name__ = cusparseGetErrorName(_p_cusparse_rc__);         \
      const char *_p_cusparse_text__ = cusparseGetErrorString(_p_cusparse_rc__);       \
      if ((_p_cusparse_rc__ == CUSPARSE_STATUS_NOT_INITIALIZED) ||                     \
          (_p_cusparse_rc__ == CUSPARSE_STATUS_ALLOC_FAILED)) {                        \
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,                                \
                "cuSPARSE errorcode %d (%s) : %s. "                                    \
                "Reports not initialized or alloc failed; "                            \
                "this indicates the GPU has run out resources",                        \
                (int)_p_cusparse_rc__,_p_cusparse_name__,_p_cusparse_text__);          \
      } else {                                                                         \
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuSPARSE errorcode %d (%s) : %s",       \
                (int)_p_cusparse_rc__,_p_cusparse_name__,_p_cusparse_text__);          \
      }                                                                                \
    }                                                                                  \
  } while (0)
#else  /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */
/* Variant for older cuSPARSE: reports the numeric status only, always as PETSC_ERR_GPU. */
#define PetscCallCUSPARSE(...) do {                                                    \
    const cusparseStatus_t _p_cusparse_rc__ = __VA_ARGS__;                             \
    if (PetscUnlikely(_p_cusparse_rc__ != CUSPARSE_STATUS_SUCCESS)) {                  \
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuSPARSE errorcode %d",                   \
              (PetscErrorCode)_p_cusparse_rc__);                                       \
    }                                                                                  \
  } while (0)
#endif /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */
/* CHKERRCUSPARSE is an alternate spelling that forwards to PetscCallCUSPARSE */
#define CHKERRCUSPARSE(...) PetscCallCUSPARSE(__VA_ARGS__)
101 
/*
  PetscCallCUSOLVER - Evaluates a cuSOLVER call once and raises a PETSc error unless it returned
  CUSOLVER_STATUS_SUCCESS.

  NOT_INITIALIZED, ALLOC_FAILED and INTERNAL_ERROR seen while
  PetscDeviceInitialized(PETSC_DEVICE_CUDA) holds are reported as PETSC_ERR_GPU_RESOURCE;
  every other failure is reported as PETSC_ERR_GPU. The symbolic name comes from
  PetscCUSolverGetErrorName().
*/
#define PetscCallCUSOLVER(...) do {                                                    \
    const cusolverStatus_t _p_cusolver_rc__ = __VA_ARGS__;                             \
    if (PetscUnlikely(_p_cusolver_rc__ != CUSOLVER_STATUS_SUCCESS)) {                  \
      const char *_p_cusolver_name__ = PetscCUSolverGetErrorName(_p_cusolver_rc__);    \
      /* passes unless this is a resource-type status on an initialized device */      \
      PetscCheck(((_p_cusolver_rc__ != CUSOLVER_STATUS_NOT_INITIALIZED) &&             \
                  (_p_cusolver_rc__ != CUSOLVER_STATUS_ALLOC_FAILED)    &&             \
                  (_p_cusolver_rc__ != CUSOLVER_STATUS_INTERNAL_ERROR)) ||             \
                 !PetscDeviceInitialized(PETSC_DEVICE_CUDA),                           \
                 PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,                               \
                 "cuSolver error %d (%s). "                                            \
                 "This indicates the GPU may have run out resources",                  \
                 (PetscErrorCode)_p_cusolver_rc__,_p_cusolver_name__);                 \
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuSolver error %d (%s)",                  \
              (PetscErrorCode)_p_cusolver_rc__,_p_cusolver_name__);                    \
    }                                                                                  \
  } while (0)
/* CHKERRCUSOLVER is an alternate spelling that forwards to PetscCallCUSOLVER */
#define CHKERRCUSOLVER(...) PetscCallCUSOLVER(__VA_ARGS__)
119 
/*
  PetscCallCUFFT - Evaluates a cuFFT call once and raises a PETSc error unless it returned
  CUFFT_SUCCESS.

  CUFFT_SETUP_FAILED and CUFFT_ALLOC_FAILED seen while PetscDeviceInitialized(PETSC_DEVICE_CUDA)
  holds are reported as PETSC_ERR_GPU_RESOURCE; every other failure is reported as
  PETSC_ERR_GPU. The symbolic name comes from PetscCUFFTGetErrorName().
*/
#define PetscCallCUFFT(...) do {                                                       \
    const cufftResult_t _p_cufft_rc__ = __VA_ARGS__;                                   \
    if (PetscUnlikely(_p_cufft_rc__ != CUFFT_SUCCESS)) {                               \
      const char *_p_cufft_name__  = PetscCUFFTGetErrorName(_p_cufft_rc__);            \
      /* nonzero when the status is resource-type and the CUDA device is initialized */ \
      const int   _p_cufft_nores__ = ((_p_cufft_rc__ == CUFFT_SETUP_FAILED)  ||        \
                                      (_p_cufft_rc__ == CUFFT_ALLOC_FAILED)) &&        \
                                     PetscDeviceInitialized(PETSC_DEVICE_CUDA);        \
      if (_p_cufft_nores__) {                                                          \
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,                                \
                "cuFFT error %d (%s). "                                                \
                "Reports not initialized or alloc failed; "                            \
                "this indicates the GPU has run out resources",                        \
                (PetscErrorCode)_p_cufft_rc__,_p_cufft_name__);                        \
      } else {                                                                         \
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuFFT error %d (%s)",                   \
                (PetscErrorCode)_p_cufft_rc__,_p_cufft_name__);                        \
      }                                                                                \
    }                                                                                  \
  } while (0)
/* CHKERRCUFFT is an alternate spelling that forwards to PetscCallCUFFT */
#define CHKERRCUFFT(...) PetscCallCUFFT(__VA_ARGS__)
137 
/*
  PetscCallCURAND - Evaluates a cuRAND call once and raises a PETSc error unless it returned
  CURAND_STATUS_SUCCESS.

  INITIALIZATION_FAILED and ALLOCATION_FAILED seen while
  PetscDeviceInitialized(PETSC_DEVICE_CUDA) holds are reported as PETSC_ERR_GPU_RESOURCE;
  every other failure is reported as PETSC_ERR_GPU. Only the numeric status is printed.
*/
#define PetscCallCURAND(...) do {                                                      \
    const curandStatus_t _p_curand_rc__ = __VA_ARGS__;                                 \
    if (PetscUnlikely(_p_curand_rc__ != CURAND_STATUS_SUCCESS)) {                      \
      /* passes unless this is a resource-type status on an initialized device */      \
      PetscCheck(((_p_curand_rc__ != CURAND_STATUS_INITIALIZATION_FAILED) &&           \
                  (_p_curand_rc__ != CURAND_STATUS_ALLOCATION_FAILED))    ||           \
                 !PetscDeviceInitialized(PETSC_DEVICE_CUDA),                           \
                 PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,                               \
                 "cuRAND error %d. "                                                   \
                 "Reports not initialized or alloc failed; "                           \
                 "this indicates the GPU has run out resources",                       \
                 (PetscErrorCode)_p_curand_rc__);                                      \
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,                                           \
              "cuRand error %d",(PetscErrorCode)_p_curand_rc__);                       \
    }                                                                                  \
  } while (0)
/* CHKERRCURAND is an alternate spelling that forwards to PetscCallCURAND */
#define CHKERRCURAND(...) PetscCallCURAND(__VA_ARGS__)
154 
155 PETSC_EXTERN cudaStream_t   PetscDefaultCudaStream; /* The default stream used by PETSc */
156 
157 PETSC_EXTERN PetscErrorCode PetscCUBLASGetHandle(cublasHandle_t*);
158 PETSC_EXTERN PetscErrorCode PetscCUSOLVERDnGetHandle(cusolverDnHandle_t*);
159 #endif /* PETSC_HAVE_CUDA */
160 
161 #if defined(PETSC_HAVE_HIP)
162 #include <hip/hip_runtime.h>
163 #if PETSC_PKG_HIP_VERSION_GE(5,2,0)
164   #include <hipblas/hipblas.h>
165 #else
166   #include <hipblas.h>
167 #endif
168 #if defined(__HIP_PLATFORM_NVCC__)
169 #include <cusolverDn.h>
170 #else /* __HIP_PLATFORM_HCC__ */
171 #if PETSC_PKG_HIP_VERSION_GE(5,2,0)
172   #include <rocsolver/rocsolver.h>
173 #else
174   #include <rocsolver.h>
175 #endif
176 #endif /* __HIP_PLATFORM_NVCC__ */
177 
178 /* REMOVE ME */
179 #define WaitForHIP() hipDeviceSynchronize()
180 
181 /* hipBLAS does not have hipblasGetErrorName(). We create one on our own. */
182 PETSC_EXTERN const char* PetscHIPBLASGetErrorName(hipblasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */
183 
/*
  PetscCallHIP - Runs a HIP runtime expression once and converts a failure into a PETSc error.

  Any result other than hipSuccess is reported on PETSC_COMM_SELF as PETSC_ERR_GPU; the message
  carries the numeric code together with hipGetErrorName() and hipGetErrorString().
*/
#define PetscCallHIP(...) do {                                                         \
    const hipError_t _p_hip_rc__ = __VA_ARGS__;                                        \
    PetscCheck(_p_hip_rc__ == hipSuccess,PETSC_COMM_SELF,PETSC_ERR_GPU,                \
               "hip error %d (%s) : %s",(PetscErrorCode)_p_hip_rc__,                   \
               hipGetErrorName(_p_hip_rc__),hipGetErrorString(_p_hip_rc__));           \
  } while (0)
/* CHKERRHIP is an alternate spelling that forwards to PetscCallHIP */
#define CHKERRHIP(...) PetscCallHIP(__VA_ARGS__)
194 
/*
  PetscCallHIPBLAS - Evaluates a hipBLAS call once and raises PETSC_ERR_GPU unless it returned
  HIPBLAS_STATUS_SUCCESS. The symbolic name in the message comes from PetscHIPBLASGetErrorName().
*/
#define PetscCallHIPBLAS(...) do {                                                     \
    const hipblasStatus_t _p_hipblas_rc__ = __VA_ARGS__;                               \
    PetscCheck(_p_hipblas_rc__ == HIPBLAS_STATUS_SUCCESS,                              \
               PETSC_COMM_SELF,PETSC_ERR_GPU,"hipBLAS error %d (%s)",                  \
               (PetscErrorCode)_p_hipblas_rc__,                                        \
               PetscHIPBLASGetErrorName(_p_hipblas_rc__));                             \
  } while (0)
/* CHKERRHIPBLAS is an alternate spelling that forwards to PetscCallHIPBLAS */
#define CHKERRHIPBLAS(...) PetscCallHIPBLAS(__VA_ARGS__)
204 
205 /* TODO: SEK:  Need to figure out the hipsolver issues */
/*
  PetscCallHIPSOLVER - Evaluates a hipsolver call once and raises PETSC_ERR_GPU for any nonzero
  status. Only the numeric status is printed.
*/
#define PetscCallHIPSOLVER(...) do {                                                   \
    const hipsolverStatus_t _p_hipsolver_rc__ = __VA_ARGS__;                           \
    if (PetscUnlikely(_p_hipsolver_rc__)) {                                            \
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"HIPSOLVER error %d",                      \
              (PetscErrorCode)_p_hipsolver_rc__);                                      \
    }                                                                                  \
  } while (0)
/* CHKERRHIPSOLVER is an alternate spelling that forwards to PetscCallHIPSOLVER */
#define CHKERRHIPSOLVER(...) PetscCallHIPSOLVER(__VA_ARGS__)
211 
/* hipSolver does not exist yet, so we work around it.
 * rocSOLVER uses rocBLAS for the handle.
 */
215 #if defined(__HIP_PLATFORM_NVCC__)
216 typedef cusolverDnHandle_t hipsolverHandle_t;
217 typedef cusolverStatus_t   hipsolverStatus_t;
218 
219 /* Alias hipsolverDestroy to cusolverDnDestroy */
220 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle)
221 {
222   return cusolverDnDestroy(hipsolverhandle);
223 }
224 
225 /* Alias hipsolverCreate to cusolverDnCreate */
226 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
227 {
228   return cusolverDnCreate(hipsolverhandle);
229 }
230 
231 /* Alias hipsolverGetStream to cusolverDnGetStream */
232 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
233 {
234   return cusolverDnGetStream(handle,stream);
235 }
236 
237 /* Alias hipsolverSetStream to cusolverDnSetStream */
238 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
239 {
240   return cusolveDnSetStream(handle,stream);
241 }
242 #else /* __HIP_PLATFORM_HCC__ */
243 typedef rocblas_handle hipsolverHandle_t;
244 typedef rocblas_status hipsolverStatus_t;
245 
246 /* Alias hipsolverDestroy to rocblas_destroy_handle */
247 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t  hipsolverhandle)
248 {
249   return rocblas_destroy_handle(hipsolverhandle);
250 }
251 
252 /* Alias hipsolverCreate to rocblas_destroy_handle */
253 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
254 {
255   return rocblas_create_handle(hipsolverhandle);
256 }
257 
258 /* Alias hipsolverGetStream to rocblas_get_stream */
259 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
260 {
261   return rocblas_get_stream(handle,stream);
262 }
263 
264 /* Alias hipsolverSetStream to rocblas_set_stream */
265 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
266 {
267   return rocblas_set_stream(handle,stream);
268 }
269 #endif /* __HIP_PLATFORM_NVCC__ */
270 PETSC_EXTERN hipStream_t    PetscDefaultHipStream; /* The default stream used by PETSc */
271 
272 PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t*);
273 PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t*);
274 #endif /* PETSC_HAVE_HIP */
275 
276 /* Cannot use the device context api without C++ */
277 #if defined(PETSC_HAVE_CXX)
278 PETSC_EXTERN PetscErrorCode PetscDeviceInitializePackage(void);
279 PETSC_EXTERN PetscErrorCode PetscDeviceFinalizePackage(void);
280 
281 /* PetscDevice */
282 PETSC_EXTERN PetscErrorCode PetscDeviceInitialize(PetscDeviceType);
283 PETSC_EXTERN PetscBool      PetscDeviceInitialized(PetscDeviceType);
284 PETSC_EXTERN PetscErrorCode PetscDeviceCreate(PetscDeviceType,PetscInt,PetscDevice*);
285 PETSC_EXTERN PetscErrorCode PetscDeviceConfigure(PetscDevice);
286 PETSC_EXTERN PetscErrorCode PetscDeviceView(PetscDevice,PetscViewer);
287 PETSC_EXTERN PetscErrorCode PetscDeviceDestroy(PetscDevice*);
288 PETSC_EXTERN PetscErrorCode PetscDeviceGetDeviceId(PetscDevice,PetscInt*);
289 
290 /* PetscDeviceContext */
291 PETSC_EXTERN PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext*);
292 PETSC_EXTERN PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext*);
293 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext,PetscDevice);
294 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext,PetscDevice*);
295 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext,PetscStreamType);
296 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext,PetscStreamType*);
297 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext);
298 PETSC_EXTERN PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext,PetscDeviceContext*);
299 PETSC_EXTERN PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext,PetscBool*);
300 PETSC_EXTERN PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext,PetscDeviceContext);
301 PETSC_EXTERN PetscErrorCode PetscDeviceContextFork(PetscDeviceContext,PetscInt,PetscDeviceContext**);
302 PETSC_EXTERN PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext,PetscInt,PetscDeviceContextJoinMode,PetscDeviceContext**);
303 PETSC_EXTERN PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext);
304 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext*);
305 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext);
306 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm,const char[],PetscDeviceContext);
307 #else
308 #define PetscDeviceInitialize(...)  0
309 #define PetscDeviceInitialized(...) PETSC_FALSE
310 #endif /* PETSC_HAVE_CXX */
311 
312 #endif /* PETSCDEVICE_H */
313