xref: /petsc/include/petscdevice.h (revision 98921bda46e76d7aaed9e0138c5ff9d0ce93f355)
1 #if !defined(PETSCDEVICE_H)
2 #define PETSCDEVICE_H
3 
4 #include <petscsys.h>
5 #include <petscdevicetypes.h>
6 #include <petscpkg_version.h>
7 
8 #if defined(PETSC_HAVE_CUDA)
9 #include <cuda.h>
10 #include <cuda_runtime.h>
11 #include <cublas_v2.h>
12 #include <cusolverDn.h>
13 #include <cusolverSp.h>
14 #include <cufft.h>
15 
16 /* cuBLAS does not have cublasGetErrorName(). We create one on our own. */
17 PETSC_EXTERN const char* PetscCUBLASGetErrorName(cublasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRCUBLAS macro */
18 PETSC_EXTERN const char* PetscCUSolverGetErrorName(cusolverStatus_t);
19 PETSC_EXTERN const char* PetscCUFFTGetErrorName(cufftResult);
20 
21 /* REMOVE ME */
/* WaitForCUDA() expands to a bare cudaDeviceSynchronize() call; whatever status that call returns is left for the caller to check. */
22 #define WaitForCUDA() cudaDeviceSynchronize()
23 
24 /* CUDART_VERSION = 1000 x major + 10 x minor version */
25 
26 /* Could not find exactly which CUDART_VERSION introduced cudaGetErrorName. At least it was in CUDA 8.0 (Sep. 2016) */
27 #if PETSC_PKG_CUDA_VERSION_GE(8,0,0)
/* CHKERRCUDA - inspect the cudaError_t produced by a CUDA runtime call.
   The argument is evaluated once and cached. If the cached value differs from
   cudaSuccess, SETERRQ() is invoked with PETSC_ERR_GPU, passing the numeric code
   together with the strings returned by cudaGetErrorName() and cudaGetErrorString(). */
#define CHKERRCUDA(cerr) do {                                                   \
    const cudaError_t _p_cuda_rc__ = cerr;                                      \
    if (PetscUnlikely(cudaSuccess != _p_cuda_rc__)) {                           \
      const char *_p_cuda_name__  = cudaGetErrorName(_p_cuda_rc__);             \
      const char *_p_cuda_descr__ = cudaGetErrorString(_p_cuda_rc__);           \
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuda error %d (%s) : %s",          \
              (PetscErrorCode)_p_cuda_rc__,_p_cuda_name__,_p_cuda_descr__);     \
    }                                                                           \
  } while (0)
37 #else /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
/* CHKERRCUDA (fallback variant) - inspect the cudaError_t produced by a CUDA runtime call.
   The argument is evaluated once and cached. If the cached value differs from
   cudaSuccess, SETERRQ() is invoked with PETSC_ERR_GPU and only the numeric code,
   since this variant does not call cudaGetErrorName(). */
#define CHKERRCUDA(cerr) do {                                                   \
    const cudaError_t _p_cuda_rc__ = cerr;                                      \
    if (PetscUnlikely(cudaSuccess != _p_cuda_rc__)) {                           \
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,                                    \
              "cuda error %d",(PetscErrorCode)_p_cuda_rc__);                    \
    }                                                                           \
  } while (0)
44 #endif /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
45 
/* CHKERRCUBLAS - inspect the cublasStatus_t produced by a cuBLAS call.
   The argument is evaluated once and cached. On any status other than
   CUBLAS_STATUS_SUCCESS the name is looked up with PetscCUBLASGetErrorName() and
   SETERRQ() is invoked:
     - with PETSC_ERR_GPU_RESOURCE when the status is NOT_INITIALIZED or ALLOC_FAILED
       and PetscDeviceInitialized(PETSC_DEVICE_CUDA) is true;
     - with PETSC_ERR_GPU in every other case.
   PetscDeviceInitialized() is only consulted for the two statuses listed above. */
#define CHKERRCUBLAS(stat)   do {                                               \
    const cublasStatus_t _p_cublas_rc__ = stat;                                 \
    if (PetscUnlikely(CUBLAS_STATUS_SUCCESS != _p_cublas_rc__)) {               \
      const char *_p_cublas_name__ = PetscCUBLASGetErrorName(_p_cublas_rc__);   \
      /* generic failure first: status is not one of the resource-like codes,   \
         or the CUDA device was never initialized */                            \
      if (((_p_cublas_rc__ != CUBLAS_STATUS_NOT_INITIALIZED) &&                 \
           (_p_cublas_rc__ != CUBLAS_STATUS_ALLOC_FAILED))   ||                 \
          !PetscDeviceInitialized(PETSC_DEVICE_CUDA)) {                         \
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuBLAS error %d (%s)",           \
                (PetscErrorCode)_p_cublas_rc__,_p_cublas_name__);               \
      } else {                                                                  \
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,                         \
                "cuBLAS error %d (%s). "                                        \
                "Reports not initialized or alloc failed; "                     \
                "this indicates the GPU may have run out resources",            \
                (PetscErrorCode)_p_cublas_rc__,_p_cublas_name__);               \
      }                                                                         \
    }                                                                           \
  } while (0)
64 
65 #if (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) /* According to cuda/10.1.168 on OLCF Summit */
/* CHKERRCUSPARSE - inspect the cusparseStatus_t produced by a cuSPARSE call.
   The argument is evaluated exactly once and cached in a local, so it is safe to pass
   the cuSPARSE call itself as the argument (previously the argument text was expanded
   several times, re-executing any call placed there). On a nonzero status the name and
   description are fetched with cusparseGetErrorName()/cusparseGetErrorString() and
   SETERRQ() is invoked:
     - with PETSC_ERR_GPU_RESOURCE for CUSPARSE_STATUS_NOT_INITIALIZED or
       CUSPARSE_STATUS_ALLOC_FAILED;
     - with PETSC_ERR_GPU otherwise.
   As before, the cuSPARSE declarations must be visible wherever the macro is expanded;
   this header does not include cusparse.h itself. */
#define CHKERRCUSPARSE(stat) do {                                                                   \
    const cusparseStatus_t _p_cusparse_stat__ = stat;                                               \
    if (PetscUnlikely(_p_cusparse_stat__ != CUSPARSE_STATUS_SUCCESS)) {                             \
      const char *name  = cusparseGetErrorName(_p_cusparse_stat__);                                 \
      const char *descr = cusparseGetErrorString(_p_cusparse_stat__);                               \
      if ((_p_cusparse_stat__ == CUSPARSE_STATUS_NOT_INITIALIZED) ||                                \
          (_p_cusparse_stat__ == CUSPARSE_STATUS_ALLOC_FAILED)) {                                   \
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,"cuSPARSE errorcode %d (%s) : %s. Reports not initialized or alloc failed; this indicates the GPU has run out resources",(int)_p_cusparse_stat__,name,descr); \
      } else {                                                                                      \
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuSPARSE errorcode %d (%s) : %s",                    \
                (int)_p_cusparse_stat__,name,descr);                                                \
      }                                                                                             \
    }                                                                                               \
  } while (0)
75 #else  /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */
/* CHKERRCUSPARSE (fallback variant) - inspect the cusparseStatus_t produced by a cuSPARSE call.
   The argument is evaluated exactly once and cached in a local (previously it was expanded
   twice, re-executing any call passed as the argument). On a nonzero status SETERRQ() is
   invoked with PETSC_ERR_GPU and the numeric code only. */
#define CHKERRCUSPARSE(stat) do {                                                   \
    const cusparseStatus_t _p_cusparse_stat__ = stat;                               \
    if (PetscUnlikely(_p_cusparse_stat__ != CUSPARSE_STATUS_SUCCESS)) {             \
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuSPARSE errorcode %d",                \
              (int)_p_cusparse_stat__);                                             \
    }                                                                               \
  } while (0)
77 #endif /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */
78 
/* CHKERRCUSOLVER - inspect the cusolverStatus_t produced by a cuSOLVER call.
   The argument is evaluated once and cached. On any status other than
   CUSOLVER_STATUS_SUCCESS the name is looked up with PetscCUSolverGetErrorName() and
   SETERRQ() is invoked:
     - with PETSC_ERR_GPU_RESOURCE when the status is NOT_INITIALIZED, ALLOC_FAILED or
       INTERNAL_ERROR and PetscDeviceInitialized(PETSC_DEVICE_CUDA) is true;
     - with PETSC_ERR_GPU in every other case.
   PetscDeviceInitialized() is only consulted for the three statuses listed above. */
#define CHKERRCUSOLVER(stat) do {                                                     \
    const cusolverStatus_t _p_cusolver_rc__ = stat;                                   \
    if (PetscUnlikely(CUSOLVER_STATUS_SUCCESS != _p_cusolver_rc__)) {                 \
      const char *_p_cusolver_name__ = PetscCUSolverGetErrorName(_p_cusolver_rc__);   \
      /* generic failure first: status is not one of the resource-like codes,         \
         or the CUDA device was never initialized */                                  \
      if (((_p_cusolver_rc__ != CUSOLVER_STATUS_NOT_INITIALIZED) &&                   \
           (_p_cusolver_rc__ != CUSOLVER_STATUS_ALLOC_FAILED)    &&                   \
           (_p_cusolver_rc__ != CUSOLVER_STATUS_INTERNAL_ERROR)) ||                   \
          !PetscDeviceInitialized(PETSC_DEVICE_CUDA)) {                               \
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuSolver error %d (%s)",               \
                (PetscErrorCode)_p_cusolver_rc__,_p_cusolver_name__);                 \
      } else {                                                                        \
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,                               \
                "cuSolver error %d (%s). "                                            \
                "This indicates the GPU may have run out resources",                  \
                (PetscErrorCode)_p_cusolver_rc__,_p_cusolver_name__);                 \
      }                                                                               \
    }                                                                                 \
  } while (0)
97 
/* CHKERRCUFFT - inspect the cufftResult produced by a cuFFT call.
   The argument is evaluated once and cached. The local is declared with the typedef
   name cufftResult (the same type PetscCUFFTGetErrorName() takes) rather than the enum
   tag cufftResult_t, because a bare enum tag is not a type name in C. On any result
   other than CUFFT_SUCCESS the name is looked up with PetscCUFFTGetErrorName() and
   SETERRQ() is invoked:
     - with PETSC_ERR_GPU_RESOURCE when the result is CUFFT_SETUP_FAILED or
       CUFFT_ALLOC_FAILED and PetscDeviceInitialized(PETSC_DEVICE_CUDA) is true;
     - with PETSC_ERR_GPU in every other case. */
#define CHKERRCUFFT(res)     do {                                       \
    const cufftResult _p_cufft_stat__ = res;                            \
    if (PetscUnlikely(_p_cufft_stat__ != CUFFT_SUCCESS)) {              \
      const char *name = PetscCUFFTGetErrorName(_p_cufft_stat__);       \
      if (((_p_cufft_stat__ == CUFFT_SETUP_FAILED)  ||                  \
           (_p_cufft_stat__ == CUFFT_ALLOC_FAILED)) &&                  \
          PetscDeviceInitialized(PETSC_DEVICE_CUDA)) {                  \
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,                 \
                "cuFFT error %d (%s). "                                 \
                "Reports not initialized or alloc failed; "             \
                "this indicates the GPU has run out resources",         \
                (PetscErrorCode)_p_cufft_stat__,name);                  \
      } else {                                                          \
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuFFT error %d (%s)",    \
                (PetscErrorCode)_p_cufft_stat__,name);                  \
      }                                                                 \
    }                                                                   \
  } while (0)
116 
/* CHKERRCURAND - inspect the curandStatus_t produced by a cuRAND call.
   The argument is evaluated once and cached. On any status other than
   CURAND_STATUS_SUCCESS, SETERRQ() is invoked with the numeric code:
     - with PETSC_ERR_GPU_RESOURCE when the status is INITIALIZATION_FAILED or
       ALLOCATION_FAILED and PetscDeviceInitialized(PETSC_DEVICE_CUDA) is true;
     - with PETSC_ERR_GPU in every other case.
   PetscDeviceInitialized() is only consulted for the two statuses listed above. */
#define CHKERRCURAND(stat)   do {                                               \
    const curandStatus_t _p_curand_rc__ = stat;                                 \
    if (PetscUnlikely(CURAND_STATUS_SUCCESS != _p_curand_rc__)) {               \
      /* generic failure first: status is not one of the resource-like codes,   \
         or the CUDA device was never initialized */                            \
      if (((_p_curand_rc__ != CURAND_STATUS_INITIALIZATION_FAILED) &&           \
           (_p_curand_rc__ != CURAND_STATUS_ALLOCATION_FAILED))    ||           \
          !PetscDeviceInitialized(PETSC_DEVICE_CUDA)) {                         \
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,                                  \
                "cuRand error %d",(PetscErrorCode)_p_curand_rc__);              \
      } else {                                                                  \
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,                         \
                "cuRAND error %d. "                                             \
                "Reports not initialized or alloc failed; "                     \
                "this indicates the GPU has run out resources",                 \
                (PetscErrorCode)_p_curand_rc__);                                \
      }                                                                         \
    }                                                                           \
  } while (0)
134 
135 PETSC_EXTERN cudaStream_t   PetscDefaultCudaStream; /* The default stream used by PETSc */
136 
137 PETSC_EXTERN PetscErrorCode PetscCUBLASGetHandle(cublasHandle_t*);
138 PETSC_EXTERN PetscErrorCode PetscCUSOLVERDnGetHandle(cusolverDnHandle_t*);
139 #endif /* PETSC_HAVE_CUDA */
140 
141 #if defined(PETSC_HAVE_HIP)
142 #include <hip/hip_runtime.h>
143 #include <hipblas.h>
144 #if defined(__HIP_PLATFORM_NVCC__)
145 #include <cusolverDn.h>
146 #else /* __HIP_PLATFORM_HCC__ */
147 #include <rocsolver.h>
148 #endif /* __HIP_PLATFORM_NVCC__ */
149 
150 /* REMOVE ME */
/* WaitForHIP() expands to a bare hipDeviceSynchronize() call; whatever status that call returns is left for the caller to check. */
151 #define WaitForHIP() hipDeviceSynchronize()
152 
153 /* hipBLAS does not have hipblasGetErrorName(). We create one on our own. */
154 PETSC_EXTERN const char* PetscHIPBLASGetErrorName(hipblasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */
155 
/* CHKERRHIP - inspect the hipError_t produced by a HIP runtime call.
   The argument is evaluated once and cached. If the cached value differs from
   hipSuccess, SETERRQ() is invoked with PETSC_ERR_GPU, passing the numeric code
   together with the strings returned by hipGetErrorName() and hipGetErrorString(). */
#define CHKERRHIP(cerr)     do {                                                \
    const hipError_t _p_hip_rc__ = cerr;                                        \
    if (PetscUnlikely(hipSuccess != _p_hip_rc__)) {                             \
      const char *_p_hip_name__  = hipGetErrorName(_p_hip_rc__);                \
      const char *_p_hip_descr__ = hipGetErrorString(_p_hip_rc__);              \
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"hip error %d (%s) : %s",           \
              (PetscErrorCode)_p_hip_rc__,_p_hip_name__,_p_hip_descr__);        \
    }                                                                           \
  } while (0)
165 
/* CHKERRHIPBLAS - inspect the hipblasStatus_t produced by a hipBLAS call.
   The argument is evaluated once and cached. If the cached value differs from
   HIPBLAS_STATUS_SUCCESS, the name is looked up with PetscHIPBLASGetErrorName() and
   SETERRQ() is invoked with PETSC_ERR_GPU, the numeric code and that name. */
#define CHKERRHIPBLAS(stat) do {                                                    \
    const hipblasStatus_t _p_hipblas_rc__ = stat;                                   \
    if (PetscUnlikely(HIPBLAS_STATUS_SUCCESS != _p_hipblas_rc__)) {                 \
      const char *_p_hipblas_name__ = PetscHIPBLASGetErrorName(_p_hipblas_rc__);    \
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"hipBLAS error %d (%s)",                \
              (PetscErrorCode)_p_hipblas_rc__,_p_hipblas_name__);                   \
    }                                                                               \
  } while (0)
174 
/* TODO: SEK:  Need to figure out the hipsolver issues */
/* CHKERRHIPSOLVER - inspect the hipsolverStatus_t produced by a solver call.
   The argument is evaluated once and cached. Any nonzero value is treated as a failure
   (the status is tested for truthiness rather than compared against a named
   HIPSOLVER_STATUS_SUCCESS constant), in which case SETERRQ() is invoked with
   PETSC_ERR_GPU and the numeric code. */
#define CHKERRHIPSOLVER(stat) do {                                              \
    const hipsolverStatus_t _p_hipsolver_rc__ = stat;                           \
    if (PetscUnlikely(_p_hipsolver_rc__)) {                                     \
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,                                    \
              "HIPSOLVER error %d",(PetscErrorCode)_p_hipsolver_rc__);          \
    }                                                                           \
  } while (0)
183 
/* hipSolver does not exist yet, so we work around it.
 * rocSOLVER uses rocBLAS for the handle.
 */
187 #if defined(__HIP_PLATFORM_NVCC__)
188 typedef cusolverDnHandle_t hipsolverHandle_t;
189 typedef cusolverStatus_t   hipsolverStatus_t;
190 
191 /* Alias hipsolverDestroy to cusolverDnDestroy */
192 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle)
193 {
194   return cusolverDnDestroy(hipsolverhandle);
195 }
196 
197 /* Alias hipsolverCreate to cusolverDnCreate */
198 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
199 {
200   return cusolverDnCreate(hipsolverhandle);
201 }
202 
203 /* Alias hipsolverGetStream to cusolverDnGetStream */
204 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
205 {
206   return cusolverDnGetStream(handle,stream);
207 }
208 
209 /* Alias hipsolverSetStream to cusolverDnSetStream */
210 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
211 {
212   return cusolveDnSetStream(handle,stream);
213 }
214 #else /* __HIP_PLATFORM_HCC__ */
215 typedef rocblas_handle hipsolverHandle_t;
216 typedef rocblas_status hipsolverStatus_t;
217 
218 /* Alias hipsolverDestroy to rocblas_destroy_handle */
219 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t  hipsolverhandle)
220 {
221   return rocblas_destroy_handle(hipsolverhandle);
222 }
223 
224 /* Alias hipsolverCreate to rocblas_destroy_handle */
225 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
226 {
227   return rocblas_create_handle(hipsolverhandle);
228 }
229 
230 /* Alias hipsolverGetStream to rocblas_get_stream */
231 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
232 {
233   return rocblas_get_stream(handle,stream);
234 }
235 
236 /* Alias hipsolverSetStream to rocblas_set_stream */
237 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
238 {
239   return rocblas_set_stream(handle,stream);
240 }
241 #endif /* __HIP_PLATFORM_NVCC__ */
242 PETSC_EXTERN hipStream_t    PetscDefaultHipStream; /* The default stream used by PETSc */
243 
244 PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t*);
245 PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t*);
246 #endif /* PETSC_HAVE_HIP */
247 
248 /* Cannot use the device context api without C++11 */
249 #if defined(PETSC_HAVE_CXX_DIALECT_CXX11)
250 PETSC_EXTERN PetscErrorCode PetscDeviceInitializePackage(void);
251 PETSC_EXTERN PetscErrorCode PetscDeviceFinalizePackage(void);
252 
253 /* PetscDevice */
254 PETSC_EXTERN PetscErrorCode PetscDeviceInitialize(PetscDeviceType);
255 PETSC_EXTERN PetscBool      PetscDeviceInitialized(PetscDeviceType);
256 PETSC_EXTERN PetscErrorCode PetscDeviceCreate(PetscDeviceType,PetscInt,PetscDevice*);
257 PETSC_EXTERN PetscErrorCode PetscDeviceConfigure(PetscDevice);
258 PETSC_EXTERN PetscErrorCode PetscDeviceView(PetscDevice,PetscViewer);
259 PETSC_EXTERN PetscErrorCode PetscDeviceDestroy(PetscDevice*);
260 
261 /* PetscDeviceContext */
262 PETSC_EXTERN PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext*);
263 PETSC_EXTERN PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext*);
264 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext,PetscDevice);
265 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext,PetscDevice*);
266 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext,PetscStreamType);
267 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext,PetscStreamType*);
268 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext);
269 PETSC_EXTERN PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext,PetscDeviceContext*);
270 PETSC_EXTERN PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext,PetscBool*);
271 PETSC_EXTERN PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext,PetscDeviceContext);
272 PETSC_EXTERN PetscErrorCode PetscDeviceContextFork(PetscDeviceContext,PetscInt,PetscDeviceContext**);
273 PETSC_EXTERN PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext,PetscInt,PetscDeviceContextJoinMode,PetscDeviceContext**);
274 PETSC_EXTERN PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext);
275 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext*);
276 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext);
277 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm,const char[],PetscDeviceContext);
278 #endif /* PETSC_HAVE_CXX_DIALECT_CXX11 */
279 
280 #endif /* PETSCDEVICE_H */
281