xref: /petsc/include/petscdevice.h (revision 0f1503a698da0451f8f2e3e02a165d4e1f526457)
1 #if !defined(PETSCDEVICE_H)
2 #define PETSCDEVICE_H
3 
4 #include <petscsys.h>
5 #include <petscdevicetypes.h>
6 #include <petscpkg_version.h>
7 
8 #if defined(PETSC_HAVE_CUDA)
9 #include <cuda.h>
10 #include <cuda_runtime.h>
11 #include <cublas_v2.h>
12 #include <cusolverDn.h>
13 #include <cusolverSp.h>
14 #include <cufft.h>
15 
16 /* cuBLAS does not have cublasGetErrorName(). We create one on our own. */
17 PETSC_EXTERN const char* PetscCUBLASGetErrorName(cublasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRCUBLAS macro */
18 PETSC_EXTERN const char* PetscCUSolverGetErrorName(cusolverStatus_t);
19 PETSC_EXTERN const char* PetscCUFFTGetErrorName(cufftResult);
20 
21 /* REMOVE ME */
22 #define WaitForCUDA() cudaDeviceSynchronize()
23 
24 /* CUDART_VERSION = 1000 x major + 10 x minor version */
25 
26 /* Could not find exactly which CUDART_VERSION introduced cudaGetErrorName. At least it was in CUDA 8.0 (Sep. 2016) */
27 #if PETSC_PKG_CUDA_VERSION_GE(8,0,0)
/*
  CHKERRCUDA - checks the cudaError_t produced by a CUDA runtime call

  The argument is evaluated once and stored in a local. If the stored value is not cudaSuccess,
  SETERRQ3() is invoked on PETSC_COMM_SELF with PETSC_ERR_GPU and a message carrying the numeric
  code together with the strings returned by cudaGetErrorName() and cudaGetErrorString().
*/
#define CHKERRCUDA(cerr) do {                                                       \
    const cudaError_t _p_cuda_err__ = (cerr);                                       \
    if (PetscUnlikely(cudaSuccess != _p_cuda_err__)) {                              \
      const char *_p_cuda_name__  = cudaGetErrorName(_p_cuda_err__);                \
      const char *_p_cuda_descr__ = cudaGetErrorString(_p_cuda_err__);              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuda error %d (%s) : %s",             \
               (PetscErrorCode)_p_cuda_err__,_p_cuda_name__,_p_cuda_descr__);       \
    }                                                                               \
  } while (0)
37 #else /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
/*
  CHKERRCUDA - fallback used when the CUDA version check above fails

  Same contract as the primary definition, except that only the numeric error code is reported:
  cudaGetErrorName()/cudaGetErrorString() are not called in this variant.
*/
#define CHKERRCUDA(cerr) do {                                        \
    const cudaError_t _p_cuda_err__ = (cerr);                        \
    if (PetscUnlikely(cudaSuccess != _p_cuda_err__)) {               \
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuda error %d",        \
               (PetscErrorCode)_p_cuda_err__);                       \
    }                                                                \
  } while (0)
45 #endif /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */
46 
/*
  CHKERRCUBLAS - checks the cublasStatus_t produced by a cuBLAS call

  The argument is evaluated once. On any status other than CUBLAS_STATUS_SUCCESS the macro looks up
  a printable name with PetscCUBLASGetErrorName() and invokes SETERRQ2() on PETSC_COMM_SELF:
    - PETSC_ERR_GPU_RESOURCE when the status is CUBLAS_STATUS_NOT_INITIALIZED or
      CUBLAS_STATUS_ALLOC_FAILED and PetscDeviceInitialized(PETSC_DEVICE_CUDA) is true,
    - PETSC_ERR_GPU otherwise.
  PetscDeviceInitialized() is only consulted for the two statuses listed above.

  Note: the expansion calls PetscDeviceInitialized(), which this header declares only inside the
  PETSC_HAVE_CXX_DIALECT_CXX11 section.
*/
#define CHKERRCUBLAS(stat)   do {                                                   \
    const cublasStatus_t _p_cublas_stat__ = (stat);                                 \
    if (PetscUnlikely(CUBLAS_STATUS_SUCCESS != _p_cublas_stat__)) {                 \
      const char *_p_cublas_name__ = PetscCUBLASGetErrorName(_p_cublas_stat__);     \
      if (((_p_cublas_stat__ != CUBLAS_STATUS_NOT_INITIALIZED) &&                   \
           (_p_cublas_stat__ != CUBLAS_STATUS_ALLOC_FAILED))   ||                   \
          !PetscDeviceInitialized(PETSC_DEVICE_CUDA)) {                             \
        SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuBLAS error %d (%s)",              \
                 (PetscErrorCode)_p_cublas_stat__,_p_cublas_name__);                \
      } else {                                                                      \
        SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,                            \
                 "cuBLAS error %d (%s). "                                           \
                 "Reports not initialized or alloc failed; "                        \
                 "this indicates the GPU may have run out resources",               \
                 (PetscErrorCode)_p_cublas_stat__,_p_cublas_name__);                \
      }                                                                             \
    }                                                                               \
  } while (0)
65 
/*
  CHKERRCUSOLVER - checks the cusolverStatus_t produced by a cuSOLVER call

  The argument is evaluated once. On any status other than CUSOLVER_STATUS_SUCCESS the macro looks
  up a printable name with PetscCUSolverGetErrorName() and invokes SETERRQ2() on PETSC_COMM_SELF:
    - PETSC_ERR_GPU_RESOURCE when the status is CUSOLVER_STATUS_NOT_INITIALIZED,
      CUSOLVER_STATUS_ALLOC_FAILED or CUSOLVER_STATUS_INTERNAL_ERROR and
      PetscDeviceInitialized(PETSC_DEVICE_CUDA) is true,
    - PETSC_ERR_GPU otherwise.
  PetscDeviceInitialized() is only consulted for the three statuses listed above.
*/
#define CHKERRCUSOLVER(stat) do {                                                     \
    const cusolverStatus_t _p_cusolver_stat__ = (stat);                               \
    if (PetscUnlikely(CUSOLVER_STATUS_SUCCESS != _p_cusolver_stat__)) {               \
      const char *_p_cusolver_name__ = PetscCUSolverGetErrorName(_p_cusolver_stat__); \
      if (((_p_cusolver_stat__ != CUSOLVER_STATUS_NOT_INITIALIZED) &&                 \
           (_p_cusolver_stat__ != CUSOLVER_STATUS_ALLOC_FAILED)    &&                 \
           (_p_cusolver_stat__ != CUSOLVER_STATUS_INTERNAL_ERROR)) ||                 \
          !PetscDeviceInitialized(PETSC_DEVICE_CUDA)) {                               \
        SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_GPU,                                       \
                 "cuSolver error %d (%s)",                                            \
                 (PetscErrorCode)_p_cusolver_stat__,_p_cusolver_name__);              \
      } else {                                                                        \
        SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,                              \
                 "cuSolver error %d (%s). "                                           \
                 "This indicates the GPU may have run out resources",                 \
                 (PetscErrorCode)_p_cusolver_stat__,_p_cusolver_name__);              \
      }                                                                               \
    }                                                                                 \
  } while (0)
85 
/*
  CHKERRCUFFT - checks the cufftResult produced by a cuFFT call

  The argument is evaluated once. On any result other than CUFFT_SUCCESS the macro looks up a
  printable name with PetscCUFFTGetErrorName() and invokes SETERRQ2() on PETSC_COMM_SELF:
    - PETSC_ERR_GPU_RESOURCE when the result is CUFFT_SETUP_FAILED or CUFFT_ALLOC_FAILED and
      PetscDeviceInitialized(PETSC_DEVICE_CUDA) is true,
    - PETSC_ERR_GPU otherwise.

  The local is declared with the typedef name cufftResult (the same type PetscCUFFTGetErrorName()
  is declared with) rather than the enum tag cufftResult_t, because a bare enum tag is not a type
  name in C and would make the macro unusable from C translation units.
*/
#define CHKERRCUFFT(res)     do {                                       \
    const cufftResult _p_cufft_stat__ = (res);                          \
    if (PetscUnlikely(_p_cufft_stat__ != CUFFT_SUCCESS)) {              \
      const char *name = PetscCUFFTGetErrorName(_p_cufft_stat__);       \
      if (((_p_cufft_stat__ == CUFFT_SETUP_FAILED)  ||                  \
           (_p_cufft_stat__ == CUFFT_ALLOC_FAILED)) &&                  \
          PetscDeviceInitialized(PETSC_DEVICE_CUDA)) {                  \
        SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,                \
                 "cuFFT error %d (%s). "                                \
                 "Reports not initialized or alloc failed; "            \
                 "this indicates the GPU has run out resources",        \
                 (PetscErrorCode)_p_cufft_stat__,name);                 \
      } else {                                                          \
        SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_GPU,                         \
                 "cuFFT error %d (%s)",                                 \
                 (PetscErrorCode)_p_cufft_stat__,name);                 \
      }                                                                 \
    }                                                                   \
  } while (0)
105 
/*
  CHKERRCURAND - checks the curandStatus_t produced by a cuRAND call

  The argument is evaluated once. On any status other than CURAND_STATUS_SUCCESS the macro invokes
  SETERRQ1() on PETSC_COMM_SELF with the numeric status:
    - PETSC_ERR_GPU_RESOURCE when the status is CURAND_STATUS_INITIALIZATION_FAILED or
      CURAND_STATUS_ALLOCATION_FAILED and PetscDeviceInitialized(PETSC_DEVICE_CUDA) is true,
    - PETSC_ERR_GPU otherwise.

  Note: no cuRAND header is included in the visible CUDA include list of this file, so the cuRAND
  types presumably have to be made visible by the code that uses this macro - TODO confirm.
*/
#define CHKERRCURAND(stat)   do {                                                   \
    const curandStatus_t _p_curand_stat__ = (stat);                                 \
    if (PetscUnlikely(CURAND_STATUS_SUCCESS != _p_curand_stat__)) {                 \
      if (((_p_curand_stat__ != CURAND_STATUS_INITIALIZATION_FAILED) &&             \
           (_p_curand_stat__ != CURAND_STATUS_ALLOCATION_FAILED))    ||             \
          !PetscDeviceInitialized(PETSC_DEVICE_CUDA)) {                             \
        SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_GPU,                                     \
                 "cuRand error %d",(PetscErrorCode)_p_curand_stat__);               \
      } else {                                                                      \
        SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,                            \
                 "cuRAND error %d. "                                                \
                 "Reports not initialized or alloc failed; "                        \
                 "this indicates the GPU has run out resources",                    \
                 (PetscErrorCode)_p_curand_stat__);                                 \
      }                                                                             \
    }                                                                               \
  } while (0)
123 
124 PETSC_EXTERN cudaStream_t   PetscDefaultCudaStream; /* The default stream used by PETSc */
125 
126 PETSC_EXTERN PetscErrorCode PetscCUBLASGetHandle(cublasHandle_t*);
127 PETSC_EXTERN PetscErrorCode PetscCUSOLVERDnGetHandle(cusolverDnHandle_t*);
128 #endif /* PETSC_HAVE_CUDA */
129 
130 #if defined(PETSC_HAVE_HIP)
131 #include <hip/hip_runtime.h>
132 #include <hipblas.h>
133 #if defined(__HIP_PLATFORM_NVCC__)
134 #include <cusolverDn.h>
135 #else /* __HIP_PLATFORM_HCC__ */
136 #include <rocsolver.h>
137 #endif /* __HIP_PLATFORM_NVCC__ */
138 
139 /* REMOVE ME */
140 #define WaitForHIP() hipDeviceSynchronize()
141 
142 /* hipBLAS does not have hipblasGetErrorName(). We create one on our own. */
143 PETSC_EXTERN const char* PetscHIPBLASGetErrorName(hipblasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */
144 
/*
  CHKERRHIP - checks the hipError_t produced by a HIP runtime call

  The argument is evaluated once. If it is not hipSuccess, SETERRQ3() is invoked on PETSC_COMM_SELF
  with PETSC_ERR_GPU and a message carrying the numeric code together with the strings returned by
  hipGetErrorName() and hipGetErrorString().
*/
#define CHKERRHIP(cerr)     do {                                                    \
    const hipError_t _p_hip_err__ = (cerr);                                         \
    if (PetscUnlikely(hipSuccess != _p_hip_err__)) {                                \
      const char *_p_hip_name__  = hipGetErrorName(_p_hip_err__);                   \
      const char *_p_hip_descr__ = hipGetErrorString(_p_hip_err__);                 \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_GPU,"hip error %d (%s) : %s",              \
               (PetscErrorCode)_p_hip_err__,_p_hip_name__,_p_hip_descr__);          \
    }                                                                               \
  } while (0)
154 
/*
  CHKERRHIPBLAS - checks the hipblasStatus_t produced by a hipBLAS call

  The argument is evaluated once. On any status other than HIPBLAS_STATUS_SUCCESS the macro looks
  up a printable name with PetscHIPBLASGetErrorName() and invokes SETERRQ2() on PETSC_COMM_SELF
  with PETSC_ERR_GPU.
*/
#define CHKERRHIPBLAS(stat) do {                                                    \
    const hipblasStatus_t _p_hipblas_stat__ = (stat);                               \
    if (PetscUnlikely(HIPBLAS_STATUS_SUCCESS != _p_hipblas_stat__)) {               \
      const char *_p_hipblas_name__ = PetscHIPBLASGetErrorName(_p_hipblas_stat__);  \
      SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_GPU,"hipBLAS error %d (%s)",               \
               (PetscErrorCode)_p_hipblas_stat__,_p_hipblas_name__);                \
    }                                                                               \
  } while (0)
163 
/* TODO: SEK:  Need to figure out the hipsolver issues */
/*
  CHKERRHIPSOLVER - checks the hipsolverStatus_t produced by one of the hipsolver* shims

  The argument is evaluated once. Any nonzero status is treated as a failure (the explicit
  comparison against HIPSOLVER_STATUS_SUCCESS is deliberately left commented out) and reported
  through SETERRQ1() on PETSC_COMM_SELF with PETSC_ERR_GPU and the numeric status.

  hipsolverStatus_t is typedef'd further down in this header; that is fine for a macro because the
  type is only needed where the macro is expanded.
*/
#define CHKERRHIPSOLVER(stat) do {                                                  \
    const hipsolverStatus_t _p_hipsolver_stat__ = (stat);                           \
    if (PetscUnlikely(_p_hipsolver_stat__ /* != HIPSOLVER_STATUS_SUCCESS */)) {     \
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_GPU,"HIPSOLVER error %d",                  \
               (PetscErrorCode)_p_hipsolver_stat__);                                \
    }                                                                               \
  } while (0)
172 
/* hipSolver does not exist yet, so we work around it by aliasing the hipsolver* names below
   to the native solver API of the active platform.
   rocSOLVER uses rocBLAS for the handle. */
176 #if defined(__HIP_PLATFORM_NVCC__)
177 typedef cusolverDnHandle_t hipsolverHandle_t;
178 typedef cusolverStatus_t   hipsolverStatus_t;
179 
180 /* Alias hipsolverDestroy to cusolverDnDestroy */
181 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle)
182 {
183   return cusolverDnDestroy(hipsolverhandle)
184 }
185 
186 /* Alias hipsolverCreate to cusolverDnCreate */
187 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
188 {
189   return cusolverDnCreate(hipsolverhandle)
190 }
191 
192 /* Alias hipsolverGetStream to cusolverDnGetStream */
193 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
194 {
195   return cusolverDnGetStream(handle,stream);
196 }
197 
198 /* Alias hipsolverSetStream to cusolverDnSetStream */
199 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
200 {
201   return cusolveDnSetStream(handle,stream);
202 }
203 #else /* __HIP_PLATFORM_HCC__ */
204 typedef rocblas_handle hipsolverHandle_t;
205 typedef rocblas_status hipsolverStatus_t;
206 
207 /* Alias hipsolverDestroy to rocblas_destroy_handle */
208 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t  hipsolverhandle)
209 {
210   return rocblas_destroy_handle(hipsolverhandle);
211 }
212 
213 /* Alias hipsolverCreate to rocblas_destroy_handle */
214 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle)
215 {
216   return rocblas_create_handle(hipsolverhandle);
217 }
218 
219 /* Alias hipsolverGetStream to rocblas_get_stream */
220 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream)
221 {
222   return rocblas_get_stream(handle,stream);
223 }
224 
225 /* Alias hipsolverSetStream to rocblas_set_stream */
226 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream)
227 {
228   return rocblas_set_stream(handle,stream);
229 }
230 #endif /* __HIP_PLATFORM_NVCC__ */
231 PETSC_EXTERN hipStream_t    PetscDefaultHipStream; /* The default stream used by PETSc */
232 
233 PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t*);
234 PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t*);
235 #endif /* PETSC_HAVE_HIP */
236 
237 /* Cannot use the device context api without C++11 */
238 #if defined(PETSC_HAVE_CXX_DIALECT_CXX11)
239 PETSC_EXTERN PetscErrorCode PetscDeviceInitializePackage(void);
240 PETSC_EXTERN PetscErrorCode PetscDeviceFinalizePackage(void);
241 
242 /* PetscDevice */
243 PETSC_EXTERN PetscErrorCode PetscDeviceInitialize(PetscDeviceType);
244 PETSC_EXTERN PetscBool      PetscDeviceInitialized(PetscDeviceType);
245 PETSC_EXTERN PetscErrorCode PetscDeviceCreate(PetscDeviceType,PetscInt,PetscDevice*);
246 PETSC_EXTERN PetscErrorCode PetscDeviceConfigure(PetscDevice);
247 PETSC_EXTERN PetscErrorCode PetscDeviceView(PetscDevice,PetscViewer);
248 PETSC_EXTERN PetscErrorCode PetscDeviceDestroy(PetscDevice*);
249 
250 /* PetscDeviceContext */
251 PETSC_EXTERN PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext*);
252 PETSC_EXTERN PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext*);
253 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext,PetscDevice);
254 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext,PetscDevice*);
255 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext,PetscStreamType);
256 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext,PetscStreamType*);
257 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext);
258 PETSC_EXTERN PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext,PetscDeviceContext*);
259 PETSC_EXTERN PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext,PetscBool*);
260 PETSC_EXTERN PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext,PetscDeviceContext);
261 PETSC_EXTERN PetscErrorCode PetscDeviceContextFork(PetscDeviceContext,PetscInt,PetscDeviceContext**);
262 PETSC_EXTERN PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext,PetscInt,PetscDeviceContextJoinMode,PetscDeviceContext**);
263 PETSC_EXTERN PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext);
264 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext*);
265 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext);
266 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm,const char[],PetscDeviceContext);
267 #endif /* PETSC_HAVE_CXX_DIALECT_CXX11 */
268 #endif /* PETSCDEVICE_H */
269