1 #if !defined(PETSCDEVICE_H) 2 #define PETSCDEVICE_H 3 4 #include <petscsys.h> 5 #include <petscdevicetypes.h> 6 #include <petscpkg_version.h> 7 8 #if defined(PETSC_HAVE_CUDA) 9 #include <cuda.h> 10 #include <cuda_runtime.h> 11 #include <cublas_v2.h> 12 #include <cusolverDn.h> 13 #include <cusolverSp.h> 14 #include <cufft.h> 15 16 /* cuBLAS does not have cublasGetErrorName(). We create one on our own. */ 17 PETSC_EXTERN const char* PetscCUBLASGetErrorName(cublasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRCUBLAS macro */ 18 PETSC_EXTERN const char* PetscCUSolverGetErrorName(cusolverStatus_t); 19 PETSC_EXTERN const char* PetscCUFFTGetErrorName(cufftResult); 20 21 /* REMOVE ME */ 22 #define WaitForCUDA() cudaDeviceSynchronize() 23 24 /* CUDART_VERSION = 1000 x major + 10 x minor version */ 25 26 /* Could not find exactly which CUDART_VERSION introduced cudaGetErrorName. At least it was in CUDA 8.0 (Sep. 2016) */ 27 #if PETSC_PKG_CUDA_VERSION_GE(8,0,0) 28 #define PetscCallCUDA(...) do { \ 29 const cudaError_t _p_cuda_err__ = __VA_ARGS__; \ 30 if (PetscUnlikely(_p_cuda_err__ != cudaSuccess)) { \ 31 const char *name = cudaGetErrorName(_p_cuda_err__); \ 32 const char *descr = cudaGetErrorString(_p_cuda_err__); \ 33 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuda error %d (%s) : %s", \ 34 (PetscErrorCode)_p_cuda_err__,name,descr); \ 35 } \ 36 } while (0) 37 #else /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */ 38 #define PetscCallCUDA(...) do { \ 39 const cudaError_t _p_cuda_err__ = __VA_ARGS__; \ 40 PetscCheck(_p_cuda_err__ == cudaSuccess,PETSC_COMM_SELF,PETSC_ERR_GPU,"cuda error %d",(PetscErrorCode)_p_cuda_err__); \ 41 } while (0) 42 #endif /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */ 43 #define CHKERRCUDA(...) PetscCallCUDA(__VA_ARGS__) 44 45 #if PETSC_PKG_CUDA_VERSION_GE(8,0,0) 46 #define PetscCallCUDAVoid(...) do { \ 47 const cudaError_t _p_cuda_err__ = __VA_ARGS__; \ 48 PetscCheckAbort(_p_cuda_err__ == cudaSuccess,PETSC_COMM_SELF,PETSC_ERR_GPU,"cuda error %d (%s) : %s",(PetscErrorCode)_p_cuda_err__,cudaGetErrorName(_p_cuda_err__),cudaGetErrorString(_p_cuda_err__)); \ 49 } while (0) 50 #else /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */ 51 #define PetscCallCUDAVoid(...) do { \ 52 const cudaError_t _p_cuda_err__ = __VA_ARGS__; \ 53 PetscCheckAbort(_p_cuda_err__ == cudaSuccess,PETSC_COMM_SELF,PETSC_ERR_GPU,"cuda error %d",(PetscErrorCode)_p_cuda_err__); \ 54 } while (0) 55 #endif /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */ 56 57 #define PetscCUDACheckLaunch \ 58 do { \ 59 /* Check synchronous errors, i.e. pre-launch */ \ 60 PetscCallCUDA(cudaGetLastError()); \ 61 /* Check asynchronous errors, i.e. kernel failed (ULF) */ \ 62 PetscCallCUDA(cudaDeviceSynchronize()); \ 63 } while (0) 64 65 #define PetscCallCUBLAS(...) do { \ 66 const cublasStatus_t _p_cublas_stat__ = __VA_ARGS__; \ 67 if (PetscUnlikely(_p_cublas_stat__ != CUBLAS_STATUS_SUCCESS)) { \ 68 const char *name = PetscCUBLASGetErrorName(_p_cublas_stat__); \ 69 if (((_p_cublas_stat__ == CUBLAS_STATUS_NOT_INITIALIZED) || \ 70 (_p_cublas_stat__ == CUBLAS_STATUS_ALLOC_FAILED)) && \ 71 PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \ 72 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE, \ 73 "cuBLAS error %d (%s). " \ 74 "Reports not initialized or alloc failed; " \ 75 "this indicates the GPU may have run out resources", \ 76 (PetscErrorCode)_p_cublas_stat__,name); \ 77 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuBLAS error %d (%s)", \ 78 (PetscErrorCode)_p_cublas_stat__,name); \ 79 } \ 80 } while (0) 81 #define CHKERRCUBLAS(...) PetscCallCUBLAS(__VA_ARGS__) 82 83 #if (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) /* According to cuda/10.1.168 on OLCF Summit */ 84 #define PetscCallCUSPARSE(...)\ 85 do {\ 86 const cusparseStatus_t _p_cusparse_stat__ = __VA_ARGS__;\ 87 if (PetscUnlikely(_p_cusparse_stat__)) {\ 88 const char *name = cusparseGetErrorName(_p_cusparse_stat__);\ 89 const char *descr = cusparseGetErrorString(_p_cusparse_stat__);\ 90 PetscCheck((_p_cusparse_stat__ != CUSPARSE_STATUS_NOT_INITIALIZED) && (_p_cusparse_stat__ != CUSPARSE_STATUS_ALLOC_FAILED),PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,"cuSPARSE errorcode %d (%s) : %s. Reports not initialized or alloc failed; this indicates the GPU has run out resources",(int)_p_cusparse_stat__,name,descr); \ 91 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuSPARSE errorcode %d (%s) : %s",(int)_p_cusparse_stat__,name,descr);\ 92 }\ 93 } while (0) 94 #else /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */ 95 #define PetscCallCUSPARSE(...) do { \ 96 const cusparseStatus_t _p_cusparse_stat__ = __VA_ARGS__; \ 97 PetscCheck(_p_cusparse_stat__ == CUSPARSE_STATUS_SUCCESS,PETSC_COMM_SELF,PETSC_ERR_GPU,"cuSPARSE errorcode %d",(PetscErrorCode)_p_cusparse_stat__); \ 98 } while (0) 99 #endif /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */ 100 #define CHKERRCUSPARSE(...) PetscCallCUSPARSE(__VA_ARGS__) 101 102 #define PetscCallCUSOLVER(...) do { \ 103 const cusolverStatus_t _p_cusolver_stat__ = __VA_ARGS__; \ 104 if (PetscUnlikely(_p_cusolver_stat__ != CUSOLVER_STATUS_SUCCESS)) { \ 105 const char *name = PetscCUSolverGetErrorName(_p_cusolver_stat__); \ 106 if (((_p_cusolver_stat__ == CUSOLVER_STATUS_NOT_INITIALIZED) || \ 107 (_p_cusolver_stat__ == CUSOLVER_STATUS_ALLOC_FAILED) || \ 108 (_p_cusolver_stat__ == CUSOLVER_STATUS_INTERNAL_ERROR)) && \ 109 PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \ 110 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE, \ 111 "cuSolver error %d (%s). " \ 112 "This indicates the GPU may have run out resources", \ 113 (PetscErrorCode)_p_cusolver_stat__,name); \ 114 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuSolver error %d (%s)", \ 115 (PetscErrorCode)_p_cusolver_stat__,name); \ 116 } \ 117 } while (0) 118 #define CHKERRCUSOLVER(...) PetscCallCUSOLVER(__VA_ARGS__) 119 120 #define PetscCallCUFFT(...) do { \ 121 const cufftResult_t _p_cufft_stat__ = __VA_ARGS__; \ 122 if (PetscUnlikely(_p_cufft_stat__ != CUFFT_SUCCESS)) { \ 123 const char *name = PetscCUFFTGetErrorName(_p_cufft_stat__); \ 124 if (((_p_cufft_stat__ == CUFFT_SETUP_FAILED) || \ 125 (_p_cufft_stat__ == CUFFT_ALLOC_FAILED)) && \ 126 PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \ 127 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE, \ 128 "cuFFT error %d (%s). " \ 129 "Reports not initialized or alloc failed; " \ 130 "this indicates the GPU has run out resources", \ 131 (PetscErrorCode)_p_cufft_stat__,name); \ 132 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuFFT error %d (%s)", \ 133 (PetscErrorCode)_p_cufft_stat__,name); \ 134 } \ 135 } while (0) 136 #define CHKERRCUFFT(...) PetscCallCUFFT(__VA_ARGS__) 137 138 #define PetscCallCURAND(...) do { \ 139 const curandStatus_t _p_curand_stat__ = __VA_ARGS__; \ 140 if (PetscUnlikely(_p_curand_stat__ != CURAND_STATUS_SUCCESS)) { \ 141 if (((_p_curand_stat__ == CURAND_STATUS_INITIALIZATION_FAILED) || \ 142 (_p_curand_stat__ == CURAND_STATUS_ALLOCATION_FAILED)) && \ 143 PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \ 144 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE, \ 145 "cuRAND error %d. " \ 146 "Reports not initialized or alloc failed; " \ 147 "this indicates the GPU has run out resources", \ 148 (PetscErrorCode)_p_curand_stat__); \ 149 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU, \ 150 "cuRand error %d",(PetscErrorCode)_p_curand_stat__); \ 151 } \ 152 } while (0) 153 #define CHKERRCURAND(...) PetscCallCURAND(__VA_ARGS__) 154 155 PETSC_EXTERN cudaStream_t PetscDefaultCudaStream; /* The default stream used by PETSc */ 156 157 PETSC_EXTERN PetscErrorCode PetscCUBLASGetHandle(cublasHandle_t*); 158 PETSC_EXTERN PetscErrorCode PetscCUSOLVERDnGetHandle(cusolverDnHandle_t*); 159 #endif /* PETSC_HAVE_CUDA */ 160 161 #if defined(PETSC_HAVE_HIP) 162 #include <hip/hip_runtime.h> 163 #include <hipblas.h> 164 #if defined(__HIP_PLATFORM_NVCC__) 165 #include <cusolverDn.h> 166 #else /* __HIP_PLATFORM_HCC__ */ 167 #include <rocsolver.h> 168 #endif /* __HIP_PLATFORM_NVCC__ */ 169 170 /* REMOVE ME */ 171 #define WaitForHIP() hipDeviceSynchronize() 172 173 /* hipBLAS does not have hipblasGetErrorName(). We create one on our own. */ 174 PETSC_EXTERN const char* PetscHIPBLASGetErrorName(hipblasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */ 175 176 #define PetscCallHIP(...) do { \ 177 const hipError_t _p_hip_err__ = __VA_ARGS__; \ 178 if (PetscUnlikely(_p_hip_err__ != hipSuccess)) { \ 179 const char *name = hipGetErrorName(_p_hip_err__); \ 180 const char *descr = hipGetErrorString(_p_hip_err__); \ 181 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"hip error %d (%s) : %s", \ 182 (PetscErrorCode)_p_hip_err__,name,descr); \ 183 } \ 184 } while (0) 185 #define CHKERRHIP(...) PetscCallHIP(__VA_ARGS__) 186 187 #define PetscCallHIPBLAS(...) do { \ 188 const hipblasStatus_t _p_hipblas_stat__ = __VA_ARGS__; \ 189 if (PetscUnlikely(_p_hipblas_stat__ != HIPBLAS_STATUS_SUCCESS)) { \ 190 const char *name = PetscHIPBLASGetErrorName(_p_hipblas_stat__); \ 191 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"hipBLAS error %d (%s)", \ 192 (PetscErrorCode)_p_hipblas_stat__,name); \ 193 } \ 194 } while (0) 195 #define CHKERRHIPBLAS(...) PetscCallHIPBLAS(__VA_ARGS__) 196 197 /* TODO: SEK: Need to figure out the hipsolver issues */ 198 #define PetscCallHIPSOLVER(...) do { \ 199 const hipsolverStatus_t _p_hipsolver_stat__ = __VA_ARGS__; \ 200 PetscCheck(!_p_hipsolver_stat__,PETSC_COMM_SELF,PETSC_ERR_GPU,"HIPSOLVER error %d",(PetscErrorCode)_p_hipsolver_stat__); \ 201 } while (0) 202 #define CHKERRHIPSOLVER(...) PetscCallHIPSOLVER(__VA_ARGS__) 203 204 /* hipSolver does not exist yet so we work around it 205 rocSOLVER users rocBLAS for the handle 206 * */ 207 #if defined(__HIP_PLATFORM_NVCC__) 208 typedef cusolverDnHandle_t hipsolverHandle_t; 209 typedef cusolverStatus_t hipsolverStatus_t; 210 211 /* Alias hipsolverDestroy to cusolverDnDestroy */ 212 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle) 213 { 214 return cusolverDnDestroy(hipsolverhandle); 215 } 216 217 /* Alias hipsolverCreate to cusolverDnCreate */ 218 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) 219 { 220 return cusolverDnCreate(hipsolverhandle); 221 } 222 223 /* Alias hipsolverGetStream to cusolverDnGetStream */ 224 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) 225 { 226 return cusolverDnGetStream(handle,stream); 227 } 228 229 /* Alias hipsolverSetStream to cusolverDnSetStream */ 230 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) 231 { 232 return cusolveDnSetStream(handle,stream); 233 } 234 #else /* __HIP_PLATFORM_HCC__ */ 235 typedef rocblas_handle hipsolverHandle_t; 236 typedef rocblas_status hipsolverStatus_t; 237 238 /* Alias hipsolverDestroy to rocblas_destroy_handle */ 239 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t hipsolverhandle) 240 { 241 return rocblas_destroy_handle(hipsolverhandle); 242 } 243 244 /* Alias hipsolverCreate to rocblas_destroy_handle */ 245 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) 246 { 247 return rocblas_create_handle(hipsolverhandle); 248 } 249 250 /* Alias hipsolverGetStream to rocblas_get_stream */ 251 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) 252 { 253 return rocblas_get_stream(handle,stream); 254 } 255 256 /* Alias hipsolverSetStream to rocblas_set_stream */ 257 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) 258 { 259 return rocblas_set_stream(handle,stream); 260 } 261 #endif /* __HIP_PLATFORM_NVCC__ */ 262 PETSC_EXTERN hipStream_t PetscDefaultHipStream; /* The default stream used by PETSc */ 263 264 PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t*); 265 PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t*); 266 #endif /* PETSC_HAVE_HIP */ 267 268 /* Cannot use the device context api without C++ */ 269 #if defined(PETSC_HAVE_CXX) 270 PETSC_EXTERN PetscErrorCode PetscDeviceInitializePackage(void); 271 PETSC_EXTERN PetscErrorCode PetscDeviceFinalizePackage(void); 272 273 /* PetscDevice */ 274 PETSC_EXTERN PetscErrorCode PetscDeviceInitialize(PetscDeviceType); 275 PETSC_EXTERN PetscBool PetscDeviceInitialized(PetscDeviceType); 276 PETSC_EXTERN PetscErrorCode PetscDeviceCreate(PetscDeviceType,PetscInt,PetscDevice*); 277 PETSC_EXTERN PetscErrorCode PetscDeviceConfigure(PetscDevice); 278 PETSC_EXTERN PetscErrorCode PetscDeviceView(PetscDevice,PetscViewer); 279 PETSC_EXTERN PetscErrorCode PetscDeviceDestroy(PetscDevice*); 280 PETSC_EXTERN PetscErrorCode PetscDeviceGetDeviceId(PetscDevice,PetscInt*); 281 282 /* PetscDeviceContext */ 283 PETSC_EXTERN PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext*); 284 PETSC_EXTERN PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext*); 285 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext,PetscDevice); 286 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext,PetscDevice*); 287 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext,PetscStreamType); 288 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext,PetscStreamType*); 289 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext); 290 PETSC_EXTERN PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext,PetscDeviceContext*); 291 PETSC_EXTERN PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext,PetscBool*); 292 PETSC_EXTERN PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext,PetscDeviceContext); 293 PETSC_EXTERN PetscErrorCode PetscDeviceContextFork(PetscDeviceContext,PetscInt,PetscDeviceContext**); 294 PETSC_EXTERN PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext,PetscInt,PetscDeviceContextJoinMode,PetscDeviceContext**); 295 PETSC_EXTERN PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext); 296 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext*); 297 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext); 298 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm,const char[],PetscDeviceContext); 299 #else 300 #define PetscDeviceInitialize(...) 0 301 #define PetscDeviceInitialized(...) PETSC_FALSE 302 #endif /* PETSC_HAVE_CXX */ 303 304 #endif /* PETSCDEVICE_H */ 305