1 #if !defined(PETSCDEVICE_H) 2 #define PETSCDEVICE_H 3 4 #include <petscsys.h> 5 #include <petscdevicetypes.h> 6 #include <petscpkg_version.h> 7 8 #if defined(PETSC_HAVE_CUDA) 9 #include <cuda.h> 10 #include <cuda_runtime.h> 11 #include <cublas_v2.h> 12 #include <cusolverDn.h> 13 #include <cusolverSp.h> 14 #include <cufft.h> 15 16 /* cuBLAS does not have cublasGetErrorName(). We create one on our own. */ 17 PETSC_EXTERN const char *PetscCUBLASGetErrorName(cublasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRCUBLAS macro */ 18 PETSC_EXTERN const char *PetscCUSolverGetErrorName(cusolverStatus_t); 19 PETSC_EXTERN const char *PetscCUFFTGetErrorName(cufftResult); 20 21 /* REMOVE ME */ 22 #define WaitForCUDA() cudaDeviceSynchronize() 23 24 /* CUDART_VERSION = 1000 x major + 10 x minor version */ 25 26 /* Could not find exactly which CUDART_VERSION introduced cudaGetErrorName. At least it was in CUDA 8.0 (Sep. 2016) */ 27 #if PETSC_PKG_CUDA_VERSION_GE(8, 0, 0) 28 #define PetscCallCUDA(...) \ 29 do { \ 30 const cudaError_t _p_cuda_err__ = __VA_ARGS__; \ 31 if (PetscUnlikely(_p_cuda_err__ != cudaSuccess)) { \ 32 const char *name = cudaGetErrorName(_p_cuda_err__); \ 33 const char *descr = cudaGetErrorString(_p_cuda_err__); \ 34 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d (%s) : %s", (PetscErrorCode)_p_cuda_err__, name, descr); \ 35 } \ 36 } while (0) 37 #else /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */ 38 #define PetscCallCUDA(...) \ 39 do { \ 40 const cudaError_t _p_cuda_err__ = __VA_ARGS__; \ 41 PetscCheck(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d", (PetscErrorCode)_p_cuda_err__); \ 42 } while (0) 43 #endif /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */ 44 #define CHKERRCUDA(...) PetscCallCUDA(__VA_ARGS__) 45 46 #if PETSC_PKG_CUDA_VERSION_GE(8, 0, 0) 47 #define PetscCallCUDAVoid(...) \ 48 do { \ 49 const cudaError_t _p_cuda_err__ = __VA_ARGS__; \ 50 PetscCheckAbort(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d (%s) : %s", (PetscErrorCode)_p_cuda_err__, cudaGetErrorName(_p_cuda_err__), cudaGetErrorString(_p_cuda_err__)); \ 51 } while (0) 52 #else /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */ 53 #define PetscCallCUDAVoid(...) \ 54 do { \ 55 const cudaError_t _p_cuda_err__ = __VA_ARGS__; \ 56 PetscCheckAbort(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d", (PetscErrorCode)_p_cuda_err__); \ 57 } while (0) 58 #endif /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */ 59 60 #define PetscCUDACheckLaunch \ 61 do { \ 62 /* Check synchronous errors, i.e. pre-launch */ \ 63 PetscCallCUDA(cudaGetLastError()); \ 64 /* Check asynchronous errors, i.e. kernel failed (ULF) */ \ 65 PetscCallCUDA(cudaDeviceSynchronize()); \ 66 } while (0) 67 68 #define PetscCallCUBLAS(...) \ 69 do { \ 70 const cublasStatus_t _p_cublas_stat__ = __VA_ARGS__; \ 71 if (PetscUnlikely(_p_cublas_stat__ != CUBLAS_STATUS_SUCCESS)) { \ 72 const char *name = PetscCUBLASGetErrorName(_p_cublas_stat__); \ 73 if (((_p_cublas_stat__ == CUBLAS_STATUS_NOT_INITIALIZED) || (_p_cublas_stat__ == CUBLAS_STATUS_ALLOC_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \ 74 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \ 75 "cuBLAS error %d (%s). " \ 76 "Reports not initialized or alloc failed; " \ 77 "this indicates the GPU may have run out resources", \ 78 (PetscErrorCode)_p_cublas_stat__, name); \ 79 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuBLAS error %d (%s)", (PetscErrorCode)_p_cublas_stat__, name); \ 80 } \ 81 } while (0) 82 #define CHKERRCUBLAS(...) PetscCallCUBLAS(__VA_ARGS__) 83 84 #if (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) /* According to cuda/10.1.168 on OLCF Summit */ 85 #define PetscCallCUSPARSE(...) \ 86 do { \ 87 const cusparseStatus_t _p_cusparse_stat__ = __VA_ARGS__; \ 88 if (PetscUnlikely(_p_cusparse_stat__)) { \ 89 const char *name = cusparseGetErrorName(_p_cusparse_stat__); \ 90 const char *descr = cusparseGetErrorString(_p_cusparse_stat__); \ 91 PetscCheck((_p_cusparse_stat__ != CUSPARSE_STATUS_NOT_INITIALIZED) && (_p_cusparse_stat__ != CUSPARSE_STATUS_ALLOC_FAILED), PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, "cuSPARSE errorcode %d (%s) : %s. Reports not initialized or alloc failed; this indicates the GPU has run out resources", (int)_p_cusparse_stat__, name, descr); \ 92 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSPARSE errorcode %d (%s) : %s", (int)_p_cusparse_stat__, name, descr); \ 93 } \ 94 } while (0) 95 #else /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */ 96 #define PetscCallCUSPARSE(...) \ 97 do { \ 98 const cusparseStatus_t _p_cusparse_stat__ = __VA_ARGS__; \ 99 PetscCheck(_p_cusparse_stat__ == CUSPARSE_STATUS_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSPARSE errorcode %d", (PetscErrorCode)_p_cusparse_stat__); \ 100 } while (0) 101 #endif /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */ 102 #define CHKERRCUSPARSE(...) PetscCallCUSPARSE(__VA_ARGS__) 103 104 #define PetscCallCUSOLVER(...) \ 105 do { \ 106 const cusolverStatus_t _p_cusolver_stat__ = __VA_ARGS__; \ 107 if (PetscUnlikely(_p_cusolver_stat__ != CUSOLVER_STATUS_SUCCESS)) { \ 108 const char *name = PetscCUSolverGetErrorName(_p_cusolver_stat__); \ 109 if (((_p_cusolver_stat__ == CUSOLVER_STATUS_NOT_INITIALIZED) || (_p_cusolver_stat__ == CUSOLVER_STATUS_ALLOC_FAILED) || (_p_cusolver_stat__ == CUSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \ 110 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \ 111 "cuSolver error %d (%s). " \ 112 "This indicates the GPU may have run out resources", \ 113 (PetscErrorCode)_p_cusolver_stat__, name); \ 114 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSolver error %d (%s)", (PetscErrorCode)_p_cusolver_stat__, name); \ 115 } \ 116 } while (0) 117 #define CHKERRCUSOLVER(...) PetscCallCUSOLVER(__VA_ARGS__) 118 119 #define PetscCallCUFFT(...) \ 120 do { \ 121 const cufftResult_t _p_cufft_stat__ = __VA_ARGS__; \ 122 if (PetscUnlikely(_p_cufft_stat__ != CUFFT_SUCCESS)) { \ 123 const char *name = PetscCUFFTGetErrorName(_p_cufft_stat__); \ 124 if (((_p_cufft_stat__ == CUFFT_SETUP_FAILED) || (_p_cufft_stat__ == CUFFT_ALLOC_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \ 125 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \ 126 "cuFFT error %d (%s). " \ 127 "Reports not initialized or alloc failed; " \ 128 "this indicates the GPU has run out resources", \ 129 (PetscErrorCode)_p_cufft_stat__, name); \ 130 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuFFT error %d (%s)", (PetscErrorCode)_p_cufft_stat__, name); \ 131 } \ 132 } while (0) 133 #define CHKERRCUFFT(...) PetscCallCUFFT(__VA_ARGS__) 134 135 #define PetscCallCURAND(...) \ 136 do { \ 137 const curandStatus_t _p_curand_stat__ = __VA_ARGS__; \ 138 if (PetscUnlikely(_p_curand_stat__ != CURAND_STATUS_SUCCESS)) { \ 139 if (((_p_curand_stat__ == CURAND_STATUS_INITIALIZATION_FAILED) || (_p_curand_stat__ == CURAND_STATUS_ALLOCATION_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \ 140 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \ 141 "cuRAND error %d. " \ 142 "Reports not initialized or alloc failed; " \ 143 "this indicates the GPU has run out resources", \ 144 (PetscErrorCode)_p_curand_stat__); \ 145 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuRand error %d", (PetscErrorCode)_p_curand_stat__); \ 146 } \ 147 } while (0) 148 #define CHKERRCURAND(...) PetscCallCURAND(__VA_ARGS__) 149 150 PETSC_EXTERN cudaStream_t PetscDefaultCudaStream; /* The default stream used by PETSc */ 151 152 PETSC_EXTERN PetscErrorCode PetscCUBLASGetHandle(cublasHandle_t *); 153 PETSC_EXTERN PetscErrorCode PetscCUSOLVERDnGetHandle(cusolverDnHandle_t *); 154 #endif /* PETSC_HAVE_CUDA */ 155 156 #if defined(PETSC_HAVE_HIP) 157 #include <hip/hip_runtime.h> 158 #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0) 159 #include <hipblas/hipblas.h> 160 #else 161 #include <hipblas.h> 162 #endif 163 #if defined(__HIP_PLATFORM_NVCC__) 164 #include <cusolverDn.h> 165 #else /* __HIP_PLATFORM_HCC__ */ 166 #if PETSC_PKG_HIP_VERSION_GE(5, 2, 0) 167 #include <rocsolver/rocsolver.h> 168 #else 169 #include <rocsolver.h> 170 #endif 171 #endif /* __HIP_PLATFORM_NVCC__ */ 172 173 /* REMOVE ME */ 174 #define WaitForHIP() hipDeviceSynchronize() 175 176 /* hipBLAS does not have hipblasGetErrorName(). We create one on our own. */ 177 PETSC_EXTERN const char *PetscHIPBLASGetErrorName(hipblasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */ 178 179 #define PetscCallHIP(...) \ 180 do { \ 181 const hipError_t _p_hip_err__ = __VA_ARGS__; \ 182 if (PetscUnlikely(_p_hip_err__ != hipSuccess)) { \ 183 const char *name = hipGetErrorName(_p_hip_err__); \ 184 const char *descr = hipGetErrorString(_p_hip_err__); \ 185 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hip error %d (%s) : %s", (PetscErrorCode)_p_hip_err__, name, descr); \ 186 } \ 187 } while (0) 188 #define CHKERRHIP(...) PetscCallHIP(__VA_ARGS__) 189 190 #define PetscCallHIPBLAS(...) \ 191 do { \ 192 const hipblasStatus_t _p_hipblas_stat__ = __VA_ARGS__; \ 193 if (PetscUnlikely(_p_hipblas_stat__ != HIPBLAS_STATUS_SUCCESS)) { \ 194 const char *name = PetscHIPBLASGetErrorName(_p_hipblas_stat__); \ 195 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "hipBLAS error %d (%s)", (PetscErrorCode)_p_hipblas_stat__, name); \ 196 } \ 197 } while (0) 198 #define CHKERRHIPBLAS(...) PetscCallHIPBLAS(__VA_ARGS__) 199 200 /* TODO: SEK: Need to figure out the hipsolver issues */ 201 #define PetscCallHIPSOLVER(...) \ 202 do { \ 203 const hipsolverStatus_t _p_hipsolver_stat__ = __VA_ARGS__; \ 204 PetscCheck(!_p_hipsolver_stat__, PETSC_COMM_SELF, PETSC_ERR_GPU, "HIPSOLVER error %d", (PetscErrorCode)_p_hipsolver_stat__); \ 205 } while (0) 206 #define CHKERRHIPSOLVER(...) PetscCallHIPSOLVER(__VA_ARGS__) 207 208 /* hipSolver does not exist yet so we work around it 209 rocSOLVER users rocBLAS for the handle 210 * */ 211 #if defined(__HIP_PLATFORM_NVCC__) 212 typedef cusolverDnHandle_t hipsolverHandle_t; 213 typedef cusolverStatus_t hipsolverStatus_t; 214 215 /* Alias hipsolverDestroy to cusolverDnDestroy */ 216 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle) { 217 return cusolverDnDestroy(hipsolverhandle); 218 } 219 220 /* Alias hipsolverCreate to cusolverDnCreate */ 221 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) { 222 return cusolverDnCreate(hipsolverhandle); 223 } 224 225 /* Alias hipsolverGetStream to cusolverDnGetStream */ 226 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) { 227 return cusolverDnGetStream(handle, stream); 228 } 229 230 /* Alias hipsolverSetStream to cusolverDnSetStream */ 231 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) { 232 return cusolveDnSetStream(handle, stream); 233 } 234 #else /* __HIP_PLATFORM_HCC__ */ 235 typedef rocblas_handle hipsolverHandle_t; 236 typedef rocblas_status hipsolverStatus_t; 237 238 /* Alias hipsolverDestroy to rocblas_destroy_handle */ 239 static inline hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t hipsolverhandle) { 240 return rocblas_destroy_handle(hipsolverhandle); 241 } 242 243 /* Alias hipsolverCreate to rocblas_destroy_handle */ 244 static inline hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) { 245 return rocblas_create_handle(hipsolverhandle); 246 } 247 248 /* Alias hipsolverGetStream to rocblas_get_stream */ 249 static inline hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) { 250 return rocblas_get_stream(handle, stream); 251 } 252 253 /* Alias hipsolverSetStream to rocblas_set_stream */ 254 static inline hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) { 255 return rocblas_set_stream(handle, stream); 256 } 257 #endif /* __HIP_PLATFORM_NVCC__ */ 258 PETSC_EXTERN hipStream_t PetscDefaultHipStream; /* The default stream used by PETSc */ 259 260 PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t *); 261 PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t *); 262 #endif /* PETSC_HAVE_HIP */ 263 264 /* Cannot use the device context api without C++ */ 265 #if defined(PETSC_HAVE_CXX) 266 PETSC_EXTERN PetscErrorCode PetscDeviceInitializePackage(void); 267 PETSC_EXTERN PetscErrorCode PetscDeviceFinalizePackage(void); 268 269 /* PetscDevice */ 270 PETSC_EXTERN PetscErrorCode PetscDeviceInitialize(PetscDeviceType); 271 PETSC_EXTERN PetscBool PetscDeviceInitialized(PetscDeviceType); 272 PETSC_EXTERN PetscErrorCode PetscDeviceCreate(PetscDeviceType, PetscInt, PetscDevice *); 273 PETSC_EXTERN PetscErrorCode PetscDeviceConfigure(PetscDevice); 274 PETSC_EXTERN PetscErrorCode PetscDeviceView(PetscDevice, PetscViewer); 275 PETSC_EXTERN PetscErrorCode PetscDeviceDestroy(PetscDevice *); 276 PETSC_EXTERN PetscErrorCode PetscDeviceGetDeviceId(PetscDevice, PetscInt *); 277 278 /* PetscDeviceContext */ 279 PETSC_EXTERN PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *); 280 PETSC_EXTERN PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *); 281 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext, PetscDevice); 282 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext, PetscDevice *); 283 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext, PetscStreamType); 284 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext, PetscStreamType *); 285 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext); 286 PETSC_EXTERN PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext, PetscDeviceContext *); 287 PETSC_EXTERN PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext, PetscBool *); 288 PETSC_EXTERN PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext, PetscDeviceContext); 289 PETSC_EXTERN PetscErrorCode PetscDeviceContextFork(PetscDeviceContext, PetscInt, PetscDeviceContext **); 290 PETSC_EXTERN PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext, PetscInt, PetscDeviceContextJoinMode, PetscDeviceContext **); 291 PETSC_EXTERN PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext); 292 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *); 293 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext); 294 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm, const char[], PetscDeviceContext); 295 #else 296 #define PetscDeviceInitialize(...) 0 297 #define PetscDeviceInitialized(...) PETSC_FALSE 298 #endif /* PETSC_HAVE_CXX */ 299 300 #endif /* PETSCDEVICE_H */ 301