1 #if !defined(PETSCDEVICE_H) 2 #define PETSCDEVICE_H 3 4 #include <petscsys.h> 5 #include <petscdevicetypes.h> 6 #include <petscpkg_version.h> 7 8 #if defined(PETSC_HAVE_CUDA) 9 #include <cuda.h> 10 #include <cuda_runtime.h> 11 #include <cublas_v2.h> 12 #include <cusolverDn.h> 13 #include <cusolverSp.h> 14 #include <cufft.h> 15 16 /* cuBLAS does not have cublasGetErrorName(). We create one on our own. */ 17 PETSC_EXTERN const char* PetscCUBLASGetErrorName(cublasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRCUBLAS macro */ 18 PETSC_EXTERN const char* PetscCUSolverGetErrorName(cusolverStatus_t); 19 PETSC_EXTERN const char* PetscCUFFTGetErrorName(cufftResult); 20 21 /* REMOVE ME */ 22 #define WaitForCUDA() cudaDeviceSynchronize() 23 24 /* CUDART_VERSION = 1000 x major + 10 x minor version */ 25 26 /* Could not find exactly which CUDART_VERSION introduced cudaGetErrorName. At least it was in CUDA 8.0 (Sep. 2016) */ 27 #if PETSC_PKG_CUDA_VERSION_GE(8,0,0) 28 #define CHKERRCUDA(cerr) do { \ 29 const cudaError_t _p_cuda_err__ = cerr; \ 30 if (PetscUnlikely(_p_cuda_err__ != cudaSuccess)) { \ 31 const char *name = cudaGetErrorName(_p_cuda_err__); \ 32 const char *descr = cudaGetErrorString(_p_cuda_err__); \ 33 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuda error %d (%s) : %s", \ 34 (PetscErrorCode)_p_cuda_err__,name,descr); \ 35 } \ 36 } while (0) 37 #else /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */ 38 #define CHKERRCUDA(cerr) do { \ 39 const cudaError_t _p_cuda_err__ = cerr; \ 40 if (PetscUnlikely(_p_cuda_err__ != cudaSuccess)) { \ 41 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuda error %d",(PetscErrorCode)_p_cuda_err__); \ 42 } \ 43 } while (0) 44 #endif /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */ 45 46 #define CHKERRCUBLAS(stat) do { \ 47 const cublasStatus_t _p_cublas_stat__ = stat; \ 48 if (PetscUnlikely(_p_cublas_stat__ != CUBLAS_STATUS_SUCCESS)) { \ 49 const char *name = PetscCUBLASGetErrorName(_p_cublas_stat__); \ 50 if (((_p_cublas_stat__ == CUBLAS_STATUS_NOT_INITIALIZED) || \ 51 (_p_cublas_stat__ == CUBLAS_STATUS_ALLOC_FAILED)) && \ 52 PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \ 53 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE, \ 54 "cuBLAS error %d (%s). " \ 55 "Reports not initialized or alloc failed; " \ 56 "this indicates the GPU may have run out resources", \ 57 (PetscErrorCode)_p_cublas_stat__,name); \ 58 } else { \ 59 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuBLAS error %d (%s)", \ 60 (PetscErrorCode)_p_cublas_stat__,name); \ 61 } \ 62 } \ 63 } while (0) 64 65 #if (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) /* According to cuda/10.1.168 on OLCF Summit */ 66 #define CHKERRCUSPARSE(stat)\ 67 do {\ 68 if (PetscUnlikely(stat)) {\ 69 const char *name = cusparseGetErrorName(stat);\ 70 const char *descr = cusparseGetErrorString(stat);\ 71 if ((stat == CUSPARSE_STATUS_NOT_INITIALIZED) || (stat == CUSPARSE_STATUS_ALLOC_FAILED)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,"cuSPARSE errorcode %d (%s) : %s. Reports not initialized or alloc failed; this indicates the GPU has run out resources",(int)stat,name,descr); \ 72 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuSPARSE errorcode %d (%s) : %s",(int)stat,name,descr);\ 73 }\ 74 } while (0) 75 #else /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */ 76 #define CHKERRCUSPARSE(stat) do {if (PetscUnlikely(stat)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuSPARSE errorcode %d",(int)stat);} while (0) 77 #endif /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */ 78 79 #define CHKERRCUSOLVER(stat) do { \ 80 const cusolverStatus_t _p_cusolver_stat__ = stat; \ 81 if (PetscUnlikely(_p_cusolver_stat__ != CUSOLVER_STATUS_SUCCESS)) { \ 82 const char *name = PetscCUSolverGetErrorName(_p_cusolver_stat__); \ 83 if (((_p_cusolver_stat__ == CUSOLVER_STATUS_NOT_INITIALIZED) || \ 84 (_p_cusolver_stat__ == CUSOLVER_STATUS_ALLOC_FAILED) || \ 85 (_p_cusolver_stat__ == CUSOLVER_STATUS_INTERNAL_ERROR)) && \ 86 PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \ 87 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE, \ 88 "cuSolver error %d (%s). " \ 89 "This indicates the GPU may have run out resources", \ 90 (PetscErrorCode)_p_cusolver_stat__,name); \ 91 } else { \ 92 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuSolver error %d (%s)", \ 93 (PetscErrorCode)_p_cusolver_stat__,name); \ 94 } \ 95 } \ 96 } while (0) 97 98 #define CHKERRCUFFT(res) do { \ 99 const cufftResult_t _p_cufft_stat__ = res; \ 100 if (PetscUnlikely(_p_cufft_stat__ != CUFFT_SUCCESS)) { \ 101 const char *name = PetscCUFFTGetErrorName(_p_cufft_stat__); \ 102 if (((_p_cufft_stat__ == CUFFT_SETUP_FAILED) || \ 103 (_p_cufft_stat__ == CUFFT_ALLOC_FAILED)) && \ 104 PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \ 105 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE, \ 106 "cuFFT error %d (%s). " \ 107 "Reports not initialized or alloc failed; " \ 108 "this indicates the GPU has run out resources", \ 109 (PetscErrorCode)_p_cufft_stat__,name); \ 110 } else { \ 111 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuFFT error %d (%s)", \ 112 (PetscErrorCode)_p_cufft_stat__,name); \ 113 } \ 114 } \ 115 } while (0) 116 117 #define CHKERRCURAND(stat) do { \ 118 const curandStatus_t _p_curand_stat__ = stat; \ 119 if (PetscUnlikely(_p_curand_stat__ != CURAND_STATUS_SUCCESS)) { \ 120 if (((_p_curand_stat__ == CURAND_STATUS_INITIALIZATION_FAILED) || \ 121 (_p_curand_stat__ == CURAND_STATUS_ALLOCATION_FAILED)) && \ 122 PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \ 123 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE, \ 124 "cuRAND error %d. " \ 125 "Reports not initialized or alloc failed; " \ 126 "this indicates the GPU has run out resources", \ 127 (PetscErrorCode)_p_curand_stat__); \ 128 } else { \ 129 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU, \ 130 "cuRand error %d",(PetscErrorCode)_p_curand_stat__); \ 131 } \ 132 } \ 133 } while (0) 134 135 PETSC_EXTERN cudaStream_t PetscDefaultCudaStream; /* The default stream used by PETSc */ 136 137 PETSC_EXTERN PetscErrorCode PetscCUBLASGetHandle(cublasHandle_t*); 138 PETSC_EXTERN PetscErrorCode PetscCUSOLVERDnGetHandle(cusolverDnHandle_t*); 139 #endif /* PETSC_HAVE_CUDA */ 140 141 #if defined(PETSC_HAVE_HIP) 142 #include <hip/hip_runtime.h> 143 #include <hipblas.h> 144 #if defined(__HIP_PLATFORM_NVCC__) 145 #include <cusolverDn.h> 146 #else /* __HIP_PLATFORM_HCC__ */ 147 #include <rocsolver.h> 148 #endif /* __HIP_PLATFORM_NVCC__ */ 149 150 /* REMOVE ME */ 151 #define WaitForHIP() hipDeviceSynchronize() 152 153 /* hipBLAS does not have hipblasGetErrorName(). We create one on our own. */ 154 PETSC_EXTERN const char* PetscHIPBLASGetErrorName(hipblasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */ 155 156 #define CHKERRHIP(cerr) do { \ 157 const hipError_t _p_hip_err__ = cerr; \ 158 if (PetscUnlikely(_p_hip_err__ != hipSuccess)) { \ 159 const char *name = hipGetErrorName(_p_hip_err__); \ 160 const char *descr = hipGetErrorString(_p_hip_err__); \ 161 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"hip error %d (%s) : %s", \ 162 (PetscErrorCode)_p_hip_err__,name,descr); \ 163 } \ 164 } while (0) 165 166 #define CHKERRHIPBLAS(stat) do { \ 167 const hipblasStatus_t _p_hipblas_stat__ = stat; \ 168 if (PetscUnlikely(_p_hipblas_stat__ != HIPBLAS_STATUS_SUCCESS)) { \ 169 const char *name = PetscHIPBLASGetErrorName(_p_hipblas_stat__); \ 170 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"hipBLAS error %d (%s)", \ 171 (PetscErrorCode)_p_hipblas_stat__,name); \ 172 } \ 173 } while (0) 174 175 /* TODO: SEK: Need to figure out the hipsolver issues */ 176 #define CHKERRHIPSOLVER(stat) do { \ 177 const hipsolverStatus_t _p_hipsolver_stat__ = stat; \ 178 if (PetscUnlikely(_p_hipsolver_stat__ /* != HIPSOLVER_STATUS_SUCCESS */)) { \ 179 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_GPU,"HIPSOLVER error %d", \ 180 (PetscErrorCode)_p_hipsolver_stat__); \ 181 } \ 182 } while (0) 183 184 /* hipSolver does not exist yet so we work around it 185 rocSOLVER users rocBLAS for the handle 186 * */ 187 #if defined(__HIP_PLATFORM_NVCC__) 188 typedef cusolverDnHandle_t hipsolverHandle_t; 189 typedef cusolverStatus_t hipsolverStatus_t; 190 191 /* Alias hipsolverDestroy to cusolverDnDestroy */ 192 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle) 193 { 194 return cusolverDnDestroy(hipsolverhandle); 195 } 196 197 /* Alias hipsolverCreate to cusolverDnCreate */ 198 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) 199 { 200 return cusolverDnCreate(hipsolverhandle); 201 } 202 203 /* Alias hipsolverGetStream to cusolverDnGetStream */ 204 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) 205 { 206 return cusolverDnGetStream(handle,stream); 207 } 208 209 /* Alias hipsolverSetStream to cusolverDnSetStream */ 210 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) 211 { 212 return cusolveDnSetStream(handle,stream); 213 } 214 #else /* __HIP_PLATFORM_HCC__ */ 215 typedef rocblas_handle hipsolverHandle_t; 216 typedef rocblas_status hipsolverStatus_t; 217 218 /* Alias hipsolverDestroy to rocblas_destroy_handle */ 219 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t hipsolverhandle) 220 { 221 return rocblas_destroy_handle(hipsolverhandle); 222 } 223 224 /* Alias hipsolverCreate to rocblas_destroy_handle */ 225 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) 226 { 227 return rocblas_create_handle(hipsolverhandle); 228 } 229 230 /* Alias hipsolverGetStream to rocblas_get_stream */ 231 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) 232 { 233 return rocblas_get_stream(handle,stream); 234 } 235 236 /* Alias hipsolverSetStream to rocblas_set_stream */ 237 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) 238 { 239 return rocblas_set_stream(handle,stream); 240 } 241 #endif /* __HIP_PLATFORM_NVCC__ */ 242 PETSC_EXTERN hipStream_t PetscDefaultHipStream; /* The default stream used by PETSc */ 243 244 PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t*); 245 PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t*); 246 #endif /* PETSC_HAVE_HIP */ 247 248 /* Cannot use the device context api without C++11 */ 249 #if defined(PETSC_HAVE_CXX_DIALECT_CXX11) 250 PETSC_EXTERN PetscErrorCode PetscDeviceInitializePackage(void); 251 PETSC_EXTERN PetscErrorCode PetscDeviceFinalizePackage(void); 252 253 /* PetscDevice */ 254 PETSC_EXTERN PetscErrorCode PetscDeviceInitialize(PetscDeviceType); 255 PETSC_EXTERN PetscBool PetscDeviceInitialized(PetscDeviceType); 256 PETSC_EXTERN PetscErrorCode PetscDeviceCreate(PetscDeviceType,PetscInt,PetscDevice*); 257 PETSC_EXTERN PetscErrorCode PetscDeviceConfigure(PetscDevice); 258 PETSC_EXTERN PetscErrorCode PetscDeviceView(PetscDevice,PetscViewer); 259 PETSC_EXTERN PetscErrorCode PetscDeviceDestroy(PetscDevice*); 260 261 /* PetscDeviceContext */ 262 PETSC_EXTERN PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext*); 263 PETSC_EXTERN PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext*); 264 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext,PetscDevice); 265 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext,PetscDevice*); 266 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext,PetscStreamType); 267 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext,PetscStreamType*); 268 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext); 269 PETSC_EXTERN PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext,PetscDeviceContext*); 270 PETSC_EXTERN PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext,PetscBool*); 271 PETSC_EXTERN PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext,PetscDeviceContext); 272 PETSC_EXTERN PetscErrorCode PetscDeviceContextFork(PetscDeviceContext,PetscInt,PetscDeviceContext**); 273 PETSC_EXTERN PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext,PetscInt,PetscDeviceContextJoinMode,PetscDeviceContext**); 274 PETSC_EXTERN PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext); 275 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext*); 276 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext); 277 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm,const char[],PetscDeviceContext); 278 #endif /* PETSC_HAVE_CXX_DIALECT_CXX11 */ 279 280 #endif /* PETSCDEVICE_H */ 281