1 #if !defined(PETSCDEVICE_H) 2 #define PETSCDEVICE_H 3 4 #include <petscsys.h> 5 #include <petscdevicetypes.h> 6 #include <petscpkg_version.h> 7 8 #if PetscDefined(HAVE_CUDA) 9 #include <cuda.h> 10 #include <cuda_runtime.h> 11 #include <cublas_v2.h> 12 #include <cusolverDn.h> 13 #include <cusolverSp.h> 14 #include <cufft.h> 15 16 /* cuBLAS does not have cublasGetErrorName(). We create one on our own. */ 17 PETSC_EXTERN const char* PetscCUBLASGetErrorName(cublasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRCUBLAS macro */ 18 PETSC_EXTERN const char* PetscCUSolverGetErrorName(cusolverStatus_t); 19 PETSC_EXTERN const char* PetscCUFFTGetErrorName(cufftResult); 20 21 /* REMOVE ME */ 22 #define WaitForCUDA() cudaDeviceSynchronize() 23 24 /* CUDART_VERSION = 1000 x major + 10 x minor version */ 25 26 /* Could not find exactly which CUDART_VERSION introduced cudaGetErrorName. At least it was in CUDA 8.0 (Sep. 2016) */ 27 #if PETSC_PKG_CUDA_VERSION_GE(8,0,0) 28 #define CHKERRCUDA(cerr) do { \ 29 const cudaError_t _p_cuda_err__ = cerr; \ 30 if (PetscUnlikely(_p_cuda_err__ != cudaSuccess)) { \ 31 const char *name = cudaGetErrorName(_p_cuda_err__); \ 32 const char *descr = cudaGetErrorString(_p_cuda_err__); \ 33 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuda error %d (%s) : %s", \ 34 (PetscErrorCode)_p_cuda_err__,name,descr); \ 35 } \ 36 } while (0) 37 #else /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */ 38 #define CHKERRCUDA(cerr) do { \ 39 const cudaError_t _p_cuda_err__ = cerr; \ 40 if (PetscUnlikely(_p_cuda_err__ != cudaSuccess)) { \ 41 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuda error %d", \ 42 (PetscErrorCode)_p_cuda_err__); \ 43 } \ 44 } while (0) 45 #endif /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */ 46 47 #define CHKERRCUBLAS(stat) do { \ 48 const cublasStatus_t _p_cublas_stat__ = stat; \ 49 if (PetscUnlikely(_p_cublas_stat__ != CUBLAS_STATUS_SUCCESS)) { \ 50 const char *name = PetscCUBLASGetErrorName(_p_cublas_stat__); \ 51 if (((_p_cublas_stat__ == CUBLAS_STATUS_NOT_INITIALIZED) || \ 52 (_p_cublas_stat__ == CUBLAS_STATUS_ALLOC_FAILED)) && \ 53 PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \ 54 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE, \ 55 "cuBLAS error %d (%s). " \ 56 "Reports not initialized or alloc failed; " \ 57 "this indicates the GPU may have run out resources", \ 58 (PetscErrorCode)_p_cublas_stat__,name); \ 59 } else { \ 60 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuBLAS error %d (%s)", \ 61 (PetscErrorCode)_p_cublas_stat__,name); \ 62 } \ 63 } \ 64 } while (0) 65 66 #define CHKERRCUSOLVER(stat) do { \ 67 const cusolverStatus_t _p_cusolver_stat__ = stat; \ 68 if (PetscUnlikely(_p_cusolver_stat__ != CUSOLVER_STATUS_SUCCESS)) { \ 69 const char *name = PetscCUSolverGetErrorName(_p_cusolver_stat__); \ 70 if (((_p_cusolver_stat__ == CUSOLVER_STATUS_NOT_INITIALIZED) || \ 71 (_p_cusolver_stat__ == CUSOLVER_STATUS_ALLOC_FAILED) || \ 72 (_p_cusolver_stat__ == CUSOLVER_STATUS_INTERNAL_ERROR)) && \ 73 PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \ 74 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE, \ 75 "cuSolver error %d (%s). " \ 76 "This indicates the GPU may have run out resources", \ 77 (PetscErrorCode)_p_cusolver_stat__,name); \ 78 } else { \ 79 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_GPU, \ 80 "cuSolver error %d (%s)", \ 81 (PetscErrorCode)_p_cusolver_stat__,name); \ 82 } \ 83 } \ 84 } while (0) 85 86 #define CHKERRCUFFT(res) do { \ 87 const cufftResult_t _p_cufft_stat__ = res; \ 88 if (PetscUnlikely(_p_cufft_stat__ != CUFFT_SUCCESS)) { \ 89 const char *name = PetscCUFFTGetErrorName(_p_cufft_stat__); \ 90 if (((_p_cufft_stat__ == CUFFT_SETUP_FAILED) || \ 91 (_p_cufft_stat__ == CUFFT_ALLOC_FAILED)) && \ 92 PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \ 93 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE, \ 94 "cuFFT error %d (%s). " \ 95 "Reports not initialized or alloc failed; " \ 96 "this indicates the GPU has run out resources", \ 97 (PetscErrorCode)_p_cufft_stat__,name); \ 98 } else { \ 99 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_GPU, \ 100 "cuFFT error %d (%s)", \ 101 (PetscErrorCode)_p_cufft_stat__,name); \ 102 } \ 103 } \ 104 } while (0) 105 106 #define CHKERRCURAND(stat) do { \ 107 const curandStatus_t _p_curand_stat__ = stat; \ 108 if (PetscUnlikely(_p_curand_stat__ != CURAND_STATUS_SUCCESS)) { \ 109 if (((_p_curand_stat__ == CURAND_STATUS_INITIALIZATION_FAILED) || \ 110 (_p_curand_stat__ == CURAND_STATUS_ALLOCATION_FAILED)) && \ 111 PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \ 112 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE, \ 113 "cuRAND error %d. " \ 114 "Reports not initialized or alloc failed; " \ 115 "this indicates the GPU has run out resources", \ 116 (PetscErrorCode)_p_curand_stat__); \ 117 } else { \ 118 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_GPU, \ 119 "cuRand error %d",(PetscErrorCode)_p_curand_stat__); \ 120 } \ 121 } \ 122 } while (0) 123 124 PETSC_EXTERN cudaStream_t PetscDefaultCudaStream; /* The default stream used by PETSc */ 125 126 PETSC_EXTERN PetscErrorCode PetscCUBLASGetHandle(cublasHandle_t*); 127 PETSC_EXTERN PetscErrorCode PetscCUSOLVERDnGetHandle(cusolverDnHandle_t*); 128 #endif /* PetscDefined(HAVE_CUDA) */ 129 130 #if PetscDefined(HAVE_HIP) 131 #include <hip/hip_runtime.h> 132 #include <hipblas.h> 133 #if defined(__HIP_PLATFORM_NVCC__) 134 #include <cusolverDn.h> 135 #else /* __HIP_PLATFORM_HCC__ */ 136 #include <rocsolver.h> 137 #endif /* __HIP_PLATFORM_NVCC__ */ 138 139 /* REMOVE ME */ 140 #define WaitForHIP() hipDeviceSynchronize() 141 142 /* hipBLAS does not have hipblasGetErrorName(). We create one on our own. */ 143 PETSC_EXTERN const char* PetscHIPBLASGetErrorName(hipblasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */ 144 145 #define CHKERRHIP(cerr) do { \ 146 const hipError_t _p_hip_err__ = cerr; \ 147 if (PetscUnlikely(_p_hip_err__ != hipSuccess)) { \ 148 const char *name = hipGetErrorName(_p_hip_err__); \ 149 const char *descr = hipGetErrorString(_p_hip_err__); \ 150 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_GPU,"hip error %d (%s) : %s", \ 151 (PetscErrorCode)_p_hip_err__,name,descr); \ 152 } \ 153 } while (0) 154 155 #define CHKERRHIPBLAS(stat) do { \ 156 const hipblasStatus_t _p_hipblas_stat__ = stat; \ 157 if (PetscUnlikely(_p_hipblas_stat__ != HIPBLAS_STATUS_SUCCESS)) { \ 158 const char *name = PetscHIPBLASGetErrorName(_p_hipblas_stat__); \ 159 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_GPU,"hipBLAS error %d (%s)", \ 160 (PetscErrorCode)_p_hipblas_stat__,name); \ 161 } \ 162 } while (0) 163 164 /* TODO: SEK: Need to figure out the hipsolver issues */ 165 #define CHKERRHIPSOLVER(stat) do { \ 166 const hipsolverStatus_t _p_hipsolver_stat__ = stat; \ 167 if (PetscUnlikely(_p_hipsolver_stat__ /* != HIPSOLVER_STATUS_SUCCESS */)) { \ 168 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_GPU,"HIPSOLVER error %d", \ 169 (PetscErrorCode)_p_hipsolver_stat__); \ 170 } \ 171 } while (0) 172 173 /* hipSolver does not exist yet so we work around it 174 rocSOLVER users rocBLAS for the handle 175 * */ 176 #if defined(__HIP_PLATFORM_NVCC__) 177 typedef cusolverDnHandle_t hipsolverHandle_t; 178 typedef cusolverStatus_t hipsolverStatus_t; 179 180 /* Alias hipsolverDestroy to cusolverDnDestroy */ 181 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle) 182 { 183 return cusolverDnDestroy(hipsolverhandle) 184 } 185 186 /* Alias hipsolverCreate to cusolverDnCreate */ 187 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) 188 { 189 return cusolverDnCreate(hipsolverhandle) 190 } 191 192 /* Alias hipsolverGetStream to cusolverDnGetStream */ 193 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) 194 { 195 return cusolverDnGetStream(handle,stream); 196 } 197 198 /* Alias hipsolverSetStream to cusolverDnSetStream */ 199 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) 200 { 201 return cusolveDnSetStream(handle,stream); 202 } 203 #else /* __HIP_PLATFORM_HCC__ */ 204 typedef rocblas_handle hipsolverHandle_t; 205 typedef rocblas_status hipsolverStatus_t; 206 207 /* Alias hipsolverDestroy to rocblas_destroy_handle */ 208 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t hipsolverhandle) 209 { 210 return rocblas_destroy_handle(hipsolverhandle); 211 } 212 213 /* Alias hipsolverCreate to rocblas_destroy_handle */ 214 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) 215 { 216 return rocblas_create_handle(hipsolverhandle); 217 } 218 219 /* Alias hipsolverGetStream to rocblas_get_stream */ 220 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) 221 { 222 return rocblas_get_stream(handle,stream); 223 } 224 225 /* Alias hipsolverSetStream to rocblas_set_stream */ 226 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) 227 { 228 return rocblas_set_stream(handle,stream); 229 } 230 #endif /* __HIP_PLATFORM_NVCC__ */ 231 PETSC_EXTERN hipStream_t PetscDefaultHipStream; /* The default stream used by PETSc */ 232 233 PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t*); 234 PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t*); 235 #endif /* PetscDefined(HAVE_HIP) */ 236 237 /* Cannot use the device context api without C++11 */ 238 #if PetscDefined(HAVE_CXX_DIALECT_CXX11) 239 PETSC_EXTERN PetscErrorCode PetscDeviceInitializePackage(void); 240 PETSC_EXTERN PetscErrorCode PetscDeviceFinalizePackage(void); 241 242 /* PetscDevice */ 243 PETSC_EXTERN PetscErrorCode PetscDeviceInitialize(PetscDeviceType); 244 PETSC_EXTERN PetscBool PetscDeviceInitialized(PetscDeviceType); 245 PETSC_EXTERN PetscErrorCode PetscDeviceCreate(PetscDeviceType,PetscInt,PetscDevice*); 246 PETSC_EXTERN PetscErrorCode PetscDeviceConfigure(PetscDevice); 247 PETSC_EXTERN PetscErrorCode PetscDeviceView(PetscDevice,PetscViewer); 248 PETSC_EXTERN PetscErrorCode PetscDeviceDestroy(PetscDevice*); 249 250 /* PetscDeviceContext */ 251 PETSC_EXTERN PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext*); 252 PETSC_EXTERN PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext*); 253 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext,PetscDevice); 254 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext,PetscDevice*); 255 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext,PetscStreamType); 256 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext,PetscStreamType*); 257 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext); 258 PETSC_EXTERN PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext,PetscDeviceContext*); 259 PETSC_EXTERN PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext,PetscBool*); 260 PETSC_EXTERN PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext,PetscDeviceContext); 261 PETSC_EXTERN PetscErrorCode PetscDeviceContextFork(PetscDeviceContext,PetscInt,PetscDeviceContext**); 262 PETSC_EXTERN PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext,PetscInt,PetscDeviceContextJoinMode,PetscDeviceContext**); 263 PETSC_EXTERN PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext); 264 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext*); 265 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext); 266 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm,const char[],PetscDeviceContext); 267 #endif /* PetscDefined(HAVE_CXX_DIALECT_CXX11) */ 268 #endif /* PETSCDEVICE_H */ 269