1 #ifndef PETSCDEVICE_CUDA_H 2 #define PETSCDEVICE_CUDA_H 3 4 #include <petscdevice.h> 5 #include <petscpkg_version.h> 6 7 #if defined(__NVCC__) || defined(__CUDACC__) 8 #define PETSC_USING_NVCC 1 9 #endif 10 11 #if PetscDefined(HAVE_CUDA) 12 #include <cuda.h> 13 #include <cuda_runtime.h> 14 #include <cublas_v2.h> 15 #include <cusolverDn.h> 16 #include <cusolverSp.h> 17 #include <cufft.h> 18 19 /* cuBLAS does not have cublasGetErrorName(). We create one on our own. */ 20 PETSC_EXTERN const char *PetscCUBLASGetErrorName(cublasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRCUBLAS macro */ 21 PETSC_EXTERN const char *PetscCUSolverGetErrorName(cusolverStatus_t); 22 PETSC_EXTERN const char *PetscCUFFTGetErrorName(cufftResult); 23 24 /* REMOVE ME */ 25 #define WaitForCUDA() cudaDeviceSynchronize() 26 27 /* CUDART_VERSION = 1000 x major + 10 x minor version */ 28 29 /* Could not find exactly which CUDART_VERSION introduced cudaGetErrorName. At least it was in CUDA 8.0 (Sep. 2016) */ 30 #if PETSC_PKG_CUDA_VERSION_GE(8, 0, 0) 31 #define PetscCallCUDAVoid(...) \ 32 do { \ 33 const cudaError_t _p_cuda_err__ = __VA_ARGS__; \ 34 PetscCheckAbort(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d (%s) : %s", (PetscErrorCode)_p_cuda_err__, cudaGetErrorName(_p_cuda_err__), cudaGetErrorString(_p_cuda_err__)); \ 35 } while (0) 36 37 #define PetscCallCUDA(...) \ 38 do { \ 39 const cudaError_t _p_cuda_err__ = __VA_ARGS__; \ 40 PetscCheck(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d (%s) : %s", (PetscErrorCode)_p_cuda_err__, cudaGetErrorName(_p_cuda_err__), cudaGetErrorString(_p_cuda_err__)); \ 41 } while (0) 42 #else /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */ 43 #define PetscCallCUDA(...) \ 44 do { \ 45 const cudaError_t _p_cuda_err__ = __VA_ARGS__; \ 46 PetscCheck(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d", (PetscErrorCode)_p_cuda_err__); \ 47 } while (0) 48 49 #define PetscCallCUDAVoid(...) \ 50 do { \ 51 const cudaError_t _p_cuda_err__ = __VA_ARGS__; \ 52 PetscCheckAbort(_p_cuda_err__ == cudaSuccess, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuda error %d", (PetscErrorCode)_p_cuda_err__); \ 53 } while (0) 54 #endif /* PETSC_PKG_CUDA_VERSION_GE(8,0,0) */ 55 #define CHKERRCUDA(...) PetscCallCUDA(__VA_ARGS__) 56 57 #define PetscCUDACheckLaunch \ 58 do { \ 59 /* Check synchronous errors, i.e. pre-launch */ \ 60 PetscCallCUDA(cudaGetLastError()); \ 61 /* Check asynchronous errors, i.e. kernel failed (ULF) */ \ 62 PetscCallCUDA(cudaDeviceSynchronize()); \ 63 } while (0) 64 65 #define PetscCallCUBLAS(...) \ 66 do { \ 67 const cublasStatus_t _p_cublas_stat__ = __VA_ARGS__; \ 68 if (PetscUnlikely(_p_cublas_stat__ != CUBLAS_STATUS_SUCCESS)) { \ 69 const char *name = PetscCUBLASGetErrorName(_p_cublas_stat__); \ 70 if (((_p_cublas_stat__ == CUBLAS_STATUS_NOT_INITIALIZED) || (_p_cublas_stat__ == CUBLAS_STATUS_ALLOC_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \ 71 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \ 72 "cuBLAS error %d (%s). " \ 73 "Reports not initialized or alloc failed; " \ 74 "this indicates the GPU may have run out resources", \ 75 (PetscErrorCode)_p_cublas_stat__, name); \ 76 } else { \ 77 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuBLAS error %d (%s)", (PetscErrorCode)_p_cublas_stat__, name); \ 78 } \ 79 } \ 80 } while (0) 81 #define CHKERRCUBLAS(...) PetscCallCUBLAS(__VA_ARGS__) 82 83 #if (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) /* According to cuda/10.1.168 on OLCF Summit */ 84 #define PetscCallCUSPARSE(...) \ 85 do { \ 86 const cusparseStatus_t _p_cusparse_stat__ = __VA_ARGS__; \ 87 if (PetscUnlikely(_p_cusparse_stat__)) { \ 88 const char *name = cusparseGetErrorName(_p_cusparse_stat__); \ 89 const char *descr = cusparseGetErrorString(_p_cusparse_stat__); \ 90 PetscCheck((_p_cusparse_stat__ != CUSPARSE_STATUS_NOT_INITIALIZED) && (_p_cusparse_stat__ != CUSPARSE_STATUS_ALLOC_FAILED), PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \ 91 "cuSPARSE errorcode %d (%s) : %s.; " \ 92 "this indicates the GPU has run out resources", \ 93 (int)_p_cusparse_stat__, name, descr); \ 94 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSPARSE errorcode %d (%s) : %s", (int)_p_cusparse_stat__, name, descr); \ 95 } \ 96 } while (0) 97 #else /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */ 98 #define PetscCallCUSPARSE(...) \ 99 do { \ 100 const cusparseStatus_t _p_cusparse_stat__ = __VA_ARGS__; \ 101 PetscCheck(_p_cusparse_stat__ == CUSPARSE_STATUS_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSPARSE errorcode %d", (PetscErrorCode)_p_cusparse_stat__); \ 102 } while (0) 103 #endif /* (CUSPARSE_VER_MAJOR > 10 || CUSPARSE_VER_MAJOR == 10 && CUSPARSE_VER_MINOR >= 2) */ 104 #define CHKERRCUSPARSE(...) PetscCallCUSPARSE(__VA_ARGS__) 105 106 #define PetscCallCUSOLVER(...) \ 107 do { \ 108 const cusolverStatus_t _p_cusolver_stat__ = __VA_ARGS__; \ 109 if (PetscUnlikely(_p_cusolver_stat__ != CUSOLVER_STATUS_SUCCESS)) { \ 110 const char *name = PetscCUSolverGetErrorName(_p_cusolver_stat__); \ 111 if (((_p_cusolver_stat__ == CUSOLVER_STATUS_NOT_INITIALIZED) || (_p_cusolver_stat__ == CUSOLVER_STATUS_ALLOC_FAILED) || (_p_cusolver_stat__ == CUSOLVER_STATUS_INTERNAL_ERROR)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \ 112 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \ 113 "cuSolver error %d (%s). " \ 114 "This indicates the GPU may have run out resources", \ 115 (PetscErrorCode)_p_cusolver_stat__, name); \ 116 } else { \ 117 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuSolver error %d (%s)", (PetscErrorCode)_p_cusolver_stat__, name); \ 118 } \ 119 } \ 120 } while (0) 121 #define CHKERRCUSOLVER(...) PetscCallCUSOLVER(__VA_ARGS__) 122 123 #define PetscCallCUFFT(...) \ 124 do { \ 125 const cufftResult_t _p_cufft_stat__ = __VA_ARGS__; \ 126 if (PetscUnlikely(_p_cufft_stat__ != CUFFT_SUCCESS)) { \ 127 const char *name = PetscCUFFTGetErrorName(_p_cufft_stat__); \ 128 if (((_p_cufft_stat__ == CUFFT_SETUP_FAILED) || (_p_cufft_stat__ == CUFFT_ALLOC_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \ 129 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \ 130 "cuFFT error %d (%s). " \ 131 "Reports not initialized or alloc failed; " \ 132 "this indicates the GPU has run out resources", \ 133 (PetscErrorCode)_p_cufft_stat__, name); \ 134 } else { \ 135 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuFFT error %d (%s)", (PetscErrorCode)_p_cufft_stat__, name); \ 136 } \ 137 } \ 138 } while (0) 139 #define CHKERRCUFFT(...) PetscCallCUFFT(__VA_ARGS__) 140 141 #define PetscCallCURAND(...) \ 142 do { \ 143 const curandStatus_t _p_curand_stat__ = __VA_ARGS__; \ 144 if (PetscUnlikely(_p_curand_stat__ != CURAND_STATUS_SUCCESS)) { \ 145 if (((_p_curand_stat__ == CURAND_STATUS_INITIALIZATION_FAILED) || (_p_curand_stat__ == CURAND_STATUS_ALLOCATION_FAILED)) && PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { \ 146 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU_RESOURCE, \ 147 "cuRAND error %d. " \ 148 "Reports not initialized or alloc failed; " \ 149 "this indicates the GPU has run out resources", \ 150 (PetscErrorCode)_p_curand_stat__); \ 151 } else { \ 152 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_GPU, "cuRand error %d", (PetscErrorCode)_p_curand_stat__); \ 153 } \ 154 } \ 155 } while (0) 156 #define CHKERRCURAND(...) PetscCallCURAND(__VA_ARGS__) 157 158 PETSC_EXTERN cudaStream_t PetscDefaultCudaStream; // The default stream used by PETSc 159 PETSC_EXTERN PetscErrorCode PetscCUBLASGetHandle(cublasHandle_t *); 160 PETSC_EXTERN PetscErrorCode PetscCUSOLVERDnGetHandle(cusolverDnHandle_t *); 161 PETSC_EXTERN PetscErrorCode PetscGetCurrentCUDAStream(cudaStream_t *); 162 163 #endif // PETSC_HAVE_CUDA 164 165 // these can also be defined in petscdevice_hip.h so we undef and define them *only* if the 166 // current compiler is NVCC. In this case if petscdevice_hip.h is included first, the macros 167 // would already be defined, but they would be empty since we cannot be using HCC at the same 168 // time. 169 #if PetscDefined(USING_NVCC) 170 #undef PETSC_HOST_DECL 171 #undef PETSC_DEVICE_DECL 172 #undef PETSC_KERNEL_DECL 173 #undef PETSC_SHAREDMEM_DECL 174 #undef PETSC_FORCEINLINE 175 #undef PETSC_CONSTMEM_DECL 176 177 #define PETSC_HOST_DECL __host__ 178 #define PETSC_DEVICE_DECL __device__ 179 #define PETSC_KERNEL_DECL __global__ 180 #define PETSC_SHAREDMEM_DECL __shared__ 181 #define PETSC_FORCEINLINE __forceinline__ 182 #define PETSC_CONSTMEM_DECL __constant__ 183 #endif 184 185 #ifndef PETSC_HOST_DECL // use HOST_DECL as canary 186 #define PETSC_HOST_DECL 187 #define PETSC_DEVICE_DECL 188 #define PETSC_KERNEL_DECL 189 #define PETSC_SHAREDMEM_DECL 190 #define PETSC_FORCEINLINE inline 191 #define PETSC_CONSTMEM_DECL 192 #endif 193 194 #ifndef PETSC_DEVICE_DEFINED_DECLS_PRIVATE 195 #define PETSC_DEVICE_DEFINED_DECLS_PRIVATE 196 #define PETSC_HOSTDEVICE_DECL PETSC_HOST_DECL PETSC_DEVICE_DECL 197 #define PETSC_DEVICE_INLINE_DECL PETSC_DEVICE_DECL PETSC_FORCEINLINE 198 #define PETSC_HOSTDEVICE_INLINE_DECL PETSC_HOSTDEVICE_DECL PETSC_FORCEINLINE 199 #endif 200 201 #endif // PETSCDEVICE_CUDA_H 202