1 #if !defined(PETSCDEVICE_H) 2 #define PETSCDEVICE_H 3 4 #include <petscsys.h> 5 #include <petscdevicetypes.h> 6 7 #if PetscDefined(HAVE_CUDA) 8 #include <cuda.h> 9 #include <cuda_runtime.h> 10 #include <cublas_v2.h> 11 #include <cusolverDn.h> 12 #include <cusolverSp.h> 13 #include <cufft.h> 14 15 PETSC_EXTERN cudaEvent_t petsc_gputimer_begin; 16 PETSC_EXTERN cudaEvent_t petsc_gputimer_end; 17 18 /* cuBLAS does not have cublasGetErrorName(). We create one on our own. */ 19 PETSC_EXTERN const char* PetscCUBLASGetErrorName(cublasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRCUBLAS macro */ 20 PETSC_EXTERN const char* PetscCUSolverGetErrorName(cusolverStatus_t); 21 PETSC_EXTERN const char* PetscCUFFTGetErrorName(cufftResult); 22 23 #define WaitForCUDA() PetscCUDASynchronize ? cudaDeviceSynchronize() : cudaSuccess; 24 25 /* CUDART_VERSION = 1000 x major + 10 x minor version */ 26 27 /* Could not find exactly which CUDART_VERSION introduced cudaGetErrorName. At least it was in CUDA 8.0 (Sep. 2016) */ 28 #if (CUDART_VERSION >= 8000) /* CUDA 8.0 */ 29 #define CHKERRCUDA(cerr) \ 30 do { \ 31 if (PetscUnlikely(cerr)) { \ 32 const char *name = cudaGetErrorName(cerr); \ 33 const char *descr = cudaGetErrorString(cerr); \ 34 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuda error %d (%s) : %s", \ 35 (int)cerr,name,descr); \ 36 } \ 37 } while (0) 38 #else 39 #define CHKERRCUDA(cerr) do {if (PetscUnlikely(cerr)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuda error %d",(int)cerr);} while (0) 40 #endif /* CUDART_VERSION >= 8000 */ 41 42 #define CHKERRCUBLAS(stat) \ 43 do { \ 44 if (PetscUnlikely(stat)) { \ 45 const char *name = PetscCUBLASGetErrorName(stat); \ 46 if (((stat == CUBLAS_STATUS_NOT_INITIALIZED) || (stat == CUBLAS_STATUS_ALLOC_FAILED)) && PetscCUDAInitialized) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,"cuBLAS error %d (%s). Reports not initialized or alloc failed; this indicates the GPU has run out resources",(int)stat,name); \ 47 else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuBLAS error %d (%s)",(int)stat,name); \ 48 } \ 49 } while (0) 50 51 #define CHKERRCUSOLVER(stat) \ 52 do { \ 53 if (PetscUnlikely(stat)) { \ 54 const char *name = PetscCUSolverGetErrorName(stat); \ 55 if ((stat == CUSOLVER_STATUS_NOT_INITIALIZED) || (stat == CUSOLVER_STATUS_ALLOC_FAILED) || (stat == CUSOLVER_STATUS_INTERNAL_ERROR)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,"cuSolver error %d (%s). This indicates the GPU has run out resources",(int)stat,name); \ 56 else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuSolver error %d (%s)",(int)stat,name); \ 57 } \ 58 } while (0) 59 60 #define CHKERRCUFFT(res) \ 61 do { \ 62 if (PetscUnlikely(res)) { \ 63 const char *name = PetscCUFFTGetErrorName(res); \ 64 if (((res == CUFFT_SETUP_FAILED) || (res == CUFFT_ALLOC_FAILED)) && PetscCUDAInitialized) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_GPU_RESOURCE,"cuFFT error %d (%s). Reports not initialized or alloc failed; this indicates the GPU has run out resources",(int)res,name); \ 65 else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_GPU,"cuFFT error %d (%s)",(int)res,name); \ 66 } \ 67 } while (0) 68 69 PETSC_EXTERN cudaStream_t PetscDefaultCudaStream; /* The default stream used by PETSc */ 70 PETSC_INTERN PetscErrorCode PetscCUBLASInitializeHandle(void); 71 PETSC_INTERN PetscErrorCode PetscCUSOLVERDnInitializeHandle(void); 72 73 PETSC_EXTERN PetscErrorCode PetscCUBLASGetHandle(cublasHandle_t*); 74 PETSC_EXTERN PetscErrorCode PetscCUSOLVERDnGetHandle(cusolverDnHandle_t*); 75 #endif /* PetscDefined(HAVE_CUDA) */ 76 77 #if PetscDefined(HAVE_HIP) 78 #include <hip/hip_runtime.h> 79 #include <hipblas.h> 80 #if defined(__HIP_PLATFORM_NVCC__) 81 #include <cusolverDn.h> 82 #else /* __HIP_PLATFORM_HCC__ */ 83 #include <rocsolver.h> 84 #endif /* __HIP_PLATFORM_NVCC__ */ 85 86 #define WaitForHIP() PetscHIPSynchronize ? hipDeviceSynchronize() : hipSuccess; 87 88 PETSC_EXTERN hipEvent_t petsc_gputimer_begin; 89 PETSC_EXTERN hipEvent_t petsc_gputimer_end; 90 91 /* hipBLAS does not have hipblasGetErrorName(). We create one on our own. */ 92 PETSC_EXTERN const char* PetscHIPBLASGetErrorName(hipblasStatus_t); /* PETSC_EXTERN since it is exposed by the CHKERRHIPBLAS macro */ 93 94 #define CHKERRHIP(cerr) \ 95 do { \ 96 if (PetscUnlikely(cerr)) { \ 97 const char *name = hipGetErrorName(cerr); \ 98 const char *descr = hipGetErrorString(cerr); \ 99 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_LIB,"hip error %d (%s) : %s", \ 100 (int)cerr,name,descr); \ 101 } \ 102 } while (0) 103 104 #define CHKERRHIPBLAS(stat) \ 105 do { \ 106 if (PetscUnlikely(stat)) { \ 107 const char *name = PetscHIPBLASGetErrorName(stat); \ 108 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"hipBLAS error %d (%s)", \ 109 (int)stat,name); \ 110 } \ 111 } while (0) 112 113 /* TODO: SEK: Need to figure out the hipsolver issues */ 114 #define CHKERRHIPSOLVER(err) \ 115 do { \ 116 if (PetscUnlikely(err)) { \ 117 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"HIPSOLVER error %d",err); \ 118 } \ 119 } while (0) 120 121 /* hipSolver does not exist yet so we work around it 122 rocSOLVER users rocBLAS for the handle 123 * */ 124 #if defined(__HIP_PLATFORM_NVCC__) 125 typedef cusolverDnHandle_t hipsolverHandle_t; 126 typedef cusolverStatus_t hipsolverStatus_t; 127 128 /* Alias hipsolverDestroy to cusolverDnDestroy */ 129 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t *hipsolverhandle) 130 { 131 return cusolverDnDestroy(hipsolverhandle) 132 } 133 134 /* Alias hipsolverCreate to cusolverDnCreate */ 135 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) 136 { 137 return cusolverDnCreate(hipsolverhandle) 138 } 139 140 /* Alias hipsolverGetStream to cusolverDnGetStream */ 141 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) 142 { 143 return cusolverDnGetStream(handle,stream); 144 } 145 146 /* Alias hipsolverSetStream to cusolverDnSetStream */ 147 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) 148 { 149 return cusolveDnSetStream(handle,stream); 150 } 151 #else /* __HIP_PLATFORM_HCC__ */ 152 typedef rocblas_handle hipsolverHandle_t; 153 typedef rocblas_status hipsolverStatus_t; 154 155 /* Alias hipsolverDestroy to rocblas_destroy_handle */ 156 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverDestroy(hipsolverHandle_t hipsolverhandle) 157 { 158 return rocblas_destroy_handle(hipsolverhandle); 159 } 160 161 /* Alias hipsolverCreate to rocblas_destroy_handle */ 162 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverCreate(hipsolverHandle_t *hipsolverhandle) 163 { 164 return rocblas_create_handle(hipsolverhandle); 165 } 166 167 /* Alias hipsolverGetStream to rocblas_get_stream */ 168 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverGetStream(hipsolverHandle_t handle, hipStream_t *stream) 169 { 170 return rocblas_get_stream(handle,stream); 171 } 172 173 /* Alias hipsolverSetStream to rocblas_set_stream */ 174 PETSC_STATIC_INLINE hipsolverStatus_t hipsolverSetStream(hipsolverHandle_t handle, hipStream_t stream) 175 { 176 return rocblas_set_stream(handle,stream); 177 } 178 #endif /* __HIP_PLATFORM_NVCC__ */ 179 PETSC_EXTERN hipStream_t PetscDefaultHipStream; /* The default stream used by PETSc */ 180 PETSC_INTERN PetscErrorCode PetscHIPBLASInitializeHandle(void); 181 PETSC_INTERN PetscErrorCode PetscHIPSOLVERInitializeHandle(void); 182 183 PETSC_EXTERN PetscErrorCode PetscHIPBLASGetHandle(hipblasHandle_t*); 184 PETSC_EXTERN PetscErrorCode PetscHIPSOLVERGetHandle(hipsolverHandle_t*); 185 #endif /* PetscDefined(HAVE_HIP) */ 186 187 /* Cannot use the device context api without C++11 */ 188 #if PetscDefined(HAVE_CXX_DIALECT_CXX11) 189 PETSC_EXTERN PetscErrorCode PetscDeviceInitializePackage(void); 190 PETSC_EXTERN PetscErrorCode PetscDeviceFinalizePackage(void); 191 192 /* PetscDevice */ 193 PETSC_EXTERN PetscErrorCode PetscDeviceCreate(PetscDeviceKind,PetscDevice*); 194 PETSC_EXTERN PetscErrorCode PetscDeviceConfigure(PetscDevice); 195 PETSC_EXTERN PetscErrorCode PetscDeviceDestroy(PetscDevice*); 196 197 /* PetscDeviceContext */ 198 PETSC_EXTERN PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext*); 199 PETSC_EXTERN PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext*); 200 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext,PetscDevice); 201 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext,PetscDevice*); 202 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext,PetscStreamType); 203 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext,PetscStreamType*); 204 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext); 205 PETSC_EXTERN PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext,PetscDeviceContext*); 206 PETSC_EXTERN PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext,PetscBool*); 207 PETSC_EXTERN PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext,PetscDeviceContext); 208 PETSC_EXTERN PetscErrorCode PetscDeviceContextFork(PetscDeviceContext,PetscInt,PetscDeviceContext**); 209 PETSC_EXTERN PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext,PetscInt,PetscDeviceContextJoinMode,PetscDeviceContext**); 210 PETSC_EXTERN PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext); 211 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext*); 212 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext); 213 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm,const char[],PetscDeviceContext); 214 #endif /* PetscDefined(HAVE_CXX_DIALECT_CXX11) */ 215 #endif /* PETSCDEVICE_H */ 216