1 #if !defined(PETSCDEVICETYPES_H) 2 #define PETSCDEVICETYPES_H 3 4 #include <petscsys.h> /* for PETSC_HAVE_CUDA/HIP/KOKKOS etc */ 5 6 /*E 7 PetscMemType - Memory type of a pointer 8 9 Level: beginner 10 11 Developer Note: 12 Encoding of the bitmask in binary: xxxxyyyz 13 z = 0: Host memory 14 z = 1: Device memory 15 yyy = 000: CUDA-related memory 16 yyy = 001: HIP-related memory 17 xxxxyyy1 = 0000,0001: CUDA memory 18 xxxxyyy1 = 0001,0001: CUDA NVSHMEM memory 19 xxxxyyy1 = 0000,0011: HIP memory 20 21 Other types of memory, e.g., CUDA managed memory, can be added when needed. 22 23 .seealso: VecGetArrayAndMemType(), PetscSFBcastWithMemTypeBegin(), PetscSFReduceWithMemTypeBegin() 24 E*/ 25 typedef enum {PETSC_MEMTYPE_HOST=0, PETSC_MEMTYPE_DEVICE=0x01, PETSC_MEMTYPE_CUDA=0x01, PETSC_MEMTYPE_NVSHMEM=0x11,PETSC_MEMTYPE_HIP=0x03} PetscMemType; 26 27 #define PetscMemTypeHost(m) (((m) & 0x1) == PETSC_MEMTYPE_HOST) 28 #define PetscMemTypeDevice(m) (((m) & 0x1) == PETSC_MEMTYPE_DEVICE) 29 #define PetscMemTypeCUDA(m) (((m) & 0xF) == PETSC_MEMTYPE_CUDA) 30 #define PetscMemTypeHIP(m) (((m) & 0xF) == PETSC_MEMTYPE_HIP) 31 #define PetscMemTypeNVSHMEM(m) ((m) == PETSC_MEMTYPE_NVSHMEM) 32 33 /*E 34 PetscOffloadMask - indicates which memory (CPU, GPU, or none) contains valid data 35 36 PETSC_OFFLOAD_UNALLOCATED - no memory contains valid matrix entries; NEVER used for vectors 37 PETSC_OFFLOAD_GPU - GPU has valid vector/matrix entries 38 PETSC_OFFLOAD_CPU - CPU has valid vector/matrix entries 39 PETSC_OFFLOAD_BOTH - Both GPU and CPU have valid vector/matrix entries and they match 40 PETSC_OFFLOAD_VECKOKKOS - Reserved for Vec_Kokkos. The offload is managed by Kokkos, thus this flag is not used in Vec_Kokkos. 41 42 Level: developer 43 E*/ 44 typedef enum {PETSC_OFFLOAD_UNALLOCATED=0x0,PETSC_OFFLOAD_CPU=0x1,PETSC_OFFLOAD_GPU=0x2,PETSC_OFFLOAD_BOTH=0x3,PETSC_OFFLOAD_VECKOKKOS=0x100} PetscOffloadMask; 45 46 /*E 47 PetscDeviceKind - Kind of accelerator device backend 48 49 $ PETSC_DEVICE_INVALID - Invalid type, do not use 50 $ PETSC_DEVICE_CUDA - CUDA enabled GPU 51 $ PETSC_DEVICE_HIP - ROCM/HIP enabled GPU 52 $ PETSC_DEVICE_DEFAULT - Automatically select backend based on availability 53 $ PETSC_DEVICE_MAX - Always 1 greater than the largest valid PetscDeviceKInd, invalid type, do not use 54 55 Notes: 56 PETSC_DEVICE_DEFAULT is selected in the following order: PETSC_DEVICE_HIP, PETSC_DEVICE_CUDA, PETSC_DEVICE_INVALID. 57 58 Level: beginner 59 60 .seealso: PetscDevice, PetscDeviceCreate() 61 E*/ 62 typedef enum { 63 PETSC_DEVICE_INVALID = 0, 64 PETSC_DEVICE_CUDA = 1, 65 PETSC_DEVICE_HIP = 2, 66 PETSC_DEVICE_MAX = 3 67 } PetscDeviceKind; 68 PETSC_EXTERN const char *const PetscDeviceKinds[]; 69 #if PetscDefined(HAVE_HIP) 70 # define PETSC_DEVICE_DEFAULT PETSC_DEVICE_HIP 71 #elif PetscDefined(HAVE_CUDA) 72 # define PETSC_DEVICE_DEFAULT PETSC_DEVICE_CUDA 73 #else 74 # define PETSC_DEVICE_DEFAULT PETSC_DEVICE_INVALID 75 #endif 76 77 /*S 78 PetscDevice - Handle to an accelerator "device" (usually a GPU) 79 80 Notes: 81 This object is used to house configuration and state of a device, but does not offer any ability to interact with or 82 drive device computation. This functionality is facilitated instead by the PetscDeviceContext object. 83 84 Level: beginner 85 86 .seealso: PetscDeviceKind, PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceDestroy(), PetscDeviceContext, PetscDeviceContextSetDevice(), PetscDeviceContextGetDevice() 87 S*/ 88 typedef struct _n_PetscDevice *PetscDevice; 89 90 /*E 91 PetscStreamType - Stream blocking mode, indicates how a stream implementation will interact with the default "NULL" 92 stream, which is usually blocking. 93 94 $ PETSC_STREAM_GLOBAL_BLOCKING - Alias for NULL stream. Any stream of this type will block the host for all other streams to finish work before starting its operations. 95 $ PETSC_STREAM_DEFAULT_BLOCKING - Stream will act independent of other streams, but will still be blocked by actions on the NULL stream. 96 $ PETSC_STREAM_GLOBAL_NONBLOCKING - Stream is truly asynchronous, and is blocked by nothing, not even the NULL stream. 97 $ PETSC_STREAM_MAX - Always 1 greater than the largest PetscStreamType, do not use 98 99 Level: intermediate 100 101 .seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextGetStreamType() 102 E*/ 103 typedef enum { 104 PETSC_STREAM_GLOBAL_BLOCKING = 0, 105 PETSC_STREAM_DEFAULT_BLOCKING = 1, 106 PETSC_STREAM_GLOBAL_NONBLOCKING = 2, 107 PETSC_STREAM_MAX = 3 108 } PetscStreamType; 109 PETSC_EXTERN const char *const PetscStreamTypes[]; 110 111 /*E 112 PetscDeviceContextJoinMode - Describes the type of join operation to perform in PetscDeviceContextJoin() 113 114 $ PETSC_DEVICE_CONTEXT_DESTROY - Destroy all incoming sub-contexts after join. 115 $ PETSC_CONTEXT_JOIN_SYNC - Synchronize incoming sub-contexts after join. 116 $ PETSC_CONTEXT_JOIN_NO_SYNC - Do not synchronize incoming sub-contexts after join. 117 118 Level: beginner 119 120 .seealso: PetscDeviceContextFork(), PetscDeviceContextJoin() 121 E*/ 122 typedef enum { 123 PETSC_DEVICE_CONTEXT_JOIN_DESTROY, 124 PETSC_DEVICE_CONTEXT_JOIN_SYNC, 125 PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC 126 } PetscDeviceContextJoinMode; 127 PETSC_EXTERN const char *const PetscDeviceContextJoinModes[]; 128 129 /*S 130 PetscDeviceContext - Container to manage stream dependencies and the various solver handles for asynchronous device compute. 131 132 Level: beginner 133 134 .seealso: PetscDevice, PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextDestroy(), 135 PetscDeviceContextFork(), PetscDeviceContextJoin() 136 S*/ 137 typedef struct _n_PetscDeviceContext *PetscDeviceContext; 138 #endif /* PETSCDEVICETYPES_H */ 139