xref: /petsc/include/petscdevicetypes.h (revision 2fa40bb9206b96114faa7cb222621ec184d31cd2)
1 #if !defined(PETSCDEVICETYPES_H)
2 #define PETSCDEVICETYPES_H
3 
4 #include <petscsys.h> /* for PETSC_HAVE_CUDA/HIP/KOKKOS etc */
5 
/*E
  PetscMemType - Describes the kind of memory a pointer refers to

  Level: beginner

  Developer Note:
   The value is a bitmask, laid out in binary as xxxxyyyz
   z = 0:                Host memory
   z = 1:                Device memory
   yyy = 000:            CUDA-related memory
   yyy = 001:            HIP-related memory
   xxxxyyy1 = 0000,0001: CUDA memory
   xxxxyyy1 = 0001,0001: CUDA NVSHMEM memory
   xxxxyyy1 = 0000,0011: HIP memory

   PETSC_MEMTYPE_DEVICE and PETSC_MEMTYPE_CUDA share the value 0x01. PetscMemTypeCUDA() only inspects the low four
   bits, so it also evaluates to true for PETSC_MEMTYPE_NVSHMEM, whereas PetscMemTypeNVSHMEM() requires an exact match.

  Other types of memory, e.g., CUDA managed memory, can be added when needed.

.seealso: VecGetArrayAndMemType(), PetscSFBcastWithMemTypeBegin(), PetscSFReduceWithMemTypeBegin()
E*/
typedef enum {
  PETSC_MEMTYPE_HOST    = 0,
  PETSC_MEMTYPE_DEVICE  = 0x01,
  PETSC_MEMTYPE_CUDA    = 0x01,
  PETSC_MEMTYPE_NVSHMEM = 0x11,
  PETSC_MEMTYPE_HIP     = 0x03
} PetscMemType;

/* Predicates on a PetscMemType value m; each one expands its argument exactly once */
#define PetscMemTypeHost(m)    (((m) & 0x1) == PETSC_MEMTYPE_HOST)    /* lowest bit clear */
#define PetscMemTypeDevice(m)  (((m) & 0x1) == PETSC_MEMTYPE_DEVICE)  /* lowest bit set */
#define PetscMemTypeCUDA(m)    (((m) & 0xF) == PETSC_MEMTYPE_CUDA)    /* low four bits equal 0x1 */
#define PetscMemTypeHIP(m)     (((m) & 0xF) == PETSC_MEMTYPE_HIP)     /* low four bits equal 0x3 */
#define PetscMemTypeNVSHMEM(m) ((m) == PETSC_MEMTYPE_NVSHMEM)         /* exact match only */
32 
/*E
    PetscOffloadMask - Records which memory (CPU, GPU, or none) currently holds valid data

   PETSC_OFFLOAD_UNALLOCATED  - no memory contains valid matrix entries; NEVER used for vectors
   PETSC_OFFLOAD_CPU - CPU has valid vector/matrix entries
   PETSC_OFFLOAD_GPU - GPU has valid vector/matrix entries
   PETSC_OFFLOAD_BOTH - Both GPU and CPU have valid vector/matrix entries and they match; its value is the bitwise OR of PETSC_OFFLOAD_CPU and PETSC_OFFLOAD_GPU
   PETSC_OFFLOAD_VECKOKKOS - Reserved for Vec_Kokkos. The offload is managed by Kokkos, thus this flag is not used in Vec_Kokkos.

   Level: developer
E*/
typedef enum {
  PETSC_OFFLOAD_UNALLOCATED = 0x0,
  PETSC_OFFLOAD_CPU         = 0x1,
  PETSC_OFFLOAD_GPU         = 0x2,
  PETSC_OFFLOAD_BOTH        = 0x3,
  PETSC_OFFLOAD_VECKOKKOS   = 0x100
} PetscOffloadMask;
45 
/*E
  PetscDeviceKind - Identifies the accelerator backend a device belongs to

$ PETSC_DEVICE_INVALID - Invalid type, do not use
$ PETSC_DEVICE_CUDA    - CUDA enabled GPU
$ PETSC_DEVICE_HIP     - ROCM/HIP enabled GPU
$ PETSC_DEVICE_DEFAULT - Automatically select backend based on availability
$ PETSC_DEVICE_MAX     - Always 1 greater than the largest valid PetscDeviceKind, invalid type, do not use

  Notes:
  PETSC_DEVICE_DEFAULT is not an enumerator but a preprocessor macro that expands to one of the values above. It is
  selected in the following order: PETSC_DEVICE_HIP, PETSC_DEVICE_CUDA, PETSC_DEVICE_INVALID.

  Level: beginner

.seealso: PetscDevice, PetscDeviceCreate()
E*/
typedef enum {
  PETSC_DEVICE_INVALID = 0, /* placeholder for "no usable backend" */
  PETSC_DEVICE_CUDA    = 1,
  PETSC_DEVICE_HIP     = 2,
  PETSC_DEVICE_MAX     = 3  /* sentinel: one past the largest valid kind */
} PetscDeviceKind;
68 PETSC_EXTERN const char *const PetscDeviceKinds[];
69 #if PetscDefined(HAVE_HIP)
70 #  define PETSC_DEVICE_DEFAULT PETSC_DEVICE_HIP
71 #elif PetscDefined(HAVE_CUDA)
72 #  define PETSC_DEVICE_DEFAULT PETSC_DEVICE_CUDA
73 #else
74 #  define PETSC_DEVICE_DEFAULT PETSC_DEVICE_INVALID
75 #endif
76 
/*S
  PetscDevice - Opaque handle to an accelerator "device" (usually a GPU)

  Notes:
  A PetscDevice houses the configuration and state of a device. It does not offer any ability to interact with or
  drive device computation; that functionality is facilitated instead by the PetscDeviceContext object.

  Developer Note:
  Only the pointer type is declared here; struct _n_PetscDevice is left incomplete in this header.

  Level: beginner

.seealso: PetscDeviceKind, PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceDestroy(), PetscDeviceContext, PetscDeviceContextSetDevice(), PetscDeviceContextGetDevice()
S*/
typedef struct _n_PetscDevice *PetscDevice;
89 
/*E
  PetscStreamType - Stream blocking mode; indicates how a stream implementation will interact with the default "NULL"
  stream, which is usually blocking.

$ PETSC_STREAM_GLOBAL_BLOCKING    - Alias for NULL stream. Any stream of this type will block the host for all other streams to finish work before starting its operations.
$ PETSC_STREAM_DEFAULT_BLOCKING   - Stream will act independent of other streams, but will still be blocked by actions on the NULL stream.
$ PETSC_STREAM_GLOBAL_NONBLOCKING - Stream is truly asynchronous, and is blocked by nothing, not even the NULL stream.
$ PETSC_STREAM_MAX                - Always 1 greater than the largest PetscStreamType, do not use

  Level: intermediate

.seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextGetStreamType()
E*/
typedef enum {
  PETSC_STREAM_GLOBAL_BLOCKING    = 0,
  PETSC_STREAM_DEFAULT_BLOCKING   = 1,
  PETSC_STREAM_GLOBAL_NONBLOCKING = 2,
  PETSC_STREAM_MAX                = 3  /* sentinel: one past the largest valid stream type */
} PetscStreamType;
109 PETSC_EXTERN const char *const PetscStreamTypes[];
110 
/*E
  PetscDeviceContextJoinMode - Describes the type of join operation to perform in PetscDeviceContextJoin()

$ PETSC_DEVICE_CONTEXT_JOIN_DESTROY - Destroy all incoming sub-contexts after join.
$ PETSC_DEVICE_CONTEXT_JOIN_SYNC    - Synchronize incoming sub-contexts after join.
$ PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC - Do not synchronize incoming sub-contexts after join.

  Developer Note:
  The numeric values are spelled out explicitly, matching PetscDeviceKind and PetscStreamType; they are the same
  values the compiler would assign implicitly (0, 1, 2).

  Level: beginner

.seealso: PetscDeviceContextFork(), PetscDeviceContextJoin()
E*/
typedef enum {
  PETSC_DEVICE_CONTEXT_JOIN_DESTROY = 0,
  PETSC_DEVICE_CONTEXT_JOIN_SYNC    = 1,
  PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC = 2
} PetscDeviceContextJoinMode;
127 PETSC_EXTERN const char *const PetscDeviceContextJoinModes[];
128 
/*S
  PetscDeviceContext - Opaque container that manages stream dependencies and the various solver handles for
  asynchronous device compute.

  Developer Note:
  Only the pointer type is declared here; struct _n_PetscDeviceContext is left incomplete in this header.

  Level: beginner

.seealso: PetscDevice, PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextDestroy(),
PetscDeviceContextFork(), PetscDeviceContextJoin()
S*/
typedef struct _n_PetscDeviceContext *PetscDeviceContext;
138 #endif /* PETSCDEVICETYPES_H */
139