xref: /petsc/include/petscdevice.h (revision 017deb10d530c1b6d9744fcd772cd96c5fcd74f2)
1 #pragma once
2 
3 #include <petscdevicetypes.h>
4 #include <petscviewertypes.h>
5 
6   #if PETSC_CPP_VERSION >= 11 // C++11: alignof() applied to the decltype of the argument expression
7   #define PETSC_DEVICE_ALIGNOF(...) alignof(decltype(__VA_ARGS__))
8 #elif PETSC_C_VERSION >= 11 // C11: _Alignof() needs a type, so the expression must be turned into one
9   #ifdef __GNUC__
10     #define PETSC_DEVICE_ALIGNOF(...) _Alignof(__typeof__(__VA_ARGS__)) // GNU __typeof__ yields the type of the expression
11   #else
12     #include <stddef.h> // max_align_t
13     // Note that we cannot just do _Alignof(expression) since clang warns that "'_Alignof' applied to an
14     // expression is a GNU extension", so we just default to max_align_t which is ultra safe
15     #define PETSC_DEVICE_ALIGNOF(...) _Alignof(max_align_t) // the macro argument is ignored in this branch
16   #endif // __GNUC__
17 #else // neither C++11 nor C11
18   #define PETSC_DEVICE_ALIGNOF(...) PETSC_MEMALIGN // the macro argument is ignored; expands to PETSC_MEMALIGN (defined elsewhere)
19 #endif // PETSC_CPP_VERSION >= 11 / PETSC_C_VERSION >= 11
20 
21 /* MANSEC = Sys */
22 /* SUBMANSEC = Device */
23 
24 // REVIEW ME: this should probably go somewhere better, e.g. be determined at configure time?
25 #define PETSC_HAVE_HOST 1 // unconditionally defined to 1 by this header
26 
27 /* logging support: PetscClassId variables, only declared (PETSC_EXTERN) in this header */
28 PETSC_EXTERN PetscClassId PETSC_DEVICE_CLASSID;
29 PETSC_EXTERN PetscClassId PETSC_DEVICE_CONTEXT_CLASSID;
30 
31 PETSC_EXTERN PetscErrorCode PetscDeviceInitializePackage(void); /* takes no arguments; returns a PetscErrorCode */
32 PETSC_EXTERN PetscErrorCode PetscDeviceFinalizePackage(void); /* takes no arguments; returns a PetscErrorCode */
33 PETSC_EXTERN PetscErrorCode PetscGetMemType(const void *, PetscMemType *); /* NOTE(review): presumably reports the PetscMemType of the given pointer through the second argument -- confirm against the implementation */
34 
35 /* PetscDevice: every routine below returns a PetscErrorCode, except PETSC_DEVICE_DEFAULT() (returns a PetscDeviceType) and PetscDeviceInitialized() (returns a PetscBool) */
36 PETSC_EXTERN PetscErrorCode  PetscDeviceCreate(PetscDeviceType, PetscInt, PetscDevice *);
37 PETSC_EXTERN PetscErrorCode  PetscDeviceDestroy(PetscDevice *);
38 PETSC_EXTERN PetscErrorCode  PetscDeviceConfigure(PetscDevice);
39 PETSC_EXTERN PetscErrorCode  PetscDeviceView(PetscDevice, PetscViewer);
40 PETSC_EXTERN PetscErrorCode  PetscDeviceGetType(PetscDevice, PetscDeviceType *);
41 PETSC_EXTERN PetscErrorCode  PetscDeviceGetDeviceId(PetscDevice, PetscInt *);
42 PETSC_EXTERN PetscDeviceType PETSC_DEVICE_DEFAULT(void);
43 PETSC_EXTERN PetscErrorCode  PetscDeviceSetDefaultDeviceType(PetscDeviceType);
44 PETSC_EXTERN PetscErrorCode  PetscDeviceInitialize(PetscDeviceType);
45 PETSC_EXTERN PetscBool       PetscDeviceInitialized(PetscDeviceType);
46 
47 /* PetscDeviceContext: every routine below returns a PetscErrorCode */
48 PETSC_EXTERN PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *);
49 PETSC_EXTERN PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *);
50 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext, PetscStreamType);
51 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext, PetscStreamType *);
52 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext, PetscDevice);
53 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext, PetscDevice *);
54 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext, PetscDeviceType *);
55 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext);
56 PETSC_EXTERN PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext, PetscDeviceContext *);
57 PETSC_EXTERN PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext, PetscBool *);
58 PETSC_EXTERN PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext, PetscDeviceContext);
59 PETSC_EXTERN PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext, PetscStreamType, PetscInt, PetscDeviceContext **);
60 PETSC_EXTERN PetscErrorCode PetscDeviceContextFork(PetscDeviceContext, PetscInt, PetscDeviceContext **);
61 PETSC_EXTERN PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext, PetscInt, PetscDeviceContextJoinMode, PetscDeviceContext **);
62 PETSC_EXTERN PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext);
63 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm, PetscDeviceContext);
64 PETSC_EXTERN PetscErrorCode PetscDeviceContextView(PetscDeviceContext, PetscViewer);
65 PETSC_EXTERN PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext, PetscObject, const char[]);
66 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *);
67 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext);
68 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamHandle(PetscDeviceContext, void **);
69 
70 /* memory: the *_Private routines are what the PetscDeviceMalloc()/PetscDeviceCalloc()/PetscDeviceFree() macros below expand to; PetscDeviceMemcpy()/PetscDeviceMemset() are what PetscDeviceArrayCopy()/PetscDeviceArrayZero() expand to */
71 PETSC_EXTERN PetscErrorCode PetscDeviceAllocate_Private(PetscDeviceContext, PetscBool, PetscMemType, size_t, size_t, void **PETSC_RESTRICT);
72 PETSC_EXTERN PetscErrorCode PetscDeviceDeallocate_Private(PetscDeviceContext, void *PETSC_RESTRICT);
73 PETSC_EXTERN PetscErrorCode PetscDeviceMemcpy(PetscDeviceContext, void *PETSC_RESTRICT, const void *PETSC_RESTRICT, size_t);
74 PETSC_EXTERN PetscErrorCode PetscDeviceMemset(PetscDeviceContext, void *PETSC_RESTRICT, PetscInt, size_t);
75 
76 /*MC
77   PetscDeviceMalloc - Allocate device-aware memory
78 
79   Synopsis:
80   #include <petscdevice.h>
81   PetscErrorCode PetscDeviceMalloc(PetscDeviceContext dctx, PetscMemType mtype, size_t n, Type **ptr)
82 
83   Not Collective, Asynchronous, Auto-dependency aware
84 
85   Input Parameters:
86 + dctx  - The `PetscDeviceContext` used to allocate the memory
87 . mtype - The type of memory to allocate
88 - n     - The amount (in elements) to allocate
89 
90   Output Parameter:
91 . ptr - The pointer to store the result in
92 
93   Level: beginner
94 
95   Notes:
96   Memory allocated with this function must be freed with `PetscDeviceFree()`.
97 
98   If `n` is zero, then `ptr` is set to `PETSC_NULLPTR`.
99 
100   This routine falls back to using `PetscMalloc1()` if PETSc was not configured with device
101   support. The user should note that `mtype` is ignored in this case, as `PetscMalloc1()`
102   allocates only host memory.
103 
104   This routine uses the `sizeof()` of the element type that `*ptr` points to (not `mtype`) to
105   determine the total memory to be allocated, therefore you should not multiply the number of
106   elements requested by the `sizeof()` the type\:
107 
108 .vb
109   PetscInt *arr;
110 
111   // correct
112   PetscDeviceMalloc(dctx,PETSC_MEMTYPE_DEVICE,n,&arr);
113 
114   // incorrect
115   PetscDeviceMalloc(dctx,PETSC_MEMTYPE_DEVICE,n*sizeof(*arr),&arr);
116 .ve
117 
118   Note that the pointer value stored in `ptr` is immediately valid and the user may freely inspect
119   or manipulate its value on function return, i.e.\:
120 
121 .vb
122   PetscInt *ptr;
123 
124   PetscDeviceMalloc(dctx, PETSC_MEMTYPE_DEVICE, 20, &ptr);
125 
126   PetscInt *sub_ptr = ptr + 10; // OK, no need to synchronize
127 
128   ptr[0] = 10; // ERROR, directly accessing contents of ptr is undefined until synchronization
129 .ve
130 
131   DAG representation:
132 .vb
133   time ->
134 
135   -> dctx - |= CALL =| -\- dctx -->
136                          \- ptr ->
137 .ve
138 
139 .N ASYNC_API
140 
141 .seealso: `PetscDeviceFree()`, `PetscDeviceCalloc()`, `PetscDeviceArrayCopy()`,
142 `PetscDeviceArrayZero()`
143 M*/
144 #define PetscDeviceMalloc(dctx, mtype, n, ptr) PetscDeviceAllocate_Private((dctx), PETSC_FALSE, (mtype), (size_t)(n) * sizeof(**(ptr)), PETSC_DEVICE_ALIGNOF(**(ptr)), (void **)(ptr))
145 
146 /*MC
147   PetscDeviceCalloc - Allocate zeroed device-aware memory
148 
149   Synopsis:
150   #include <petscdevice.h>
151   PetscErrorCode PetscDeviceCalloc(PetscDeviceContext dctx, PetscMemType mtype, size_t n, Type **ptr)
152 
153   Not Collective, Asynchronous, Auto-dependency aware
154 
155   Input Parameters:
156 + dctx  - The `PetscDeviceContext` used to allocate the memory
157 . mtype - The type of memory to allocate
158 - n     - The amount (in elements) to allocate
159 
160   Output Parameter:
161 . ptr - The pointer to store the result in
162 
163   Level: beginner
164 
165   Notes:
166   Has identical usage to `PetscDeviceMalloc()` (in particular, `n` counts elements, not bytes) except
167   that the memory is zeroed before it is returned. See `PetscDeviceMalloc()` for further discussion.
168 
169   This routine falls back to using `PetscCalloc1()` if PETSc was not configured with device
170   support. The user should note that `mtype` is ignored in this case, as `PetscCalloc1()`
171   allocates only host memory.
172 
173 .N ASYNC_API
174 
175 .seealso: `PetscDeviceFree()`, `PetscDeviceMalloc()`, `PetscDeviceArrayCopy()`,
176 `PetscDeviceArrayZero()`
177 M*/
178 #define PetscDeviceCalloc(dctx, mtype, n, ptr) PetscDeviceAllocate_Private((dctx), PETSC_TRUE, (mtype), (size_t)(n) * sizeof(**(ptr)), PETSC_DEVICE_ALIGNOF(**(ptr)), (void **)(ptr))
179 
180 /*MC
181   PetscDeviceFree - Free device-aware memory obtained with `PetscDeviceMalloc()` or `PetscDeviceCalloc()`
182 
183   Synopsis:
184   #include <petscdevice.h>
185   PetscErrorCode PetscDeviceFree(PetscDeviceContext dctx, void *ptr)
186 
187   Not Collective, Asynchronous, Auto-dependency aware
188 
189   Input Parameters:
190 + dctx - The `PetscDeviceContext` used to free the memory
191 - ptr  - The pointer to free, may be `NULL`
192 
193   Level: beginner
194 
195   Notes:
196   `ptr` is set to `PETSC_NULLPTR` on successful deallocation. Because this is a macro that assigns to `ptr` and expands it twice, `ptr` must be a modifiable lvalue without side effects.
197 
198   `ptr` must have been allocated using `PetscDeviceMalloc()` or `PetscDeviceCalloc()`, not `PetscMalloc()` or related routines.
199 
200   This routine falls back to using `PetscFree()` if PETSc was not configured with device
201   support. The user should note that `PetscFree()` frees only host memory.
202 
203   DAG representation:
204 .vb
205   time ->
206 
207   -> dctx -/- |= CALL =| - dctx ->
208   -> ptr -/
209 .ve
210 
211 .N ASYNC_API
212 
213 .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()`
214 M*/
215 #define PetscDeviceFree(dctx, ptr) ((PetscErrorCode)(PetscDeviceDeallocate_Private((dctx), (ptr)) || ((ptr) = PETSC_NULLPTR, PETSC_SUCCESS)))
216 
217 /*MC
218   PetscDeviceArrayCopy - Copy memory in a device-aware manner
219 
220   Synopsis:
221   #include <petscdevice.h>
222   PetscErrorCode PetscDeviceArrayCopy(PetscDeviceContext dctx, void *dest, const void *src, size_t n)
223 
224   Not Collective, Asynchronous, Auto-dependency aware
225 
226   Input Parameters:
227 + dctx - The `PetscDeviceContext` used to copy the memory
228 . dest - The pointer to copy to
229 . src  - The pointer to copy from
230 - n    - The amount (in elements) to copy
231 
232   Notes:
233   Both `dest` and `src` must have been allocated using `PetscDeviceMalloc()` or
234   `PetscDeviceCalloc()`.
235 
236   This uses `sizeof(*src)` (the size of the element type that `src` points to) to determine the
237   total memory to be copied, therefore you should not multiply the number of elements by the
238   `sizeof()` the type\:
239 
240 .vb
241   PetscInt *to,*from;
242 
243   // correct
244   PetscDeviceArrayCopy(dctx,to,from,n);
245 
246   // incorrect
247   PetscDeviceArrayCopy(dctx,to,from,n*sizeof(*from));
248 .ve
249 
250   See `PetscDeviceMemcpy()` for further discussion.
251 
252   Level: beginner
253 
254 .N ASYNC_API
255 
256 .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()`, `PetscDeviceFree()`,
257 `PetscDeviceArrayZero()`, `PetscDeviceMemcpy()`
258 M*/
259 #define PetscDeviceArrayCopy(dctx, dest, src, n) PetscDeviceMemcpy((dctx), (dest), (src), (size_t)(n) * sizeof(*(src)))
260 
261 /*MC
262   PetscDeviceArrayZero - Zero memory in a device-aware manner
263 
264   Synopsis:
265   #include <petscdevice.h>
266   PetscErrorCode PetscDeviceArrayZero(PetscDeviceContext dctx, void *ptr, size_t n)
267 
268   Not Collective, Asynchronous, Auto-dependency aware
269 
270   Input Parameters:
271 + dctx  - The `PetscDeviceContext` used to zero the memory
272 . ptr   - The pointer to the memory
273 - n     - The amount (in elements) to zero
274 
275   Level: beginner
276 
277   Notes:
278   `ptr` must have been allocated using `PetscDeviceMalloc()` or `PetscDeviceCalloc()`.
279 
280   This uses `sizeof(*ptr)` (the size of the element type that `ptr` points to) to determine the total
281   memory to be zeroed, therefore you should not multiply the number of elements by the `sizeof()` the type\:
282 
283 .vb
284   PetscInt *ptr;
285 
286   // correct
287   PetscDeviceArrayZero(dctx,ptr,n);
288 
289   // incorrect
290   PetscDeviceArrayZero(dctx,ptr,n*sizeof(*ptr));
291 .ve
292 
293   See `PetscDeviceMemset()` for further discussion.
294 
295 .N ASYNC_API
296 
297 .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()`, `PetscDeviceFree()`,
298 `PetscDeviceArrayCopy()`, `PetscDeviceMemset()`
299 M*/
300 #define PetscDeviceArrayZero(dctx, ptr, n) PetscDeviceMemset((dctx), (ptr), 0, (size_t)(n) * sizeof(*(ptr)))
301