xref: /petsc/include/petscdevice.h (revision 884d422fa8a734f01f4cddac68ca9e4d00f53602)
1 #pragma once
2 
3 #include <petscdevicetypes.h>
4 #include <petscviewertypes.h>
5 
6 #if PETSC_CPP_VERSION >= 11 // C++11: PETSC_DEVICE_ALIGNOF(expr) is the alignment of the type of expr
7   #define PETSC_DEVICE_ALIGNOF(...) alignof(decltype(__VA_ARGS__))
8 #elif PETSC_C_VERSION >= 11 // C11
9   #ifdef __GNUC__ // GNU __typeof__ turns the expression into a type that _Alignof accepts
10     #define PETSC_DEVICE_ALIGNOF(...) _Alignof(__typeof__(__VA_ARGS__))
11   #else
12     #include <stddef.h> // max_align_t
13     // Note: we cannot simply write _Alignof(expression) because clang warns that "'_Alignof' applied to an
14     // expression is a GNU extension", so the argument is ignored and max_align_t (ultra safe) is used instead
15     #define PETSC_DEVICE_ALIGNOF(...) _Alignof(max_align_t)
16   #endif // __GNUC__
17 #else // neither C++11 nor C11: the argument is ignored and PETSC_MEMALIGN is used
18   #define PETSC_DEVICE_ALIGNOF(...) PETSC_MEMALIGN
19 #endif
20 
21 /* SUBMANSEC = Sys */
22 
23 // REVIEW ME: hard-coded to 1 in this header; this should probably be decided somewhere better, e.g. at configure time?
24 #define PETSC_HAVE_HOST 1
25 
26 /* logging support: class ids for PetscDevice and PetscDeviceContext, declared here and defined elsewhere */
27 PETSC_EXTERN PetscClassId PETSC_DEVICE_CLASSID;
28 PETSC_EXTERN PetscClassId PETSC_DEVICE_CONTEXT_CLASSID;
29 
30 PETSC_EXTERN PetscErrorCode PetscDeviceInitializePackage(void); /* NOTE(review): presumably paired with PetscDeviceFinalizePackage() -- confirm */
31 PETSC_EXTERN PetscErrorCode PetscDeviceFinalizePackage(void);
32 PETSC_EXTERN PetscErrorCode PetscGetMemType(const void *, PetscMemType *); /* pointee of the first argument is const; presumably the result is written through the second -- confirm */
33 
34 /* PetscDevice: real entry points when PETSc is built with C++ (HAVE_CXX), otherwise the host-only stub macros below */
35 #if PetscDefined(HAVE_CXX)
36 PETSC_EXTERN PetscErrorCode  PetscDeviceCreate(PetscDeviceType, PetscInt, PetscDevice *);
37 PETSC_EXTERN PetscErrorCode  PetscDeviceDestroy(PetscDevice *);
38 PETSC_EXTERN PetscErrorCode  PetscDeviceConfigure(PetscDevice);
39 PETSC_EXTERN PetscErrorCode  PetscDeviceView(PetscDevice, PetscViewer);
40 PETSC_EXTERN PetscErrorCode  PetscDeviceGetType(PetscDevice, PetscDeviceType *);
41 PETSC_EXTERN PetscErrorCode  PetscDeviceGetDeviceId(PetscDevice, PetscInt *);
42 PETSC_EXTERN PetscDeviceType PETSC_DEVICE_DEFAULT(void);
43 PETSC_EXTERN PetscErrorCode  PetscDeviceSetDefaultDeviceType(PetscDeviceType);
44 PETSC_EXTERN PetscErrorCode  PetscDeviceInitialize(PetscDeviceType);
45 PETSC_EXTERN PetscBool       PetscDeviceInitialized(PetscDeviceType);
46 #else /* stubs: parameters spelled like type names are never evaluated; outputs are set to null, 0 or the host device type */
47   #define PetscDeviceCreate(PetscDeviceType, PetscInt, dev) (*(dev) = PETSC_NULLPTR, PETSC_SUCCESS)
48   #define PetscDeviceDestroy(dev)                           (*(dev) = PETSC_NULLPTR, PETSC_SUCCESS)
49   #define PetscDeviceConfigure(PetscDevice)                 PETSC_SUCCESS
50   #define PetscDeviceView(PetscDevice, PetscViewer)         PETSC_SUCCESS
51   #define PetscDeviceGetType(PetscDevice, type)             (*(type) = PETSC_DEVICE_DEFAULT(), PETSC_SUCCESS)
52   #define PetscDeviceGetDeviceId(PetscDevice, id)           (*(id) = 0, PETSC_SUCCESS)
53   #define PETSC_DEVICE_DEFAULT()                            PETSC_DEVICE_HOST
54   #define PetscDeviceSetDefaultDeviceType(PetscDeviceType)  PETSC_SUCCESS
55   #define PetscDeviceInitialize(PetscDeviceType)            PETSC_SUCCESS
56   #define PetscDeviceInitialized(dtype)                     ((dtype) == PETSC_DEVICE_HOST)
57 #endif /* PetscDefined(HAVE_CXX) */
58 
59 /* PetscDeviceContext: real entry points when PETSc is built with C++ (HAVE_CXX), otherwise the no-op stub macros below */
60 #if PetscDefined(HAVE_CXX)
61 PETSC_EXTERN PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *);
62 PETSC_EXTERN PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *);
63 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext, PetscStreamType);
64 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext, PetscStreamType *);
65 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext, PetscDevice);
66 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext, PetscDevice *);
67 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext, PetscDeviceType *);
68 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext);
69 PETSC_EXTERN PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext, PetscDeviceContext *);
70 PETSC_EXTERN PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext, PetscBool *);
71 PETSC_EXTERN PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext, PetscDeviceContext);
72 PETSC_EXTERN PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext, PetscStreamType, PetscInt, PetscDeviceContext **);
73 PETSC_EXTERN PetscErrorCode PetscDeviceContextFork(PetscDeviceContext, PetscInt, PetscDeviceContext **);
74 PETSC_EXTERN PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext, PetscInt, PetscDeviceContextJoinMode, PetscDeviceContext **);
75 PETSC_EXTERN PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext);
76 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm, PetscDeviceContext);
77 PETSC_EXTERN PetscErrorCode PetscDeviceContextView(PetscDeviceContext, PetscViewer);
78 PETSC_EXTERN PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext, PetscObject, const char name[]);
79 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *);
80 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext);
81 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamHandle(PetscDeviceContext, void **);
82 #else /* stubs: every call evaluates to PETSC_SUCCESS; parameters spelled like type names are never evaluated; output handles are set to PETSC_NULLPTR */
83   #define PetscDeviceContextCreate(dctx)                                                                            (*(dctx) = PETSC_NULLPTR, PETSC_SUCCESS)
84   #define PetscDeviceContextDestroy(dctx)                                                                           (*(dctx) = PETSC_NULLPTR, PETSC_SUCCESS)
85   #define PetscDeviceContextSetStreamType(PetscDeviceContext, PetscStreamType)                                      PETSC_SUCCESS
86   #define PetscDeviceContextGetStreamType(PetscDeviceContext, type)                                                 (*(type) = PETSC_STREAM_GLOBAL_BLOCKING, PETSC_SUCCESS)
87   #define PetscDeviceContextSetDevice(PetscDeviceContext, PetscDevice)                                              PETSC_SUCCESS
88   #define PetscDeviceContextGetDevice(PetscDeviceContext, device)                                                   (*(device) = PETSC_NULLPTR, PETSC_SUCCESS)
89   #define PetscDeviceContextGetDeviceType(PetscDeviceContext, type)                                                 (*(type) = PETSC_DEVICE_DEFAULT(), PETSC_SUCCESS)
90   #define PetscDeviceContextSetUp(PetscDeviceContext)                                                               PETSC_SUCCESS
91   #define PetscDeviceContextDuplicate(PetscDeviceContextl, PetscDeviceContextr)                                     (*(PetscDeviceContextr) = PETSC_NULLPTR, PETSC_SUCCESS)
92   #define PetscDeviceContextQueryIdle(PetscDeviceContext, idle)                                                     (*(idle) = PETSC_TRUE, PETSC_SUCCESS)
93   #define PetscDeviceContextWaitForContext(PetscDeviceContextl, PetscDeviceContextr)                                PETSC_SUCCESS
94   #define PetscDeviceContextForkWithStreamType(PetscDeviceContextp, PetscStreamType, PetscInt, PetscDeviceContextc) (*(PetscDeviceContextc) = PETSC_NULLPTR, PETSC_SUCCESS)
95   #define PetscDeviceContextFork(PetscDeviceContextp, PetscInt, PetscDeviceContextc)                                (*(PetscDeviceContextc) = PETSC_NULLPTR, PETSC_SUCCESS)
96   #define PetscDeviceContextJoin(PetscDeviceContextp, PetscInt, PetscDeviceContextJoinMode, PetscDeviceContextc)    (*(PetscDeviceContextc) = PETSC_NULLPTR, PETSC_SUCCESS)
97   #define PetscDeviceContextSynchronize(PetscDeviceContext)                                                         PETSC_SUCCESS
98   #define PetscDeviceContextSetFromOptions(MPI_Comm, PetscDeviceContext)                                            PETSC_SUCCESS
99   #define PetscDeviceContextView(PetscDeviceContext, PetscViewer)                                                   PETSC_SUCCESS
100   #define PetscDeviceContextViewFromOptions(PetscDeviceContext, PetscObject, PetscViewer)                           PETSC_SUCCESS /* NOTE(review): the 3rd parameter corresponds to 'const char name[]' in the prototype, not a PetscViewer; harmless since it is unused */
101   #define PetscDeviceContextGetCurrentContext(dctx)                                                                 (*(dctx) = PETSC_NULLPTR, PETSC_SUCCESS)
102   #define PetscDeviceContextSetCurrentContext(PetscDeviceContext)                                                   PETSC_SUCCESS
103   #define PetscDeviceContextGetStreamHandle(PetscDeviceContext, handle)                                             (*(handle) = PETSC_NULLPTR, PETSC_SUCCESS)
104 #endif /* PetscDefined(HAVE_CXX) */
105 
106 /* memory: raw allocate/free/copy/set primitives that the PetscDeviceMalloc()-style macros in this header expand to */
107 #if PetscDefined(HAVE_CXX)
108 PETSC_EXTERN PetscErrorCode PetscDeviceAllocate_Private(PetscDeviceContext, PetscBool, PetscMemType, size_t, size_t, void **PETSC_RESTRICT);
109 PETSC_EXTERN PetscErrorCode PetscDeviceDeallocate_Private(PetscDeviceContext, void *PETSC_RESTRICT);
110 PETSC_EXTERN PetscErrorCode PetscDeviceMemcpy(PetscDeviceContext, void *PETSC_RESTRICT, const void *PETSC_RESTRICT, size_t);
111 PETSC_EXTERN PetscErrorCode PetscDeviceMemset(PetscDeviceContext, void *PETSC_RESTRICT, PetscInt, size_t);
112 #else /* host fallback onto PetscMallocA()/PetscFree()/PetscMemcpy()/memset(); the context, memory type and alignment arguments are unused */
113   #include <string.h> // memset()
114   #define PetscDeviceAllocate_Private(PetscDeviceContext, clear, PetscMemType, size, alignment, ptr) PetscMallocA(1, (clear), __LINE__, PETSC_FUNCTION_NAME, __FILE__, (size), (ptr))
115   #define PetscDeviceDeallocate_Private(PetscDeviceContext, ptr)                                     PetscFree((ptr))
116   #define PetscDeviceMemcpy(PetscDeviceContext, dest, src, size)                                     PetscMemcpy((dest), (src), (size))
117   #define PetscDeviceMemset(PetscDeviceContext, ptr, v, size)                                        ((void)memset((ptr), (unsigned char)(v), (size)), PETSC_SUCCESS)
118 #endif /* PetscDefined(HAVE_CXX) */
119 
120 /*MC
121   PetscDeviceMalloc - Allocate device-aware memory
122 
123   Synopsis:
124   #include <petscdevice.h>
125   PetscErrorCode PetscDeviceMalloc(PetscDeviceContext dctx, PetscMemType mtype, size_t n, Type **ptr)
126 
127   Not Collective, Asynchronous, Auto-dependency aware
128 
129   Input Parameters:
130 + dctx  - The `PetscDeviceContext` used to allocate the memory
131 . mtype - The type of memory to allocate
132 - n     - The amount (in elements) to allocate
133 
134   Output Parameter:
135 . ptr - The pointer to store the result in
136 
137   Level: beginner
138 
139   Notes:
140   Memory allocated with this function must be freed with `PetscDeviceFree()`.
141 
142   If `n` is zero, then `ptr` is set to `PETSC_NULLPTR`.
143 
144   This routine falls back to using `PetscMalloc1()` if PETSc was not configured with device
145   support. The user should note that `mtype` is ignored in this case, as `PetscMalloc1()`
146   allocates only host memory.
147 
148   This routine uses the `sizeof()` of the type that `ptr` points to (i.e. `sizeof(**ptr)`) to
149   determine the total memory to be allocated, therefore you should not multiply the number of
150   elements requested by the `sizeof()` the type\:
151 
152 .vb
153   PetscInt *arr;
154 
155   // correct
156   PetscDeviceMalloc(dctx,PETSC_MEMTYPE_DEVICE,n,&arr);
157 
158   // incorrect
159   PetscDeviceMalloc(dctx,PETSC_MEMTYPE_DEVICE,n*sizeof(*arr),&arr);
160 .ve
161 
162   Note that the result stored in `ptr` is immediately valid and the user may freely inspect or
163   manipulate its value on function return, i.e.\:
164 
165 .vb
166   PetscInt *ptr;
167 
168   PetscDeviceMalloc(dctx, PETSC_MEMTYPE_DEVICE, 20, &ptr);
169 
170   PetscInt *sub_ptr = ptr + 10; // OK, no need to synchronize
171 
172   ptr[0] = 10; // ERROR, directly accessing contents of ptr is undefined until synchronization
173 .ve
174 
175   DAG representation:
176 .vb
177   time ->
178 
179   -> dctx - |= CALL =| -\- dctx -->
180                          \- ptr ->
181 .ve
182 
183 .N ASYNC_API
184 
185 .seealso: `PetscDeviceFree()`, `PetscDeviceCalloc()`, `PetscDeviceArrayCopy()`,
186 `PetscDeviceArrayZero()`
187 M*/
188 #define PetscDeviceMalloc(dctx, mtype, n, ptr) PetscDeviceAllocate_Private((dctx), PETSC_FALSE, (mtype), (size_t)(n) * sizeof(**(ptr)), PETSC_DEVICE_ALIGNOF(**(ptr)), (void **)(ptr))
189 
190 /*MC
191   PetscDeviceCalloc - Allocate zeroed device-aware memory
192 
193   Synopsis:
194   #include <petscdevice.h>
195   PetscErrorCode PetscDeviceCalloc(PetscDeviceContext dctx, PetscMemType mtype, size_t n, Type **ptr)
196 
197   Not Collective, Asynchronous, Auto-dependency aware
198 
199   Input Parameters:
200 + dctx  - The `PetscDeviceContext` used to allocate the memory
201 . mtype - The type of memory to allocate
202 - n     - The amount (in elements) to allocate
203 
204   Output Parameter:
205 . ptr - The pointer to store the result in
206 
207   Level: beginner
208 
209   Notes:
210   Has identical usage to `PetscDeviceMalloc()` (in particular `n` counts elements, not bytes) except
211   that the memory is zeroed before it is returned. See `PetscDeviceMalloc()` for further discussion.
212 
213   This routine falls back to using `PetscCalloc1()` if PETSc was not configured with device
214   support. The user should note that `mtype` is ignored in this case, as `PetscCalloc1()`
215   allocates only host memory.
216 
217 .N ASYNC_API
218 
219 .seealso: `PetscDeviceFree()`, `PetscDeviceMalloc()`, `PetscDeviceArrayCopy()`,
220 `PetscDeviceArrayZero()`
221 M*/
222 #define PetscDeviceCalloc(dctx, mtype, n, ptr) PetscDeviceAllocate_Private((dctx), PETSC_TRUE, (mtype), (size_t)(n) * sizeof(**(ptr)), PETSC_DEVICE_ALIGNOF(**(ptr)), (void **)(ptr))
223 
224 /*MC
225   PetscDeviceFree - Free device-aware memory obtained with `PetscDeviceMalloc()` or `PetscDeviceCalloc()`
226 
227   Synopsis:
228   #include <petscdevice.h>
229   PetscErrorCode PetscDeviceFree(PetscDeviceContext dctx, void *ptr)
230 
231   Not Collective, Asynchronous, Auto-dependency aware
232 
233   Input Parameters:
234 + dctx - The `PetscDeviceContext` used to free the memory
235 - ptr  - The pointer to free, may be `NULL`
236 
237   Level: beginner
238 
239   Notes:
240   `ptr` is set to `PETSC_NULLPTR` on successful deallocation; it must therefore be a modifiable lvalue, and it is evaluated more than once.
241 
242   `ptr` must have been allocated using `PetscDeviceMalloc()` or `PetscDeviceCalloc()`, not `PetscMalloc()` or related routines.
243 
244   This routine falls back to using `PetscFree()` if PETSc was not configured with device
245   support. The user should note that `PetscFree()` frees only host memory.
246 
247   DAG representation:
248 .vb
249   time ->
250 
251   -> dctx -/- |= CALL =| - dctx ->
252   -> ptr -/
253 .ve
254 
255 .N ASYNC_API
256 
257 .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()`
258 M*/
259 #define PetscDeviceFree(dctx, ptr) ((PetscErrorCode)(PetscDeviceDeallocate_Private((dctx), (ptr)) || ((ptr) = PETSC_NULLPTR, PETSC_SUCCESS)))
260 
261 /*MC
262   PetscDeviceArrayCopy - Copy memory in a device-aware manner
263 
264   Synopsis:
265   #include <petscdevice.h>
266   PetscErrorCode PetscDeviceArrayCopy(PetscDeviceContext dctx, void *dest, const void *src, size_t n)
267 
268   Not Collective, Asynchronous, Auto-dependency aware
269 
270   Input Parameters:
271 + dctx - The `PetscDeviceContext` used to copy the memory
272 . dest - The pointer to copy to
273 . src  - The pointer to copy from
274 - n    - The amount (in elements) to copy
275 
276   Notes:
277   Both `dest` and `src` must have been allocated using `PetscDeviceMalloc()` or
278   `PetscDeviceCalloc()`.
279 
280   This uses the `sizeof()` of the type that `src` points to (i.e. `sizeof(*src)`) to determine the
281   total memory to be copied, therefore you should not multiply the number of elements by the
282   `sizeof()` the type\:
283 
284 .vb
285   PetscInt *to,*from;
286 
287   // correct
288   PetscDeviceArrayCopy(dctx,to,from,n);
289 
290   // incorrect
291   PetscDeviceArrayCopy(dctx,to,from,n*sizeof(*from));
292 .ve
293 
294   See `PetscDeviceMemcpy()` for further discussion.
295 
296   Level: beginner
297 
298 .N ASYNC_API
299 
300 .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()`, `PetscDeviceFree()`,
301 `PetscDeviceArrayZero()`, `PetscDeviceMemcpy()`
302 M*/
303 #define PetscDeviceArrayCopy(dctx, dest, src, n) PetscDeviceMemcpy((dctx), (dest), (src), (size_t)(n) * sizeof(*(src)))
304 
305 /*MC
306   PetscDeviceArrayZero - Zero memory in a device-aware manner
307 
308   Synopsis:
309   #include <petscdevice.h>
310   PetscErrorCode PetscDeviceArrayZero(PetscDeviceContext dctx, void *ptr, size_t n)
311 
312   Not Collective, Asynchronous, Auto-dependency aware
313 
314   Input Parameters:
315 + dctx  - The `PetscDeviceContext` used to zero the memory
316 . ptr   - The pointer to the memory
317 - n     - The amount (in elements) to zero
318 
319   Level: beginner
320 
321   Notes:
322   `ptr` must have been allocated using `PetscDeviceMalloc()` or `PetscDeviceCalloc()`.
323 
324   This uses the `sizeof()` of the type that `ptr` points to (i.e. `sizeof(*ptr)`) to determine the total
325   memory to be zeroed, therefore you should not multiply the number of elements by the `sizeof()` the type\:
326 
327 .vb
328   PetscInt *ptr;
329 
330   // correct
331   PetscDeviceArrayZero(dctx,ptr,n);
332 
333   // incorrect
334   PetscDeviceArrayZero(dctx,ptr,n*sizeof(*ptr));
335 .ve
336 
337   See `PetscDeviceMemset()` for further discussion.
338 
339 .N ASYNC_API
340 
341 .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()`, `PetscDeviceFree()`,
342 `PetscDeviceArrayCopy()`, `PetscDeviceMemset()`
343 M*/
344 #define PetscDeviceArrayZero(dctx, ptr, n) PetscDeviceMemset((dctx), (ptr), 0, (size_t)(n) * sizeof(*(ptr)))
345