1 #include "petscdevice_interface_internal.hpp" /*I <petscdevice.h> I*/
2 #include <petsc/private/viewerimpl.h> // _p_PetscViewer for PetscObjectCast()
3
4 #include <petsc/private/cpp/object_pool.hpp>
5 #include <petsc/private/cpp/utility.hpp>
6 #include <petsc/private/cpp/array.hpp>
7
8 #include <vector>
9 #include <string> // std::to_string among other things
10
11 /* Define the allocator */
12 class PetscDeviceContextConstructor : public Petsc::ConstructorInterface<_p_PetscDeviceContext, PetscDeviceContextConstructor> {
13 public:
construct_(PetscDeviceContext dctx) const14 PetscErrorCode construct_(PetscDeviceContext dctx) const noexcept
15 {
16 PetscFunctionBegin;
17 PetscCall(PetscArrayzero(dctx, 1));
18 PetscCall(PetscHeaderCreate_Private((PetscObject)dctx, PETSC_DEVICE_CONTEXT_CLASSID, "PetscDeviceContext", "PetscDeviceContext", "Sys", PETSC_COMM_SELF, (PetscObjectDestroyFn *)PetscDeviceContextDestroy, (PetscObjectViewFn *)PetscDeviceContextView));
19 PetscCall(PetscLogObjectCreate((PetscObject)dctx));
20
21 PetscCallCXX(PetscObjectCast(dctx)->cpp = new CxxData{dctx});
22 PetscCall(underlying().reset(dctx, false));
23 PetscFunctionReturn(PETSC_SUCCESS);
24 }
25
destroy_(PetscDeviceContext dctx)26 static PetscErrorCode destroy_(PetscDeviceContext dctx) noexcept
27 {
28 PetscFunctionBegin;
29 PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren);
30 PetscTryTypeMethod(dctx, destroy);
31 PetscCall(PetscDeviceDestroy(&dctx->device));
32 PetscCall(PetscFree(dctx->childIDs));
33 delete CxxDataCast(dctx);
34 PetscCall(PetscHeaderDestroy_Private(PetscObjectCast(dctx), PETSC_FALSE));
35 PetscFunctionReturn(PETSC_SUCCESS);
36 }
37
reset_(PetscDeviceContext dctx,bool zero=true)38 static PetscErrorCode reset_(PetscDeviceContext dctx, bool zero = true) noexcept
39 {
40 PetscFunctionBegin;
41 if (zero) {
42 // reset the device if the user set it
43 if (Petsc::util::exchange(dctx->usersetdevice, PETSC_FALSE)) {
44 PetscTryTypeMethod(dctx, destroy);
45 PetscCall(PetscDeviceDestroy(&dctx->device));
46 PetscCall(PetscArrayzero(dctx->ops, 1));
47 dctx->data = nullptr;
48 }
49 PetscCall(PetscHeaderReset_Internal(PetscObjectCast(dctx)));
50 dctx->numChildren = 0;
51 dctx->setup = PETSC_FALSE;
52 // don't deallocate the child array, rather just zero it out
53 PetscCall(PetscArrayzero(dctx->childIDs, dctx->maxNumChildren));
54 PetscCall(CxxDataCast(dctx)->clear());
55 PetscCall(CxxDataCast(dctx)->reset_self(dctx));
56 }
57 dctx->streamType = PETSC_STREAM_DEFAULT;
58 PetscFunctionReturn(PETSC_SUCCESS);
59 }
60
invalidate_(PetscDeviceContext dctx)61 static PetscErrorCode invalidate_(PetscDeviceContext dctx) noexcept
62 {
63 PetscFunctionBegin;
64 PetscCall(CxxDataCast(dctx)->reset_self(dctx));
65 PetscFunctionReturn(PETSC_SUCCESS);
66 }
67 };
68
69 static Petsc::ObjectPool<_p_PetscDeviceContext, PetscDeviceContextConstructor> contextPool;
70
71 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
72 /*@C
73 PetscDeviceContextCreate - Creates a `PetscDeviceContext`
74
75 Not Collective
76
77 Output Parameter:
78 . dctx - The `PetscDeviceContext`
79
80 Level: beginner
81
82 Note:
83 Unlike almost every other PETSc class it is advised that most users use
84 `PetscDeviceContextDuplicate()` rather than this routine to create new contexts. Contexts of
85 different types are incompatible with one another; using `PetscDeviceContextDuplicate()`
86 ensures compatible types.
87
88 DAG representation:
89 .vb
90 time ->
91
92 |= CALL =| - dctx ->
93 .ve
94
95 .N ASYNC_API
96
97 .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`,
98 `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`,
99 `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextView()`, `PetscDeviceContextDestroy()`
100 @*/
PetscDeviceContextCreate(PetscDeviceContext * dctx)101 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx)
102 {
103 PetscFunctionBegin;
104 PetscAssertPointer(dctx, 1);
105 PetscCall(PetscDeviceInitializePackage());
106 PetscCall(PetscLogEventBegin(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr));
107 PetscCall(contextPool.allocate(dctx));
108 PetscCall(PetscLogEventEnd(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr));
109 PetscFunctionReturn(PETSC_SUCCESS);
110 }
111
112 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
113 /*@C
114 PetscDeviceContextDestroy - Frees a `PetscDeviceContext`
115
116 Not Collective
117
118 Input Parameter:
119 . dctx - The `PetscDeviceContext`
120
121 Level: beginner
122
123 Notes:
124 No implicit synchronization occurs due to this routine, all resources are released completely
125 asynchronously w.r.t. the host. If one needs to guarantee access to the data produced on
126 `dctx`'s stream the user is responsible for calling `PetscDeviceContextSynchronize()` before
127 calling this routine.
128
129 DAG representation:
130 .vb
131 time ->
132
133 -> dctx - |= CALL =|
134 .ve
135
136 Developer Notes:
137 `dctx` is never actually "destroyed" in the classical sense. It is returned to an ever
138 growing pool of `PetscDeviceContext`s. There are currently no limits on the size of the pool,
139 this should perhaps be implemented.
140
141 .N ASYNC_API
142
143 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
144 `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()`
145 @*/
PetscDeviceContextDestroy(PetscDeviceContext * dctx)146 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx)
147 {
148 PetscFunctionBegin;
149 PetscAssertPointer(dctx, 1);
150 if (!*dctx) PetscFunctionReturn(PETSC_SUCCESS);
151 PetscCall(PetscLogEventBegin(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr));
152 if (--(PetscObjectCast(*dctx)->refct) <= 0) {
153 PetscCall(PetscDeviceContextCheckNotOrphaned_Internal(*dctx));
154 PetscCall(contextPool.deallocate(dctx));
155 }
156 PetscCall(PetscLogEventEnd(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr));
157 *dctx = nullptr;
158 PetscFunctionReturn(PETSC_SUCCESS);
159 }
160
161 /*@C
162 PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a
163 `PetscDeviceContext`
164
165 Not Collective
166
167 Input Parameters:
168 + dctx - The `PetscDeviceContext`
169 - type - The `PetscStreamType`
170
171 Level: beginner
172
173 Note:
174 See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
175 types and their interactions. If the `PetscDeviceContext` was previously set up and stream
176 type was changed, you must call `PetscDeviceContextSetUp()` again after this routine.
177
178 .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`,
179 `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()`
180 @*/
PetscDeviceContextSetStreamType(PetscDeviceContext dctx,PetscStreamType type)181 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type)
182 {
183 PetscFunctionBegin;
184 // do not use getoptionalnullcontext here since we do not want the user to change the stream
185 // type
186 PetscValidDeviceContext(dctx, 1);
187 PetscValidStreamType(type, 2);
188 // only need to do complex swapping if the object has already been setup
189 if (dctx->setup && (dctx->streamType != type)) {
190 dctx->setup = PETSC_FALSE;
191 PetscCall(PetscLogEventBegin(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr));
192 PetscUseTypeMethod(dctx, changestreamtype, type);
193 PetscCall(PetscLogEventEnd(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr));
194 }
195 dctx->streamType = type;
196 PetscFunctionReturn(PETSC_SUCCESS);
197 }
198
199 /*@C
200 PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a
201 `PetscDeviceContext`
202
203 Not Collective
204
205 Input Parameter:
206 . dctx - The `PetscDeviceContext`
207
208 Output Parameter:
209 . type - The `PetscStreamType`
210
211 Level: beginner
212
213 Note:
214 See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
215 types and their interactions
216
217 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`,
218 `PetscDeviceContextSetFromOptions()`
219 @*/
PetscDeviceContextGetStreamType(PetscDeviceContext dctx,PetscStreamType * type)220 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type)
221 {
222 PetscFunctionBegin;
223 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
224 PetscAssertPointer(type, 2);
225 *type = dctx->streamType;
226 PetscFunctionReturn(PETSC_SUCCESS);
227 }
228
229 /*
230 Actual function to set the device.
231
232 1. Repeatedly destroying and recreating internal data structures (like streams and events)
233 for recycled PetscDeviceContexts is not free. If done often, it does add up.
234 2. The vast majority of PetscDeviceContexts are created by PETSc either as children or
235 default contexts. The default contexts *never* change type, and the children are extremely
236 unlikely to (chances are if you fork once, you will fork again very soon).
237 3. The only time this calculus changes is if the user themselves sets the device type. In
238 this case we do not know what the user has changed, so must always wipe the slate clean.
239
240 Thus we need to keep track whether the user explicitly sets the device contexts device.
241 */
PetscDeviceContextSetDevice_Private(PetscDeviceContext dctx,PetscDevice device,PetscBool user_set)242 static PetscErrorCode PetscDeviceContextSetDevice_Private(PetscDeviceContext dctx, PetscDevice device, PetscBool user_set)
243 {
244 PetscFunctionBegin;
245 // do not use getoptionalnullcontext here since we do not want the user to change its device
246 PetscValidDeviceContext(dctx, 1);
247 PetscValidDevice(device, 2);
248 if (dctx->device && (dctx->device->id == device->id)) PetscFunctionReturn(PETSC_SUCCESS);
249 PetscCall(PetscLogEventBegin(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr));
250 PetscTryTypeMethod(dctx, destroy);
251 PetscCall(PetscDeviceDestroy(&dctx->device));
252 PetscCall(PetscMemzero(dctx->ops, sizeof(*dctx->ops)));
253 PetscCall(PetscDeviceReference_Internal(device));
254 // set it before calling the method
255 dctx->device = device;
256 PetscCall((*device->ops->createcontext)(dctx));
257 PetscCall(PetscLogEventEnd(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr));
258 dctx->setup = PETSC_FALSE;
259 dctx->usersetdevice = user_set;
260 PetscFunctionReturn(PETSC_SUCCESS);
261 }
262
PetscDeviceContextSetDefaultDeviceForType_Internal(PetscDeviceContext dctx,PetscDeviceType type)263 PetscErrorCode PetscDeviceContextSetDefaultDeviceForType_Internal(PetscDeviceContext dctx, PetscDeviceType type)
264 {
265 PetscDevice device;
266
267 PetscFunctionBegin;
268 PetscCall(PetscDeviceGetDefaultForType_Internal(type, &device));
269 PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_FALSE));
270 PetscFunctionReturn(PETSC_SUCCESS);
271 }
272
273 /*@C
274 PetscDeviceContextSetDevice - Set the underlying `PetscDevice` for a `PetscDeviceContext`
275
276 Not Collective
277
278 Input Parameters:
279 + dctx - The `PetscDeviceContext`
280 - device - The `PetscDevice`
281
282 Level: intermediate
283
284 Notes:
285 This routine is effectively `PetscDeviceContext`'s "set-type" (so every `PetscDeviceContext` must
286 also have an attached `PetscDevice`). Unlike the usual set-type semantics, it is not strictly
287 necessary to set a contexts device to enable usage, any created `PetscDeviceContext`s will
288 always come equipped with the "default" device.
289
290 This routine is a no-op if `device` is already attached to `dctx`.
291
292 This routine may (but is very unlikely to) initialize the backend device and may incur
293 synchronization.
294
295 .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()`,
296 `PetscDeviceContextGetDeviceType()`
297 @*/
PetscDeviceContextSetDevice(PetscDeviceContext dctx,PetscDevice device)298 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device)
299 {
300 PetscFunctionBegin;
301 PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_TRUE));
302 PetscFunctionReturn(PETSC_SUCCESS);
303 }
304
305 /*@C
306 PetscDeviceContextGetDevice - Get the underlying `PetscDevice` for a `PetscDeviceContext`
307
308 Not Collective
309
310 Input Parameter:
311 . dctx - the `PetscDeviceContext`
312
313 Output Parameter:
314 . device - The `PetscDevice`
315
316 Level: intermediate
317
318 Note:
319 This is a borrowed reference, the user should not destroy `device`.
320
321 .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice`, `PetscDeviceContextGetDeviceType()`
322 @*/
PetscDeviceContextGetDevice(PetscDeviceContext dctx,PetscDevice * device)323 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device)
324 {
325 PetscFunctionBegin;
326 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
327 PetscAssertPointer(device, 2);
328 PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt64_FMT " has no attached PetscDevice to get", PetscObjectCast(dctx)->id);
329 *device = dctx->device;
330 PetscFunctionReturn(PETSC_SUCCESS);
331 }
332
333 /*@C
334 PetscDeviceContextGetDeviceType - Get the `PetscDeviceType` for a `PetscDeviceContext`
335
336 Not Collective
337
338 Input Parameter:
339 . dctx - The `PetscDeviceContext`
340
341 Output Parameter:
342 . type - The `PetscDeviceType`
343
344 Level: beginner
345
346 Note:
347 This routine is a convenience shorthand for `PetscDeviceContextGetDevice()` ->
348 `PetscDeviceGetType()`.
349
350 .seealso: `PetscDeviceType`, `PetscDeviceContextGetDevice()`, `PetscDeviceGetType()`, `PetscDevice`
351 @*/
PetscDeviceContextGetDeviceType(PetscDeviceContext dctx,PetscDeviceType * type)352 PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext dctx, PetscDeviceType *type)
353 {
354 PetscDevice device = nullptr;
355
356 PetscFunctionBegin;
357 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
358 PetscAssertPointer(type, 2);
359 PetscCall(PetscDeviceContextGetDevice(dctx, &device));
360 PetscCall(PetscDeviceGetType(device, type));
361 PetscFunctionReturn(PETSC_SUCCESS);
362 }
363
364 /*@C
365 PetscDeviceContextSetUp - Prepares a `PetscDeviceContext` for use
366
367 Not Collective
368
369 Input Parameter:
370 . dctx - The `PetscDeviceContext`
371
372 Level: beginner
373
374 Developer Notes:
375 This routine is usually the stage where a `PetscDeviceContext` acquires device-side data
376 structures such as streams, events, and (possibly) handles.
377
378 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
379 `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()`
380 @*/
PetscDeviceContextSetUp(PetscDeviceContext dctx)381 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx)
382 {
383 PetscFunctionBegin;
384 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
385 if (dctx->setup) PetscFunctionReturn(PETSC_SUCCESS);
386 if (!dctx->device) {
387 const auto default_dtype = PETSC_DEVICE_DEFAULT();
388
389 PetscCall(PetscInfo(dctx, "PetscDeviceContext %" PetscInt64_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", PetscObjectCast(dctx)->id, PetscDeviceTypes[default_dtype]));
390 PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, default_dtype));
391 }
392 PetscCall(PetscLogEventBegin(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr));
393 PetscUseTypeMethod(dctx, setup);
394 PetscCall(PetscLogEventEnd(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr));
395 dctx->setup = PETSC_TRUE;
396 PetscFunctionReturn(PETSC_SUCCESS);
397 }
398
PetscDeviceContextDuplicate_Private(PetscDeviceContext dctx,PetscStreamType stype,PetscDeviceContext * dctxdup)399 static PetscErrorCode PetscDeviceContextDuplicate_Private(PetscDeviceContext dctx, PetscStreamType stype, PetscDeviceContext *dctxdup)
400 {
401 PetscFunctionBegin;
402 PetscCall(PetscLogEventBegin(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr));
403 PetscCall(PetscDeviceContextCreate(dctxdup));
404 PetscCall(PetscDeviceContextSetStreamType(*dctxdup, stype));
405 if (const auto device = dctx->device) PetscCall(PetscDeviceContextSetDevice_Private(*dctxdup, device, dctx->usersetdevice));
406 PetscCall(PetscDeviceContextSetUp(*dctxdup));
407 PetscCall(PetscLogEventEnd(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr));
408 PetscFunctionReturn(PETSC_SUCCESS);
409 }
410
411 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
412 /*@C
413 PetscDeviceContextDuplicate - Duplicates a `PetscDeviceContext` object
414
415 Not Collective
416
417 Input Parameter:
418 . dctx - The `PetscDeviceContext` to duplicate
419
420 Output Parameter:
421 . dctxdup - The duplicated `PetscDeviceContext`
422
423 Level: beginner
424
425 Notes:
426 This is a shorthand method for creating a `PetscDeviceContext` with the exact same settings as
427 another. Note however that `dctxdup` does not share any of the underlying data with `dctx`,
428 (including its current stream-state) they are completely separate objects.
429
430 There is no implied ordering between `dctx` or `dctxdup`.
431
432 DAG representation:
433 .vb
434 time ->
435
436 -> dctx - |= CALL =| - dctx ---->
437 - dctxdup ->
438 .ve
439
440 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
441 `PetscDeviceContextSetStreamType()`
442 @*/
PetscDeviceContextDuplicate(PetscDeviceContext dctx,PetscDeviceContext * dctxdup)443 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup)
444 {
445 auto stype = PETSC_STREAM_DEFAULT;
446
447 PetscFunctionBegin;
448 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
449 PetscAssertPointer(dctxdup, 2);
450 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
451 PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, dctxdup));
452 PetscFunctionReturn(PETSC_SUCCESS);
453 }
454
455 /*@C
456 PetscDeviceContextQueryIdle - Returns whether or not a `PetscDeviceContext` is idle
457
458 Not Collective
459
460 Input Parameter:
461 . dctx - The `PetscDeviceContext`
462
463 Output Parameter:
464 . idle - `PETSC_TRUE` if `dctx` has NO work, `PETSC_FALSE` if it has work
465
466 Level: intermediate
467
468 Note:
469 This routine only refers a singular context and does NOT take any of its children into
470 account. That is, if `dctx` is idle but has dependents who do have work this routine still
471 returns `PETSC_TRUE`.
472
473 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()`
474 @*/
PetscDeviceContextQueryIdle(PetscDeviceContext dctx,PetscBool * idle)475 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle)
476 {
477 PetscFunctionBegin;
478 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
479 PetscAssertPointer(idle, 2);
480 PetscCall(PetscLogEventBegin(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr));
481 PetscUseTypeMethod(dctx, query, idle);
482 PetscCall(PetscLogEventEnd(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr));
483 PetscCall(PetscInfo(dctx, "PetscDeviceContext ('%s', id %" PetscInt64_FMT ") %s idle\n", PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", PetscObjectCast(dctx)->id, *idle ? "was" : "was not"));
484 PetscFunctionReturn(PETSC_SUCCESS);
485 }
486
487 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
488 /*@C
489 PetscDeviceContextWaitForContext - Make one context wait for another context to finish
490
491 Not Collective
492
493 Input Parameters:
494 + dctxa - The `PetscDeviceContext` object that is waiting
495 - dctxb - The `PetscDeviceContext` object that is being waited on
496
497 Level: beginner
498
499 Notes:
500 Serializes two `PetscDeviceContext`s. Serialization is performed asynchronously; the host
501 does not wait for the serialization to actually occur.
502
503 This routine uses only the state of `dctxb` at the moment this routine was called, so any
504 future work queued will not affect `dctxa`. It is safe to pass the same context to both
505 arguments (in which case this routine does nothing).
506
507 DAG representation:
508 .vb
509 time ->
510
511 -> dctxa ---/- |= CALL =| - dctxa ->
512 /
513 -> dctxb -/------------------------>
514 .ve
515
516 .N ASYNC_API
517
518 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()`
519 @*/
PetscDeviceContextWaitForContext(PetscDeviceContext dctxa,PetscDeviceContext dctxb)520 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb)
521 {
522 PetscObjectId bid;
523
524 PetscFunctionBegin;
525 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxa));
526 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxb));
527 PetscCheckCompatibleDeviceContexts(dctxa, 1, dctxb, 2);
528 if (dctxa == dctxb) PetscFunctionReturn(PETSC_SUCCESS);
529 bid = PetscObjectCast(dctxb)->id;
530 PetscCall(PetscLogEventBegin(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr));
531 PetscUseTypeMethod(dctxa, waitforcontext, dctxb);
532 PetscCallCXX(CxxDataCast(dctxa)->upstream()[bid] = CxxDataCast(dctxb)->weak_snapshot());
533 PetscCall(PetscLogEventEnd(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr));
534 PetscCall(PetscInfo(dctxa, "dctx %" PetscInt64_FMT " waiting on dctx %" PetscInt64_FMT "\n", PetscObjectCast(dctxa)->id, bid));
535 PetscCall(PetscObjectStateIncrease(PetscObjectCast(dctxa)));
536 PetscFunctionReturn(PETSC_SUCCESS);
537 }
538
539 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
540 /*@C
541 PetscDeviceContextForkWithStreamType - Create a set of dependent child contexts from a parent
542 context with a prescribed `PetscStreamType`
543
544 Not Collective, Asynchronous
545
546 Input Parameters:
547 + dctx - The parent `PetscDeviceContext`
548 . stype - The prescribed `PetscStreamType`
549 - n - The number of children to create
550
551 Output Parameter:
552 . dsub - The created child context(s)
553
554 Level: intermediate
555
556 Notes:
557 This routine creates `n` edges of a DAG from a source node which are causally dependent on the
558 source node. This causal dependency is established as-if by calling
559 `PetscDeviceContextWaitForContext()` on every child.
560
561 `dsub` is allocated by this routine and has its lifetime bounded by `dctx`. That is, `dctx`
562 expects to free `dsub` (via `PetscDeviceContextJoin()`) before it itself is destroyed.
563
564 This routine only accounts for work queued on `dctx` up until calling this routine, any
565 subsequent work enqueued on `dctx` has no effect on `dsub`.
566
567 The `PetscStreamType` of `dctx` does not have to equal `stype`. In fact, it is often the case
568 that they are different. This is useful in cases where a routine can locally exploit stream
569 parallelism without needing to worry about what stream type the incoming `PetscDeviceContext`
570 carries.
571
572 DAG representation:
573 .vb
574 time ->
575
576 -> dctx - |= CALL =| -\----> dctx ------>
577 \---> dsub[0] --->
578 \--> ... ------->
579 \-> dsub[n-1] ->
580 .ve
581
582 .N ASYNC_API
583
584 .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`,
585 `PetscDeviceContextQueryIdle()`, `PetscDeviceContextWaitForContext()`
586 @*/
PetscDeviceContextForkWithStreamType(PetscDeviceContext dctx,PetscStreamType stype,PetscInt n,PetscDeviceContext ** dsub)587 PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext dctx, PetscStreamType stype, PetscInt n, PetscDeviceContext **dsub)
588 {
589 // debugging only
590 std::string idList;
591 auto ninput = n;
592
593 PetscFunctionBegin;
594 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
595 PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n);
596 PetscAssertPointer(dsub, 4);
597 *dsub = nullptr;
598 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
599 if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n));
600 PetscCall(PetscLogEventBegin(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr));
601 /* update child totals */
602 dctx->numChildren += n;
603 /* now to find out if we have room */
604 if (dctx->numChildren > dctx->maxNumChildren) {
605 const auto numChildren = dctx->numChildren;
606 auto &maxNumChildren = dctx->maxNumChildren;
607 auto numAllocated = numChildren;
608
609 /* no room, either from having too many kids or not having any */
610 if (auto &childIDs = dctx->childIDs) {
611 // the difference is backwards because we have not updated maxNumChildren yet
612 numAllocated -= maxNumChildren;
613 /* have existing children, must reallocate them */
614 PetscCall(PetscRealloc(numChildren * sizeof(*childIDs), &childIDs));
615 /* clear the extra memory since realloc doesn't do it for us */
616 PetscCall(PetscArrayzero(std::next(childIDs, maxNumChildren), numAllocated));
617 } else {
618 /* have no children */
619 PetscCall(PetscCalloc1(numChildren, &childIDs));
620 }
621 /* update total number of children */
622 maxNumChildren = numChildren;
623 }
624 PetscCall(PetscMalloc1(n, dsub));
625 for (PetscInt i = 0; ninput && (i < dctx->numChildren); ++i) {
626 auto &childID = dctx->childIDs[i];
627 /* empty child slot */
628 if (!childID) {
629 auto &childctx = (*dsub)[i];
630
631 /* create the child context in the image of its parent */
632 PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, &childctx));
633 PetscCall(PetscDeviceContextWaitForContext(childctx, dctx));
634 /* register the child with its parent */
635 PetscCall(PetscObjectGetId(PetscObjectCast(childctx), &childID));
636 if (PetscDefined(USE_DEBUG_AND_INFO)) {
637 PetscCallCXX(idList += std::to_string(childID));
638 if (ninput != 1) PetscCallCXX(idList += ", ");
639 }
640 --ninput;
641 }
642 }
643 PetscCall(PetscLogEventEnd(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr));
644 PetscCall(PetscDebugInfo(dctx, "Forked %" PetscInt_FMT " children from parent %" PetscInt64_FMT " with IDs: %s\n", n, PetscObjectCast(dctx)->id, idList.c_str()));
645 PetscFunctionReturn(PETSC_SUCCESS);
646 }
647
648 /*@C
649 PetscDeviceContextFork - Create a set of dependent child contexts from a parent context
650
651 Not Collective, Asynchronous
652
653 Input Parameters:
654 + dctx - The parent `PetscDeviceContext`
655 - n - The number of children to create
656
657 Output Parameter:
658 . dsub - The created child context(s)
659
660 Level: beginner
661
662 Notes:
663 Behaves identically to `PetscDeviceContextForkWithStreamType()` except that the prescribed
664 `PetscStreamType` is taken from `dctx`. In effect this routine is shorthand for\:
665
666 .vb
667 PetscStreamType stype;
668
669 PetscDeviceContextGetStreamType(dctx, &stype);
670 PetscDeviceContextForkWithStreamType(dctx, stype, ...);
671 .ve
672
673 .N ASYNC_API
674
675 .seealso: `PetscDeviceContextForkWithStreamType()`, `PetscDeviceContextJoin()`,
676 `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()`
677 @*/
PetscDeviceContextFork(PetscDeviceContext dctx,PetscInt n,PetscDeviceContext ** dsub)678 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub)
679 {
680 auto stype = PETSC_STREAM_DEFAULT;
681
682 PetscFunctionBegin;
683 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
684 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
685 PetscCall(PetscDeviceContextForkWithStreamType(dctx, stype, n, dsub));
686 PetscFunctionReturn(PETSC_SUCCESS);
687 }
688
689 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
690 /*@C
691 PetscDeviceContextJoin - Converge a set of child contexts
692
693 Not Collective, Asynchronous
694
695 Input Parameters:
696 + dctx - A `PetscDeviceContext` to converge on
697 . n - The number of sub contexts to converge
698 . joinMode - The type of join to perform
699 - dsub - The sub contexts to converge
700
701 Level: beginner
702
703 Notes:
704 If `PetscDeviceContextFork()` creates `n` edges from a source node which all depend on the source
705 node, then this routine is the exact mirror. That is, it creates a node (represented in `dctx`)
706 which receives `n` edges (and optionally destroys them) which is dependent on the completion
707 of all incoming edges.
708
709 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`. All contexts in `dsub` will be
710 destroyed by this routine. Thus all sub contexts must have been created with the `dctx`
711 passed to this routine.
712
713 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`. All sub contexts will additionally wait on
714 `dctx` after converging. This has the effect of "synchronizing" the outgoing edges. Note the
715 sync suffix does NOT refer to the host, i.e. this routine does NOT call
716 `PetscDeviceSynchronize()`.
717
718 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`. `dctx` waits for all sub contexts but
719 the sub contexts do not wait for one another or `dctx` afterwards.
720
721 DAG representations:
722 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`
723 .vb
724 time ->
725
726 -> dctx ---------/- |= CALL =| - dctx ->
727 -> dsub[0] -----/
728 -> ... -------/
729 -> dsub[n-1] -/
730 .ve
731 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`
732 .vb
733 time ->
734
735 -> dctx ---------/- |= CALL =| -\----> dctx ------>
736 -> dsub[0] -----/ \---> dsub[0] --->
737 -> ... -------/ \--> ... ------->
738 -> dsub[n-1] -/ \-> dsub[n-1] ->
739 .ve
740 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`
741 .vb
742 time ->
743
744 -> dctx ----------/- |= CALL =| - dctx ->
745 -> dsub[0] ------/----------------------->
746 -> ... --------/------------------------>
747 -> dsub[n-1] --/------------------------->
748 .ve
749
750 .N ASYNC_API
751
752 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextForkWithStreamType()`,
753 `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode`
754 @*/
PetscDeviceContextJoin(PetscDeviceContext dctx,PetscInt n,PetscDeviceContextJoinMode joinMode,PetscDeviceContext ** dsub)755 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub)
756 {
757 // debugging only
758 std::string idList;
759
760 PetscFunctionBegin;
761 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
762 /* validity of dctx is checked in the wait-for loop */
763 PetscAssertPointer(dsub, 4);
764 PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n);
765 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
766 if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n));
767 /* first dctx waits on all the incoming edges */
768 PetscCall(PetscLogEventBegin(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr));
769 for (PetscInt i = 0; i < n; ++i) {
770 PetscCheckCompatibleDeviceContexts(dctx, 1, (*dsub)[i], 4);
771 PetscCall(PetscDeviceContextWaitForContext(dctx, (*dsub)[i]));
772 if (PetscDefined(USE_DEBUG_AND_INFO)) {
773 PetscCallCXX(idList += std::to_string(PetscObjectCast((*dsub)[i])->id));
774 if (i + 1 < n) PetscCallCXX(idList += ", ");
775 }
776 }
777
778 /* now we handle the aftermath */
779 switch (joinMode) {
780 case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: {
781 const auto children = dctx->childIDs;
782 const auto maxchild = dctx->maxNumChildren;
783 auto &nchild = dctx->numChildren;
784 PetscInt j = 0;
785
786 PetscCheck(n <= nchild, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent", n, nchild);
787 /* update child count while it's still fresh in memory */
788 nchild -= n;
789 for (PetscInt i = 0; i < maxchild; ++i) {
790 if (children[i] && (children[i] == PetscObjectCast((*dsub)[j])->id)) {
791 /* child is one of ours, can destroy it */
792 PetscCall(PetscDeviceContextDestroy((*dsub) + j));
793 /* reset the child slot */
794 children[i] = 0;
795 if (++j == n) break;
796 }
797 }
798 /* gone through the loop but did not find every child */
799 PetscCheck(j == n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out in", n - j);
800 PetscCall(PetscFree(*dsub));
801 } break;
802 case PETSC_DEVICE_CONTEXT_JOIN_SYNC:
803 for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i], dctx));
804 case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC:
805 break;
806 default:
807 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given");
808 }
809 PetscCall(PetscLogEventEnd(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr));
810
811 PetscCall(PetscDebugInfo(dctx, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt64_FMT ", mode %s with IDs: %s\n", n, PetscObjectCast(dctx)->id, PetscDeviceContextJoinModes[joinMode], idList.c_str()));
812 PetscFunctionReturn(PETSC_SUCCESS);
813 }
814
815 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
816 /*@C
817 PetscDeviceContextSynchronize - Block the host until all work queued on a
818 `PetscDeviceContext` has finished
819
820 Not Collective
821
822 Input Parameter:
823 . dctx - The `PetscDeviceContext` to synchronize
824
825 Level: beginner
826
827 Notes:
828 The host will not return from this routine until `dctx` is idle. Any and all memory
829 operations queued on or otherwise associated with (either explicitly or implicitly via
830 dependencies) are guaranteed to have finished and be globally visible on return.
831
832 In effect, this routine serves as memory and execution barrier.
833
834 DAG representation:
835 .vb
836 time ->
837
838 -> dctx - |= CALL =| - dctx ->
839 .ve
840
841 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()`
842 @*/
PetscDeviceContextSynchronize(PetscDeviceContext dctx)843 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx)
844 {
845 PetscFunctionBegin;
846 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
847 PetscCall(PetscLogEventBegin(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr));
848 /* if it isn't setup there is nothing to sync on */
849 if (dctx->setup) {
850 PetscUseTypeMethod(dctx, synchronize);
851 PetscCall(PetscDeviceContextSyncClearMap_Internal(dctx));
852 }
853 PetscCall(PetscLogEventEnd(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr));
854 PetscFunctionReturn(PETSC_SUCCESS);
855 }
856
857 /* every device type has a vector of null PetscDeviceContexts -- one for each device */
858 static auto nullContexts = std::array<std::vector<PetscDeviceContext>, PETSC_DEVICE_MAX>{};
859 static auto nullContextsFinalizer = false;
860
PetscDeviceContextGetNullContextForDevice_Private(PetscBool user_set_device,PetscDevice device,PetscDeviceContext * dctx)861 static PetscErrorCode PetscDeviceContextGetNullContextForDevice_Private(PetscBool user_set_device, PetscDevice device, PetscDeviceContext *dctx)
862 {
863 PetscInt devid;
864 PetscDeviceType dtype;
865
866 PetscFunctionBegin;
867 PetscValidDevice(device, 2);
868 PetscAssertPointer(dctx, 3);
869 if (PetscUnlikely(!nullContextsFinalizer)) {
870 nullContextsFinalizer = true;
871 PetscCall(PetscRegisterFinalize([] {
872 PetscFunctionBegin;
873 for (auto &&dvec : nullContexts) {
874 for (auto &&dctx : dvec) PetscCall(PetscDeviceContextDestroy(&dctx));
875 PetscCallCXX(dvec.clear());
876 }
877 nullContextsFinalizer = false;
878 PetscFunctionReturn(PETSC_SUCCESS);
879 }));
880 }
881 PetscCall(PetscDeviceGetDeviceId(device, &devid));
882 PetscCall(PetscDeviceGetType(device, &dtype));
883 {
884 auto &ctxlist = nullContexts[dtype];
885
886 PetscCheck(devid >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Device ID (%" PetscInt_FMT ") must be positive", devid);
887 // need to resize the container if not big enough because incrementing the iterator in
888 // std::next() (if we haven't initialized that ctx yet) may cause it to fall outside the
889 // current size of the container.
890 if (static_cast<std::size_t>(devid) >= ctxlist.size()) PetscCallCXX(ctxlist.resize(devid + 1));
891 if (PetscUnlikely(!ctxlist[devid])) {
892 // we have not seen this device before
893 PetscCall(PetscDeviceContextCreate(dctx));
894 PetscCall(PetscInfo(*dctx, "Initializing null PetscDeviceContext (of type %s) for device %" PetscInt_FMT "\n", PetscDeviceTypes[dtype], devid));
895 {
896 const auto pobj = PetscObjectCast(*dctx);
897 const auto name = "null context " + std::to_string(devid);
898 const auto prefix = "null_context_" + std::to_string(devid) + '_';
899
900 PetscCall(PetscObjectSetName(pobj, name.c_str()));
901 PetscCall(PetscObjectSetOptionsPrefix(pobj, prefix.c_str()));
902 }
903 PetscCall(PetscDeviceContextSetStreamType(*dctx, PETSC_STREAM_DEFAULT));
904 PetscCall(PetscDeviceContextSetDevice_Private(*dctx, device, user_set_device));
905 PetscCall(PetscDeviceContextSetUp(*dctx));
906 // would use ctxlist.cbegin() but GCC 4.8 can't handle const iterator insert!
907 PetscCallCXX(ctxlist.insert(std::next(ctxlist.begin(), devid), *dctx));
908 } else *dctx = ctxlist[devid];
909 }
910 PetscFunctionReturn(PETSC_SUCCESS);
911 }
912
913 /*
914 Gets the "NULL" context for the current PetscDeviceType and PetscDevice. NULL contexts are
915 guaranteed to always be globally blocking.
916 */
PetscDeviceContextGetNullContext_Internal(PetscDeviceContext * dctx)917 PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *dctx)
918 {
919 PetscDeviceContext gctx;
920 PetscDevice gdev = nullptr;
921
922 PetscFunctionBegin;
923 PetscAssertPointer(dctx, 1);
924 PetscCall(PetscDeviceContextGetCurrentContext(&gctx));
925 PetscCall(PetscDeviceContextGetDevice(gctx, &gdev));
926 PetscCall(PetscDeviceContextGetNullContextForDevice_Private(gctx->usersetdevice, gdev, dctx));
927 PetscFunctionReturn(PETSC_SUCCESS);
928 }
929
930 /*@C
931 PetscDeviceContextSetFromOptions - Configure a `PetscDeviceContext` from the options database
932
933 Collective on `comm` or `dctx`
934
935 Input Parameters:
936 + comm - MPI communicator on which to query the options database (optional)
937 - dctx - The `PetscDeviceContext` to configure
938
939 Output Parameter:
940 . dctx - The `PetscDeviceContext`
941
942 Options Database Keys:
943 + -device_context_stream_type - type of stream to create inside the `PetscDeviceContext` -
944 `PetscDeviceContextSetStreamType()`
945 - -device_context_device_type - the type of `PetscDevice` to attach by default - `PetscDeviceType`
946
947 Level: beginner
948
949 Note:
950 The user may pass `MPI_COMM_NULL` for `comm` in which case the communicator of `dctx` is
951 used (which is always `PETSC_COMM_SELF`).
952
953 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()`,
954 `PetscDeviceContextView()`
955 @*/
PetscDeviceContextSetFromOptions(MPI_Comm comm,PetscDeviceContext dctx)956 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, PetscDeviceContext dctx)
957 {
958 const auto pobj = PetscObjectCast(dctx);
959 auto dtype = std::make_pair(PETSC_DEVICE_DEFAULT(), PETSC_FALSE);
960 auto stype = std::make_pair(PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE, PETSC_FALSE);
961 MPI_Comm old_comm = PETSC_COMM_SELF;
962
963 PetscFunctionBegin;
964 // do not user getoptionalnullcontext here, the user is not allowed to set it from options!
965 PetscValidDeviceContext(dctx, 2);
966 /* set the device type first */
967 if (const auto device = dctx->device) PetscCall(PetscDeviceGetType(device, &dtype.first));
968 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype.first));
969
970 if (comm == MPI_COMM_NULL) {
971 PetscCall(PetscObjectGetComm(pobj, &comm));
972 } else {
973 // briefly set the communicator for dctx (it is always PETSC_COMM_SELF) so
974 // PetscObjectOptionsBegin() behaves as if dctx had comm
975 old_comm = Petsc::util::exchange(pobj->comm, comm);
976 }
977
978 PetscObjectOptionsBegin(pobj);
979 PetscCall(PetscDeviceContextQueryOptions_Internal(PetscOptionsObject, dtype, stype));
980 PetscOptionsEnd();
981 // reset the comm (should be PETSC_COMM_SELF)
982 if (comm != MPI_COMM_NULL) pobj->comm = old_comm;
983 if (dtype.second) PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, dtype.first));
984 if (stype.second) PetscCall(PetscDeviceContextSetStreamType(dctx, stype.first));
985 PetscCall(PetscDeviceContextSetUp(dctx));
986 PetscFunctionReturn(PETSC_SUCCESS);
987 }
988
989 /*@
990 PetscDeviceContextView - View a `PetscDeviceContext`
991
992 Collective on `viewer`
993
994 Input Parameters:
995 + dctx - The `PetscDeviceContext`
996 - viewer - The `PetscViewer` to view `dctx` with (may be `NULL`)
997
998 Level: beginner
999
1000 Note:
1001 If `viewer` is `NULL`, `PETSC_VIEWER_STDOUT_WORLD` is used instead, in which case this
1002 routine is collective on `PETSC_COMM_WORLD`.
1003
1004 .seealso: `PetscDeviceContextViewFromOptions()`, `PetscDeviceView()`, `PETSC_VIEWER_STDOUT_WORLD`, `PetscDeviceContextCreate()`
1005 @*/
PetscDeviceContextView(PetscDeviceContext dctx,PetscViewer viewer)1006 PetscErrorCode PetscDeviceContextView(PetscDeviceContext dctx, PetscViewer viewer)
1007 {
1008 PetscBool isascii;
1009
1010 PetscFunctionBegin;
1011 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1012 if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer));
1013 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
1014 PetscCall(PetscObjectTypeCompare(PetscObjectCast(viewer), PETSCVIEWERASCII, &isascii));
1015 if (isascii) {
1016 auto stype = PETSC_STREAM_DEFAULT;
1017 PetscViewer sub;
1018
1019 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sub));
1020 PetscCall(PetscObjectPrintClassNamePrefixType(PetscObjectCast(dctx), sub));
1021 PetscCall(PetscViewerASCIIPushTab(sub));
1022 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
1023 PetscCall(PetscViewerASCIIPrintf(sub, "stream type: %s\n", PetscStreamTypes[stype]));
1024 PetscCall(PetscViewerASCIIPrintf(sub, "children: %" PetscInt_FMT "\n", dctx->numChildren));
1025 if (const auto nchild = dctx->numChildren) {
1026 PetscCall(PetscViewerASCIIPushTab(sub));
1027 for (PetscInt i = 0; i < nchild; ++i) {
1028 if (i == nchild - 1) {
1029 PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT, dctx->childIDs[i]));
1030 } else {
1031 PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT ", ", dctx->childIDs[i]));
1032 }
1033 }
1034 }
1035 PetscCall(PetscViewerASCIIPopTab(sub));
1036 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sub));
1037 PetscCall(PetscViewerASCIIPushTab(viewer));
1038 }
1039 if (const auto device = dctx->device) PetscCall(PetscDeviceView(device, viewer));
1040 if (isascii) PetscCall(PetscViewerASCIIPopTab(viewer));
1041 PetscFunctionReturn(PETSC_SUCCESS);
1042 }
1043
1044 /*@
1045 PetscDeviceContextViewFromOptions - View a `PetscDeviceContext` from options
1046
1047 Input Parameters:
1048 + dctx - The `PetscDeviceContext` to view
1049 . obj - Optional `PetscObject` to associate (may be `NULL`)
1050 - name - The command line option
1051
1052 Level: beginner
1053
1054 .seealso: `PetscDeviceContextView()`, `PetscObjectViewFromOptions()`, `PetscDeviceContextCreate()`
1055 @*/
PetscDeviceContextViewFromOptions(PetscDeviceContext dctx,PetscObject obj,const char name[])1056 PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext dctx, PetscObject obj, const char name[])
1057 {
1058 PetscFunctionBegin;
1059 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1060 if (obj) PetscValidHeader(obj, 2);
1061 PetscAssertPointer(name, 3);
1062 PetscCall(PetscObjectViewFromOptions(PetscObjectCast(dctx), obj, name));
1063 PetscFunctionReturn(PETSC_SUCCESS);
1064 }
1065
1066 /*@C
1067 PetscDeviceContextGetStreamHandle - Return a handle to the underlying stream of the current device context
1068
1069 Input Parameter:
1070 . dctx - The `PetscDeviceContext` to get the stream from
1071
1072 Output Parameter:
1073 . handle - A pointer to the handle to the stream
1074
1075 Level: developer
1076
1077 Note:
1078 This routine is dangerous. It exists only for the most experienced users and
1079 internal PETSc development.
1080
1081 There is no way for PETSc's auto-dependency system to track what the caller does with the
1082 stream.
1083
1084 If the user uses the stream to copy memory that was previously modified by PETSc, or launches
1085 kernels that modify memory with the stream, it is the users responsibility to inform PETSc of
1086 their actions via `PetscDeviceContextMarkIntentFromID()`. Failure to do so may introduce a
1087 race condition. This race condition may manifest in nondeterministic ways.
1088
1089 Alternatively, the user may synchronize the stream immediately before and after use. This is
1090 the safest option.
1091
1092 Example Usage:
1093 .vb
1094 PetscDeviceContext dctx;
1095 PetscDeviceType type;
1096 void *handle;
1097
1098 PetscDeviceContextGetCurrentContext(&dctx);
1099 PetscDeviceContextGetStreamHandle(dctx, &handle);
1100 PetscDeviceContextGetDeviceType(dctx, &type);
1101
1102 if (type == PETSC_DEVICE_CUDA) {
1103 cudaStream_t stream = *(cudaStream_t *)handle;
1104
1105 my_cuda_kernel<<<1, 2, 3, stream>>>();
1106 }
1107 .ve
1108 Alternatively, if type of `PetscDeviceContext` is known (for example `PETSC_DEVICE_HIP`), the
1109 user may pass in a pointer to stream handle directly\:
1110 .vb
1111 hipStream_t *stream;
1112
1113 // note the cast to void **
1114 PetscDeviceContextGetStreamHandle(dctx, (void **)&stream);
1115 // note the dereference
1116 my_hip_kernel<<<1, 2, 3, *stream>>>();
1117 .ve
1118
1119 .N ASYNC_API
1120
1121 .seealso: `PetscDeviceContext`
1122 @*/
PetscDeviceContextGetStreamHandle(PetscDeviceContext dctx,void ** handle)1123 PetscErrorCode PetscDeviceContextGetStreamHandle(PetscDeviceContext dctx, void **handle)
1124 {
1125 PetscFunctionBegin;
1126 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1127 PetscAssertPointer(handle, 2);
1128 PetscCall(PetscDeviceContextGetStreamHandle_Internal(dctx, handle));
1129 PetscFunctionReturn(PETSC_SUCCESS);
1130 }
1131