xref: /petsc/src/sys/objects/device/interface/dcontext.cxx (revision 0970d93f389b89d61186faed376e9cc5531f567f)
1 #include "petscdevice_interface_internal.hpp" /*I <petscdevice.h> I*/
2 #include <petsc/private/viewerimpl.h>         // _p_PetscViewer for PetscObjectCast()
3 
4 #include <petsc/private/cpp/object_pool.hpp>
5 #include <petsc/private/cpp/utility.hpp>
6 #include <petsc/private/cpp/array.hpp>
7 
8 #include <vector>
9 #include <string> // std::to_string among other things
10 
11 /* Define the allocator */
12 class PetscDeviceContextConstructor : public Petsc::ConstructorInterface<_p_PetscDeviceContext, PetscDeviceContextConstructor> {
13 public:
14   PetscErrorCode construct_(PetscDeviceContext dctx) const noexcept
15   {
16     PetscFunctionBegin;
17     PetscCall(PetscArrayzero(dctx, 1));
18     PetscCall(PetscHeaderInitialize_Private(dctx, PETSC_DEVICE_CONTEXT_CLASSID, "PetscDeviceContext", "PetscDeviceContext", "Sys", PETSC_COMM_SELF, PetscDeviceContextDestroy, PetscDeviceContextView));
19     PetscCallCXX(PetscObjectCast(dctx)->cpp = new CxxData());
20     PetscCall(underlying().reset(dctx, false));
21     PetscFunctionReturn(PETSC_SUCCESS);
22   }
23 
24   static PetscErrorCode destroy_(PetscDeviceContext dctx) noexcept
25   {
26     PetscFunctionBegin;
27     PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren);
28     PetscTryTypeMethod(dctx, destroy);
29     PetscCall(PetscDeviceDestroy(&dctx->device));
30     PetscCall(PetscFree(dctx->childIDs));
31     delete CxxDataCast(dctx);
32     PetscCall(PetscHeaderDestroy_Private(PetscObjectCast(dctx), PETSC_FALSE));
33     PetscFunctionReturn(PETSC_SUCCESS);
34   }
35 
36   static PetscErrorCode reset_(PetscDeviceContext dctx, bool zero = true) noexcept
37   {
38     PetscFunctionBegin;
39     if (zero) {
40       // reset the device if the user set it
41       if (Petsc::util::exchange(dctx->usersetdevice, PETSC_FALSE)) {
42         PetscTryTypeMethod(dctx, destroy);
43         PetscCall(PetscDeviceDestroy(&dctx->device));
44         PetscCall(PetscArrayzero(dctx->ops, 1));
45         dctx->data = nullptr;
46       }
47       PetscCall(PetscHeaderReset_Internal(PetscObjectCast(dctx)));
48       dctx->numChildren = 0;
49       dctx->setup       = PETSC_FALSE;
50       // don't deallocate the child array, rather just zero it out
51       PetscCall(PetscArrayzero(dctx->childIDs, dctx->maxNumChildren));
52       PetscCall(CxxDataCast(dctx)->clear());
53     }
54     dctx->streamType = PETSC_STREAM_DEFAULT_BLOCKING;
55     PetscFunctionReturn(PETSC_SUCCESS);
56   }
57 
58   static PetscErrorCode invalidate_(PetscDeviceContext) noexcept { return PETSC_SUCCESS; }
59 };
60 
61 static Petsc::ObjectPool<_p_PetscDeviceContext, PetscDeviceContextConstructor> contextPool;
62 
63 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
64 /*@C
65   PetscDeviceContextCreate - Creates a `PetscDeviceContext`
66 
67   Not Collective
68 
69   Output Parameter:
70 . dctx - The `PetscDeviceContext`
71 
72   Level: beginner
73 
74   Note:
75   Unlike almost every other PETSc class it is advised that most users use
76   `PetscDeviceContextDuplicate()` rather than this routine to create new contexts. Contexts of
77   different types are incompatible with one another; using `PetscDeviceContextDuplicate()`
78   ensures compatible types.
79 
80   DAG representation:
81 .vb
82   time ->
83 
84   |= CALL =| - dctx ->
85 .ve
86 
87 .N ASYNC_API
88 
89 .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`,
90 `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`,
91 `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextView()`, `PetscDeviceContextDestroy()`
92 @*/
93 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx)
94 {
95   PetscFunctionBegin;
96   PetscAssertPointer(dctx, 1);
97   PetscCall(PetscDeviceInitializePackage());
98   PetscCall(PetscLogEventBegin(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr));
99   PetscCall(contextPool.allocate(dctx));
100   PetscCall(PetscLogEventEnd(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr));
101   PetscFunctionReturn(PETSC_SUCCESS);
102 }
103 
104 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
105 /*@C
106   PetscDeviceContextDestroy - Frees a `PetscDeviceContext`
107 
108   Not Collective
109 
110   Input Parameter:
111 . dctx - The `PetscDeviceContext`
112 
113   Level: beginner
114 
115   Notes:
116   No implicit synchronization occurs due to this routine, all resources are released completely
117   asynchronously w.r.t. the host. If one needs to guarantee access to the data produced on
118   `dctx`'s stream the user is responsible for calling `PetscDeviceContextSynchronize()` before
119   calling this routine.
120 
121   DAG representation:
122 .vb
123   time ->
124 
125   -> dctx - |= CALL =|
126 .ve
127 
128   Developer Notes:
129   `dctx` is never actually "destroyed" in the classical sense. It is returned to an ever
130   growing pool of `PetscDeviceContext`s. There are currently no limits on the size of the pool,
131   this should perhaps be implemented.
132 
133 .N ASYNC_API
134 
135 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
136 `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()`
137 @*/
138 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx)
139 {
140   PetscFunctionBegin;
141   PetscAssertPointer(dctx, 1);
142   if (!*dctx) PetscFunctionReturn(PETSC_SUCCESS);
143   PetscCall(PetscLogEventBegin(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr));
144   if (--(PetscObjectCast(*dctx)->refct) <= 0) {
145     PetscCall(PetscDeviceContextCheckNotOrphaned_Internal(*dctx));
146     PetscCall(contextPool.deallocate(dctx));
147   }
148   PetscCall(PetscLogEventEnd(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr));
149   *dctx = nullptr;
150   PetscFunctionReturn(PETSC_SUCCESS);
151 }
152 
153 /*@C
154   PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a
155   `PetscDeviceContext`
156 
157   Not Collective
158 
159   Input Parameters:
160 + dctx - The `PetscDeviceContext`
161 - type - The `PetscStreamType`
162 
163   Level: beginner
164 
165   Note:
166   See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
167   types and their interactions. If the `PetscDeviceContext` was previously set up and stream
168   type was changed, you must call `PetscDeviceContextSetUp()` again after this routine.
169 
170 .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`,
171 `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()`
172 @*/
173 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type)
174 {
175   PetscFunctionBegin;
176   // do not use getoptionalnullcontext here since we do not want the user to change the stream
177   // type
178   PetscValidDeviceContext(dctx, 1);
179   PetscValidStreamType(type, 2);
180   // only need to do complex swapping if the object has already been setup
181   if (dctx->setup && (dctx->streamType != type)) {
182     dctx->setup = PETSC_FALSE;
183     PetscCall(PetscLogEventBegin(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr));
184     PetscUseTypeMethod(dctx, changestreamtype, type);
185     PetscCall(PetscLogEventEnd(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr));
186   }
187   dctx->streamType = type;
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 /*@C
192   PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a
193   `PetscDeviceContext`
194 
195   Not Collective
196 
197   Input Parameter:
198 . dctx - The `PetscDeviceContext`
199 
200   Output Parameter:
201 . type - The `PetscStreamType`
202 
203   Level: beginner
204 
205   Note:
206   See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
207   types and their interactions
208 
209 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`,
210 `PetscDeviceContextSetFromOptions()`
211 @*/
212 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type)
213 {
214   PetscFunctionBegin;
215   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
216   PetscAssertPointer(type, 2);
217   *type = dctx->streamType;
218   PetscFunctionReturn(PETSC_SUCCESS);
219 }
220 
221 /*
222   Actual function to set the device.
223 
224   1. Repeatedly destroying and recreating internal data structures (like streams and events)
225      for recycled PetscDeviceContexts is not free. If done often, it does add up.
226   2. The vast majority of PetscDeviceContexts are created by PETSc either as children or
227      default contexts. The default contexts *never* change type, and the children are extremely
228      unlikely to (chances are if you fork once, you will fork again very soon).
229   3. The only time this calculus changes is if the user themselves sets the device type. In
230      this case we do not know what the user has changed, so must always wipe the slate clean.
231 
232   Thus we need to keep track whether the user explicitly sets the device contexts device.
233 */
234 static PetscErrorCode PetscDeviceContextSetDevice_Private(PetscDeviceContext dctx, PetscDevice device, PetscBool user_set)
235 {
236   PetscFunctionBegin;
237   // do not use getoptionalnullcontext here since we do not want the user to change its device
238   PetscValidDeviceContext(dctx, 1);
239   PetscValidDevice(device, 2);
240   if (dctx->device && (dctx->device->id == device->id)) PetscFunctionReturn(PETSC_SUCCESS);
241   PetscCall(PetscLogEventBegin(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr));
242   PetscTryTypeMethod(dctx, destroy);
243   PetscCall(PetscDeviceDestroy(&dctx->device));
244   PetscCall(PetscMemzero(dctx->ops, sizeof(*dctx->ops)));
245   PetscCall(PetscDeviceReference_Internal(device));
246   // set it before calling the method
247   dctx->device = device;
248   PetscCall((*device->ops->createcontext)(dctx));
249   PetscCall(PetscLogEventEnd(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr));
250   dctx->setup         = PETSC_FALSE;
251   dctx->usersetdevice = user_set;
252   PetscFunctionReturn(PETSC_SUCCESS);
253 }
254 
255 PetscErrorCode PetscDeviceContextSetDefaultDeviceForType_Internal(PetscDeviceContext dctx, PetscDeviceType type)
256 {
257   PetscDevice device;
258 
259   PetscFunctionBegin;
260   PetscCall(PetscDeviceGetDefaultForType_Internal(type, &device));
261   PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_FALSE));
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 /*@C
266   PetscDeviceContextSetDevice - Set the underlying `PetscDevice` for a `PetscDeviceContext`
267 
268   Not Collective
269 
270   Input Parameters:
271 + dctx   - The `PetscDeviceContext`
272 - device - The `PetscDevice`
273 
274   Level: intermediate
275 
276   Notes:
277   This routine is effectively `PetscDeviceContext`'s "set-type" (so every `PetscDeviceContext` must
278   also have an attached `PetscDevice`). Unlike the usual set-type semantics, it is not strictly
279   necessary to set a contexts device to enable usage, any created `PetscDeviceContext`s will
280   always come equipped with the "default" device.
281 
282   This routine is a no-op if `device` is already attached to `dctx`.
283 
284   This routine may (but is very unlikely to) initialize the backend device and may incur
285   synchronization.
286 
287 .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()`,
288 `PetscDeviceContextGetDeviceType()`
289 @*/
290 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device)
291 {
292   PetscFunctionBegin;
293   PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_TRUE));
294   PetscFunctionReturn(PETSC_SUCCESS);
295 }
296 
297 /*@C
298   PetscDeviceContextGetDevice - Get the underlying `PetscDevice` for a `PetscDeviceContext`
299 
300   Not Collective
301 
302   Input Parameter:
303 . dctx - the `PetscDeviceContext`
304 
305   Output Parameter:
306 . device - The `PetscDevice`
307 
308   Level: intermediate
309 
310   Note:
311   This is a borrowed reference, the user should not destroy `device`.
312 
313 .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice`, `PetscDeviceContextGetDeviceType()`
314 @*/
315 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device)
316 {
317   PetscFunctionBegin;
318   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
319   PetscAssertPointer(device, 2);
320   PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt64_FMT " has no attached PetscDevice to get", PetscObjectCast(dctx)->id);
321   *device = dctx->device;
322   PetscFunctionReturn(PETSC_SUCCESS);
323 }
324 
325 /*@C
326   PetscDeviceContextGetDeviceType - Get the `PetscDeviceType` for a `PetscDeviceContext`
327 
328   Not Collective
329 
330   Input Parameter:
331 . dctx - The `PetscDeviceContext`
332 
333   Output Parameter:
334 . type - The `PetscDeviceType`
335 
336   Level: beginner
337 
338   Note:
339   This routine is a convenience shorthand for `PetscDeviceContextGetDevice()` ->
340   `PetscDeviceGetType()`.
341 
342 .seealso: `PetscDeviceType`, `PetscDeviceContextGetDevice()`, `PetscDeviceGetType()`, `PetscDevice`
343 @*/
344 PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext dctx, PetscDeviceType *type)
345 {
346   PetscDevice device = nullptr;
347 
348   PetscFunctionBegin;
349   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
350   PetscAssertPointer(type, 2);
351   PetscCall(PetscDeviceContextGetDevice(dctx, &device));
352   PetscCall(PetscDeviceGetType(device, type));
353   PetscFunctionReturn(PETSC_SUCCESS);
354 }
355 
356 /*@C
357   PetscDeviceContextSetUp - Prepares a `PetscDeviceContext` for use
358 
359   Not Collective
360 
361   Input Parameter:
362 . dctx - The `PetscDeviceContext`
363 
364   Level: beginner
365 
366   Developer Notes:
367   This routine is usually the stage where a `PetscDeviceContext` acquires device-side data
368   structures such as streams, events, and (possibly) handles.
369 
370 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
371 `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()`
372 @*/
373 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx)
374 {
375   PetscFunctionBegin;
376   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
377   if (dctx->setup) PetscFunctionReturn(PETSC_SUCCESS);
378   if (!dctx->device) {
379     const auto default_dtype = PETSC_DEVICE_DEFAULT();
380 
381     PetscCall(PetscInfo(dctx, "PetscDeviceContext %" PetscInt64_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", PetscObjectCast(dctx)->id, PetscDeviceTypes[default_dtype]));
382     PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, default_dtype));
383   }
384   PetscCall(PetscLogEventBegin(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr));
385   PetscUseTypeMethod(dctx, setup);
386   PetscCall(PetscLogEventEnd(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr));
387   dctx->setup = PETSC_TRUE;
388   PetscFunctionReturn(PETSC_SUCCESS);
389 }
390 
391 static PetscErrorCode PetscDeviceContextDuplicate_Private(PetscDeviceContext dctx, PetscStreamType stype, PetscDeviceContext *dctxdup)
392 {
393   PetscFunctionBegin;
394   PetscCall(PetscLogEventBegin(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr));
395   PetscCall(PetscDeviceContextCreate(dctxdup));
396   PetscCall(PetscDeviceContextSetStreamType(*dctxdup, stype));
397   if (const auto device = dctx->device) PetscCall(PetscDeviceContextSetDevice_Private(*dctxdup, device, dctx->usersetdevice));
398   PetscCall(PetscDeviceContextSetUp(*dctxdup));
399   PetscCall(PetscLogEventEnd(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr));
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
402 
403 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
404 /*@C
405   PetscDeviceContextDuplicate - Duplicates a `PetscDeviceContext` object
406 
407   Not Collective
408 
409   Input Parameter:
410 . dctx - The `PetscDeviceContext` to duplicate
411 
412   Output Parameter:
413 . dctxdup - The duplicated `PetscDeviceContext`
414 
415   Level: beginner
416 
417   Notes:
418   This is a shorthand method for creating a `PetscDeviceContext` with the exact same settings as
419   another. Note however that `dctxdup` does not share any of the underlying data with `dctx`,
420   (including its current stream-state) they are completely separate objects.
421 
422   There is no implied ordering between `dctx` or `dctxdup`.
423 
424   DAG representation:
425 .vb
426   time ->
427 
428   -> dctx - |= CALL =| - dctx ---->
429                        - dctxdup ->
430 .ve
431 
432 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
433 `PetscDeviceContextSetStreamType()`
434 @*/
435 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup)
436 {
437   auto stype = PETSC_STREAM_DEFAULT_BLOCKING;
438 
439   PetscFunctionBegin;
440   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
441   PetscAssertPointer(dctxdup, 2);
442   PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
443   PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, dctxdup));
444   PetscFunctionReturn(PETSC_SUCCESS);
445 }
446 
447 /*@C
448   PetscDeviceContextQueryIdle - Returns whether or not a `PetscDeviceContext` is idle
449 
450   Not Collective
451 
452   Input Parameter:
453 . dctx - The `PetscDeviceContext`
454 
455   Output Parameter:
456 . idle - `PETSC_TRUE` if `dctx` has NO work, `PETSC_FALSE` if it has work
457 
458   Level: intermediate
459 
460   Note:
461   This routine only refers a singular context and does NOT take any of its children into
462   account. That is, if `dctx` is idle but has dependents who do have work this routine still
463   returns `PETSC_TRUE`.
464 
465 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()`
466 @*/
467 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle)
468 {
469   PetscFunctionBegin;
470   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
471   PetscAssertPointer(idle, 2);
472   PetscCall(PetscLogEventBegin(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr));
473   PetscUseTypeMethod(dctx, query, idle);
474   PetscCall(PetscLogEventEnd(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr));
475   PetscCall(PetscInfo(dctx, "PetscDeviceContext ('%s', id %" PetscInt64_FMT ") %s idle\n", PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", PetscObjectCast(dctx)->id, *idle ? "was" : "was not"));
476   PetscFunctionReturn(PETSC_SUCCESS);
477 }
478 
479 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
480 /*@C
481   PetscDeviceContextWaitForContext - Make one context wait for another context to finish
482 
483   Not Collective
484 
485   Input Parameters:
486 + dctxa - The `PetscDeviceContext` object that is waiting
487 - dctxb - The `PetscDeviceContext` object that is being waited on
488 
489   Level: beginner
490 
491   Notes:
492   Serializes two `PetscDeviceContext`s. Serialization is performed asynchronously; the host
493   does not wait for the serialization to actually occur.
494 
495   This routine uses only the state of `dctxb` at the moment this routine was called, so any
496   future work queued will not affect `dctxa`. It is safe to pass the same context to both
497   arguments (in which case this routine does nothing).
498 
499   DAG representation:
500 .vb
501   time ->
502 
503   -> dctxa ---/- |= CALL =| - dctxa ->
504              /
505   -> dctxb -/------------------------>
506 .ve
507 
508 .N ASYNC_API
509 
510 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()`
511 @*/
512 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb)
513 {
514   PetscObject aobj;
515 
516   PetscFunctionBegin;
517   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxa));
518   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxb));
519   PetscCheckCompatibleDeviceContexts(dctxa, 1, dctxb, 2);
520   if (dctxa == dctxb) PetscFunctionReturn(PETSC_SUCCESS);
521   aobj = PetscObjectCast(dctxa);
522   PetscCall(PetscLogEventBegin(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr));
523   PetscUseTypeMethod(dctxa, waitforcontext, dctxb);
524   PetscCallCXX(CxxDataCast(dctxa)->upstream[dctxb] = CxxDataParent(dctxb));
525   PetscCall(PetscLogEventEnd(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr));
526   PetscCall(PetscInfo(dctxa, "dctx %" PetscInt64_FMT " waiting on dctx %" PetscInt64_FMT "\n", aobj->id, PetscObjectCast(dctxb)->id));
527   PetscCall(PetscObjectStateIncrease(aobj));
528   PetscFunctionReturn(PETSC_SUCCESS);
529 }
530 
531 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
532 /*@C
533   PetscDeviceContextForkWithStreamType - Create a set of dependent child contexts from a parent
534   context with a prescribed `PetscStreamType`
535 
536   Not Collective, Asynchronous
537 
538   Input Parameters:
539 + dctx  - The parent `PetscDeviceContext`
540 . stype - The prescribed `PetscStreamType`
541 - n     - The number of children to create
542 
543   Output Parameter:
544 . dsub - The created child context(s)
545 
546   Level: intermediate
547 
548   Notes:
549   This routine creates `n` edges of a DAG from a source node which are causally dependent on the
550   source node. This causal dependency is established as-if by calling
551   `PetscDeviceContextWaitForContext()` on every child.
552 
553   `dsub` is allocated by this routine and has its lifetime bounded by `dctx`. That is, `dctx`
554   expects to free `dsub` (via `PetscDeviceContextJoin()`) before it itself is destroyed.
555 
556   This routine only accounts for work queued on `dctx` up until calling this routine, any
557   subsequent work enqueued on `dctx` has no effect on `dsub`.
558 
559   The `PetscStreamType` of `dctx` does not have to equal `stype`. In fact, it is often the case
560   that they are different. This is useful in cases where a routine can locally exploit stream
561   parallelism without needing to worry about what stream type the incoming `PetscDeviceContext`
562   carries.
563 
564   DAG representation:
565 .vb
566   time ->
567 
568   -> dctx - |= CALL =| -\----> dctx ------>
569                          \---> dsub[0] --->
570                           \--> ... ------->
571                            \-> dsub[n-1] ->
572 .ve
573 
574 .N ASYNC_API
575 
576 .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`,
577 `PetscDeviceContextQueryIdle()`, `PetscDeviceContextWaitForContext()`
578 @*/
579 PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext dctx, PetscStreamType stype, PetscInt n, PetscDeviceContext **dsub)
580 {
581   // debugging only
582   std::string idList;
583   auto        ninput = n;
584 
585   PetscFunctionBegin;
586   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
587   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n);
588   PetscAssertPointer(dsub, 4);
589   *dsub = nullptr;
590   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
591   if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n));
592   PetscCall(PetscLogEventBegin(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr));
593   /* update child totals */
594   dctx->numChildren += n;
595   /* now to find out if we have room */
596   if (dctx->numChildren > dctx->maxNumChildren) {
597     const auto numChildren    = dctx->numChildren;
598     auto      &maxNumChildren = dctx->maxNumChildren;
599     auto       numAllocated   = numChildren;
600 
601     /* no room, either from having too many kids or not having any */
602     if (auto &childIDs = dctx->childIDs) {
603       // the difference is backwards because we have not updated maxNumChildren yet
604       numAllocated -= maxNumChildren;
605       /* have existing children, must reallocate them */
606       PetscCall(PetscRealloc(numChildren * sizeof(*childIDs), &childIDs));
607       /* clear the extra memory since realloc doesn't do it for us */
608       PetscCall(PetscArrayzero(std::next(childIDs, maxNumChildren), numAllocated));
609     } else {
610       /* have no children */
611       PetscCall(PetscCalloc1(numChildren, &childIDs));
612     }
613     /* update total number of children */
614     maxNumChildren = numChildren;
615   }
616   PetscCall(PetscMalloc1(n, dsub));
617   for (PetscInt i = 0; ninput && (i < dctx->numChildren); ++i) {
618     auto &childID = dctx->childIDs[i];
619     /* empty child slot */
620     if (!childID) {
621       auto &childctx = (*dsub)[i];
622 
623       /* create the child context in the image of its parent */
624       PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, &childctx));
625       PetscCall(PetscDeviceContextWaitForContext(childctx, dctx));
626       /* register the child with its parent */
627       PetscCall(PetscObjectGetId(PetscObjectCast(childctx), &childID));
628       if (PetscDefined(USE_DEBUG_AND_INFO)) {
629         PetscCallCXX(idList += std::to_string(childID));
630         if (ninput != 1) PetscCallCXX(idList += ", ");
631       }
632       --ninput;
633     }
634   }
635   PetscCall(PetscLogEventEnd(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr));
636   PetscCall(PetscDebugInfo(dctx, "Forked %" PetscInt_FMT " children from parent %" PetscInt64_FMT " with IDs: %s\n", n, PetscObjectCast(dctx)->id, idList.c_str()));
637   PetscFunctionReturn(PETSC_SUCCESS);
638 }
639 
640 /*@C
641   PetscDeviceContextFork - Create a set of dependent child contexts from a parent context
642 
643   Not Collective, Asynchronous
644 
645   Input Parameters:
646 + dctx - The parent `PetscDeviceContext`
647 - n    - The number of children to create
648 
649   Output Parameter:
650 . dsub - The created child context(s)
651 
652   Level: beginner
653 
654   Notes:
655   Behaves identically to `PetscDeviceContextForkWithStreamType()` except that the prescribed
656   `PetscStreamType` is taken from `dctx`. In effect this routine is shorthand for\:
657 
658 .vb
659   PetscStreamType stype;
660 
661   PetscDeviceContextGetStreamType(dctx, &stype);
662   PetscDeviceContextForkWithStreamType(dctx, stype, ...);
663 .ve
664 
665 .N ASYNC_API
666 
667 .seealso: `PetscDeviceContextForkWithStreamType()`, `PetscDeviceContextJoin()`,
668 `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()`
669 @*/
670 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub)
671 {
672   auto stype = PETSC_STREAM_DEFAULT_BLOCKING;
673 
674   PetscFunctionBegin;
675   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
676   PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
677   PetscCall(PetscDeviceContextForkWithStreamType(dctx, stype, n, dsub));
678   PetscFunctionReturn(PETSC_SUCCESS);
679 }
680 
681 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
682 /*@C
683   PetscDeviceContextJoin - Converge a set of child contexts
684 
685   Not Collective, Asynchronous
686 
687   Input Parameters:
688 + dctx     - A `PetscDeviceContext` to converge on
689 . n        - The number of sub contexts to converge
690 . joinMode - The type of join to perform
691 - dsub     - The sub contexts to converge
692 
693   Level: beginner
694 
695   Notes:
696   If `PetscDeviceContextFork()` creates `n` edges from a source node which all depend on the source
697   node, then this routine is the exact mirror. That is, it creates a node (represented in `dctx`)
698   which receives `n` edges (and optionally destroys them) which is dependent on the completion
699   of all incoming edges.
700 
701   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`. All contexts in `dsub` will be
702   destroyed by this routine. Thus all sub contexts must have been created with the `dctx`
703   passed to this routine.
704 
705   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`. All sub contexts will additionally wait on
706   `dctx` after converging. This has the effect of "synchronizing" the outgoing edges. Note the
707   sync suffix does NOT refer to the host, i.e. this routine does NOT call
708   `PetscDeviceSynchronize()`.
709 
710   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`. `dctx` waits for all sub contexts but
711   the sub contexts do not wait for one another or `dctx` afterwards.
712 
713   DAG representations:
714   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`
715 .vb
716   time ->
717 
718   -> dctx ---------/- |= CALL =| - dctx ->
719   -> dsub[0] -----/
720   ->  ... -------/
721   -> dsub[n-1] -/
722 .ve
723   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`
724 .vb
725   time ->
726 
727   -> dctx ---------/- |= CALL =| -\----> dctx ------>
728   -> dsub[0] -----/                \---> dsub[0] --->
729   ->  ... -------/                  \--> ... ------->
730   -> dsub[n-1] -/                    \-> dsub[n-1] ->
731 .ve
732   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`
733 .vb
734   time ->
735 
736   -> dctx ----------/- |= CALL =| - dctx ->
737   -> dsub[0] ------/----------------------->
738   ->  ... --------/------------------------>
739   -> dsub[n-1] --/------------------------->
740 .ve
741 
742 .N ASYNC_API
743 
744 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextForkWithStreamType()`,
745 `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode`
746 @*/
747 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub)
748 {
749   // debugging only
750   std::string idList;
751 
752   PetscFunctionBegin;
753   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
754   /* validity of dctx is checked in the wait-for loop */
755   PetscAssertPointer(dsub, 4);
756   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n);
757   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
758   if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n));
759   /* first dctx waits on all the incoming edges */
760   PetscCall(PetscLogEventBegin(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr));
761   for (PetscInt i = 0; i < n; ++i) {
762     PetscCheckCompatibleDeviceContexts(dctx, 1, (*dsub)[i], 4);
763     PetscCall(PetscDeviceContextWaitForContext(dctx, (*dsub)[i]));
764     if (PetscDefined(USE_DEBUG_AND_INFO)) {
765       PetscCallCXX(idList += std::to_string(PetscObjectCast((*dsub)[i])->id));
766       if (i + 1 < n) PetscCallCXX(idList += ", ");
767     }
768   }
769 
770   /* now we handle the aftermath */
771   switch (joinMode) {
772   case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: {
773     const auto children = dctx->childIDs;
774     const auto maxchild = dctx->maxNumChildren;
775     auto      &nchild   = dctx->numChildren;
776     PetscInt   j        = 0;
777 
778     PetscCheck(n <= nchild, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent", n, nchild);
779     /* update child count while it's still fresh in memory */
780     nchild -= n;
781     for (PetscInt i = 0; i < maxchild; ++i) {
782       if (children[i] && (children[i] == PetscObjectCast((*dsub)[j])->id)) {
783         /* child is one of ours, can destroy it */
784         PetscCall(PetscDeviceContextDestroy((*dsub) + j));
785         /* reset the child slot */
786         children[i] = 0;
787         if (++j == n) break;
788       }
789     }
790     /* gone through the loop but did not find every child */
791     PetscCheck(j == n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in", n - j);
792     PetscCall(PetscFree(*dsub));
793   } break;
794   case PETSC_DEVICE_CONTEXT_JOIN_SYNC:
795     for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i], dctx));
796   case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC:
797     break;
798   default:
799     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given");
800   }
801   PetscCall(PetscLogEventEnd(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr));
802 
803   PetscCall(PetscDebugInfo(dctx, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt64_FMT ", mode %s with IDs: %s\n", n, PetscObjectCast(dctx)->id, PetscDeviceContextJoinModes[joinMode], idList.c_str()));
804   PetscFunctionReturn(PETSC_SUCCESS);
805 }
806 
807 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
808 /*@C
809   PetscDeviceContextSynchronize - Block the host until all work queued on a
810   `PetscDeviceContext` has finished
811 
812   Not Collective
813 
814   Input Parameter:
815 . dctx - The `PetscDeviceContext` to synchronize
816 
817   Level: beginner
818 
819   Notes:
820   The host will not return from this routine until `dctx` is idle. Any and all memory
821   operations queued on or otherwise associated with (either explicitly or implicitly via
822   dependencies) are guaranteed to have finished and be globally visible on return.
823 
824   In effect, this routine serves as memory and execution barrier.
825 
826   DAG representation:
827 .vb
828   time ->
829 
830   -> dctx - |= CALL =| - dctx ->
831 .ve
832 
833 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()`
834 @*/
835 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx)
836 {
837   PetscFunctionBegin;
838   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
839   PetscCall(PetscLogEventBegin(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr));
840   /* if it isn't setup there is nothing to sync on */
841   if (dctx->setup) {
842     PetscUseTypeMethod(dctx, synchronize);
843     PetscCall(PetscDeviceContextSyncClearMap_Internal(dctx));
844   }
845   PetscCall(PetscLogEventEnd(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr));
846   PetscFunctionReturn(PETSC_SUCCESS);
847 }
848 
849 /* every device type has a vector of null PetscDeviceContexts -- one for each device */
850 static auto nullContexts          = std::array<std::vector<PetscDeviceContext>, PETSC_DEVICE_MAX>{};
851 static auto nullContextsFinalizer = false;
852 
853 static PetscErrorCode PetscDeviceContextGetNullContextForDevice_Private(PetscBool user_set_device, PetscDevice device, PetscDeviceContext *dctx)
854 {
855   PetscInt        devid;
856   PetscDeviceType dtype;
857 
858   PetscFunctionBegin;
859   PetscValidDevice(device, 2);
860   PetscAssertPointer(dctx, 3);
861   if (PetscUnlikely(!nullContextsFinalizer)) {
862     const auto finalizer = [] {
863       PetscFunctionBegin;
864       for (auto &&dvec : nullContexts) {
865         for (auto &&dctx : dvec) PetscCall(PetscDeviceContextDestroy(&dctx));
866         PetscCallCXX(dvec.clear());
867       }
868       nullContextsFinalizer = false;
869       PetscFunctionReturn(PETSC_SUCCESS);
870     };
871 
872     nullContextsFinalizer = true;
873     PetscCall(PetscRegisterFinalize(std::move(finalizer)));
874   }
875   PetscCall(PetscDeviceGetDeviceId(device, &devid));
876   PetscCall(PetscDeviceGetType(device, &dtype));
877   {
878     auto &ctxlist = nullContexts[dtype];
879 
880     PetscCheck(devid >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Device ID (%" PetscInt_FMT ") must be positive", devid);
881     // need to resize the container if not big enough because incrementing the iterator in
882     // std::next() (if we haven't initialized that ctx yet) may cause it to fall outside the
883     // current size of the container.
884     if (static_cast<std::size_t>(devid) >= ctxlist.size()) PetscCallCXX(ctxlist.resize(devid + 1));
885     if (PetscUnlikely(!ctxlist[devid])) {
886       // we have not seen this device before
887       PetscCall(PetscDeviceContextCreate(dctx));
888       PetscCall(PetscInfo(*dctx, "Initializing null PetscDeviceContext (of type %s) for device %" PetscInt_FMT "\n", PetscDeviceTypes[dtype], devid));
889       {
890         const auto pobj   = PetscObjectCast(*dctx);
891         const auto name   = "null context " + std::to_string(devid);
892         const auto prefix = "null_context_" + std::to_string(devid) + '_';
893 
894         PetscCall(PetscObjectSetName(pobj, name.c_str()));
895         PetscCall(PetscObjectSetOptionsPrefix(pobj, prefix.c_str()));
896       }
897       PetscCall(PetscDeviceContextSetStreamType(*dctx, PETSC_STREAM_GLOBAL_BLOCKING));
898       PetscCall(PetscDeviceContextSetDevice_Private(*dctx, device, user_set_device));
899       PetscCall(PetscDeviceContextSetUp(*dctx));
900       // would use ctxlist.cbegin() but GCC 4.8 can't handle const iterator insert!
901       PetscCallCXX(ctxlist.insert(std::next(ctxlist.begin(), devid), *dctx));
902     } else *dctx = ctxlist[devid];
903   }
904   PetscFunctionReturn(PETSC_SUCCESS);
905 }
906 
907 /*
908   Gets the "NULL" context for the current PetscDeviceType and PetscDevice. NULL contexts are
909   guaranteed to always be globally blocking.
910 */
911 PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *dctx)
912 {
913   PetscDeviceContext gctx;
914   PetscDevice        gdev = nullptr;
915 
916   PetscFunctionBegin;
917   PetscAssertPointer(dctx, 1);
918   PetscCall(PetscDeviceContextGetCurrentContext(&gctx));
919   PetscCall(PetscDeviceContextGetDevice(gctx, &gdev));
920   PetscCall(PetscDeviceContextGetNullContextForDevice_Private(gctx->usersetdevice, gdev, dctx));
921   PetscFunctionReturn(PETSC_SUCCESS);
922 }
923 
924 /*@C
925   PetscDeviceContextSetFromOptions - Configure a `PetscDeviceContext` from the options database
926 
927   Collective on `comm` or `dctx`
928 
929   Input Parameters:
930 + comm - MPI communicator on which to query the options database (optional)
931 - dctx - The `PetscDeviceContext` to configure
932 
933   Output Parameter:
934 . dctx - The `PetscDeviceContext`
935 
936   Options Database Keys:
937 + -device_context_stream_type - type of stream to create inside the `PetscDeviceContext` -
938    `PetscDeviceContextSetStreamType()`
939 - -device_context_device_type - the type of `PetscDevice` to attach by default - `PetscDeviceType`
940 
941   Level: beginner
942 
943   Note:
944   The user may pass `MPI_COMM_NULL` for `comm` in which case the communicator of `dctx` is
945   used (which is always `PETSC_COMM_SELF`).
946 
947 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()`,
948 `PetscDeviceContextView()`
949 @*/
950 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, PetscDeviceContext dctx)
951 {
952   const auto pobj     = PetscObjectCast(dctx);
953   auto       dtype    = std::make_pair(PETSC_DEVICE_DEFAULT(), PETSC_FALSE);
954   auto       stype    = std::make_pair(PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE, PETSC_FALSE);
955   MPI_Comm   old_comm = PETSC_COMM_SELF;
956 
957   PetscFunctionBegin;
958   // do not user getoptionalnullcontext here, the user is not allowed to set it from options!
959   PetscValidDeviceContext(dctx, 2);
960   /* set the device type first */
961   if (const auto device = dctx->device) PetscCall(PetscDeviceGetType(device, &dtype.first));
962   PetscCall(PetscDeviceContextGetStreamType(dctx, &stype.first));
963 
964   if (comm == MPI_COMM_NULL) {
965     PetscCall(PetscObjectGetComm(pobj, &comm));
966   } else {
967     // briefly set the communicator for dctx (it is always PETSC_COMM_SELF) so
968     // PetscObjectOptionsBegin() behaves as if dctx had comm
969     old_comm = Petsc::util::exchange(pobj->comm, comm);
970   }
971 
972   PetscObjectOptionsBegin(pobj);
973   PetscCall(PetscDeviceContextQueryOptions_Internal(PetscOptionsObject, dtype, stype));
974   PetscOptionsEnd();
975   // reset the comm (should be PETSC_COMM_SELF)
976   if (comm != MPI_COMM_NULL) pobj->comm = old_comm;
977   if (dtype.second) PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, dtype.first));
978   if (stype.second) PetscCall(PetscDeviceContextSetStreamType(dctx, stype.first));
979   PetscCall(PetscDeviceContextSetUp(dctx));
980   PetscFunctionReturn(PETSC_SUCCESS);
981 }
982 
983 /*@C
984   PetscDeviceContextView - View a `PetscDeviceContext`
985 
986   Collective on `viewer`
987 
988   Input Parameters:
989 + dctx   - The `PetscDeviceContext`
990 - viewer - The `PetscViewer` to view `dctx` with (may be `NULL`)
991 
992   Level: beginner
993 
994   Note:
995   If `viewer` is `NULL`, `PETSC_VIEWER_STDOUT_WORLD` is used instead, in which case this
996   routine is collective on `PETSC_COMM_WORLD`.
997 
998 .seealso: `PetscDeviceContextViewFromOptions()`, `PetscDeviceView()`, `PETSC_VIEWER_STDOUT_WORLD`, `PetscDeviceContextCreate()`
999 @*/
1000 PetscErrorCode PetscDeviceContextView(PetscDeviceContext dctx, PetscViewer viewer)
1001 {
1002   PetscBool iascii;
1003 
1004   PetscFunctionBegin;
1005   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1006   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer));
1007   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
1008   PetscCall(PetscObjectTypeCompare(PetscObjectCast(viewer), PETSCVIEWERASCII, &iascii));
1009   if (iascii) {
1010     auto        stype = PETSC_STREAM_DEFAULT_BLOCKING;
1011     PetscViewer sub;
1012 
1013     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sub));
1014     PetscCall(PetscObjectPrintClassNamePrefixType(PetscObjectCast(dctx), sub));
1015     PetscCall(PetscViewerASCIIPushTab(sub));
1016     PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
1017     PetscCall(PetscViewerASCIIPrintf(sub, "stream type: %s\n", PetscStreamTypes[stype]));
1018     PetscCall(PetscViewerASCIIPrintf(sub, "children: %" PetscInt_FMT "\n", dctx->numChildren));
1019     if (const auto nchild = dctx->numChildren) {
1020       PetscCall(PetscViewerASCIIPushTab(sub));
1021       for (PetscInt i = 0; i < nchild; ++i) {
1022         if (i == nchild - 1) {
1023           PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT, dctx->childIDs[i]));
1024         } else {
1025           PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT ", ", dctx->childIDs[i]));
1026         }
1027       }
1028     }
1029     PetscCall(PetscViewerASCIIPopTab(sub));
1030     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sub));
1031     PetscCall(PetscViewerFlush(viewer));
1032     PetscCall(PetscViewerASCIIPushTab(viewer));
1033   }
1034   if (const auto device = dctx->device) PetscCall(PetscDeviceView(device, viewer));
1035   if (iascii) PetscCall(PetscViewerASCIIPopTab(viewer));
1036   PetscFunctionReturn(PETSC_SUCCESS);
1037 }
1038 
1039 /*@C
1040   PetscDeviceContextViewFromOptions - View a `PetscDeviceContext` from options
1041 
1042   Input Parameters:
1043 + dctx - The `PetscDeviceContext` to view
1044 . obj  - Optional `PetscObject` to associate (may be `NULL`)
1045 - name - The command line option
1046 
1047   Level: beginner
1048 
1049 .seealso: `PetscDeviceContextView()`, `PetscObjectViewFromOptions()`, `PetscDeviceContextCreate()`
1050 @*/
1051 PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext dctx, PetscObject obj, const char name[])
1052 {
1053   PetscFunctionBegin;
1054   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1055   if (obj) PetscValidHeader(obj, 2);
1056   PetscAssertPointer(name, 3);
1057   PetscCall(PetscObjectViewFromOptions(PetscObjectCast(dctx), obj, name));
1058   PetscFunctionReturn(PETSC_SUCCESS);
1059 }
1060 
1061 /*@C
1062   PetscDeviceContextGetStreamHandle - Return a handle to the underlying stream of the current device context
1063 
1064   Input Parameters:
1065 + dctx   - The `PetscDeviceContext` to get the stream from
1066 - handle - A handle to the stream
1067 
1068   Level: developer
1069 
1070   Note:
1071   This routine is dangerous. It exists only for the most experienced users and
1072   internal PETSc developement.
1073 
1074   There is no way for PETSc's auto-dependency system to track what the caller does with the
1075   stream.
1076 
1077   If the user uses the stream to copy memory that was previously modified by PETSc, or launches
1078   kernels that modify memory with the stream, it is the users responsibility to inform PETSc of
1079   their actions via `PetscDeviceContextMarkIntentFromID()`. Failure to do so may introduce a
1080   race condition. This race condition may manifest in nondeterministic ways.
1081 
1082   Alternatively, the user may synchronize the stream immediately before and after use. This is
1083   the safest option.
1084 
1085   Example Usage:
1086 .vb
1087   PetscDeviceContext dctx;
1088   PetscDeviceType    type;
1089   void               *handle;
1090 
1091   PetscDeviceContextGetCurrentContext(&dctx);
1092   PetscDeviceContextGetStreamHandle(dctx, &handle);
1093   PetscDeviceContextGetDeviceType(dctx, &type);
1094 
1095   if (type == PETSC_DEVICE_CUDA) {
1096     cudsStream_t stream = *(cudaStream_t*)handle;
1097 
1098     my_cuda_kernel<<<1, 2, 3, stream>>>();
1099   }
1100 .ve
1101 
1102 .N ASYNC_API
1103 
1104 .seealso: `PetscDeviceContext`
1105 @*/
1106 PetscErrorCode PetscDeviceContextGetStreamHandle(PetscDeviceContext dctx, void *handle)
1107 {
1108   PetscFunctionBegin;
1109   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1110   PetscAssertPointer(handle, 2);
1111   PetscCall(PetscDeviceContextGetStreamHandle_Internal(dctx, (void **)handle));
1112   PetscFunctionReturn(PETSC_SUCCESS);
1113 }
1114