xref: /petsc/src/sys/objects/device/interface/dcontext.cxx (revision 3048253cfd29e6a237b5e244ab190e7e08d38e72)
1 #include "petscdevice_interface_internal.hpp" /*I <petscdevice.h> I*/
2 #include <petsc/private/viewerimpl.h>         // _p_PetscViewer for PetscObjectCast()
3 
4 #include <petsc/private/cpp/object_pool.hpp>
5 #include <petsc/private/cpp/utility.hpp>
6 #include <petsc/private/cpp/array.hpp>
7 
8 #include <vector>
9 #include <string> // std::to_string among other things
10 
11 /* Define the allocator */
12 class PetscDeviceContextConstructor : public Petsc::ConstructorInterface<_p_PetscDeviceContext, PetscDeviceContextConstructor> {
13 public:
14   PetscErrorCode construct_(PetscDeviceContext dctx) const noexcept
15   {
16     PetscFunctionBegin;
17     PetscCall(PetscArrayzero(dctx, 1));
18     PetscCall(PetscHeaderInitialize_Private(dctx, PETSC_DEVICE_CONTEXT_CLASSID, "PetscDeviceContext", "PetscDeviceContext", "Sys", PETSC_COMM_SELF, PetscDeviceContextDestroy, PetscDeviceContextView));
19     PetscCallCXX(PetscObjectCast(dctx)->cpp = new CxxData());
20     PetscCall(underlying().reset(dctx, false));
21     PetscFunctionReturn(PETSC_SUCCESS);
22   }
23 
24   static PetscErrorCode destroy_(PetscDeviceContext dctx) noexcept
25   {
26     PetscFunctionBegin;
27     PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren);
28     PetscTryTypeMethod(dctx, destroy);
29     PetscCall(PetscDeviceDestroy(&dctx->device));
30     PetscCall(PetscFree(dctx->childIDs));
31     delete CxxDataCast(dctx);
32     PetscCall(PetscHeaderDestroy_Private(PetscObjectCast(dctx), PETSC_FALSE));
33     PetscFunctionReturn(PETSC_SUCCESS);
34   }
35 
36   static PetscErrorCode reset_(PetscDeviceContext dctx, bool zero = true) noexcept
37   {
38     PetscFunctionBegin;
39     if (zero) {
40       // reset the device if the user set it
41       if (Petsc::util::exchange(dctx->usersetdevice, PETSC_FALSE)) {
42         PetscTryTypeMethod(dctx, destroy);
43         PetscCall(PetscDeviceDestroy(&dctx->device));
44         PetscCall(PetscArrayzero(dctx->ops, 1));
45         dctx->data = nullptr;
46       }
47       PetscCall(PetscHeaderReset_Internal(PetscObjectCast(dctx)));
48       dctx->numChildren = 0;
49       dctx->setup       = PETSC_FALSE;
50       // don't deallocate the child array, rather just zero it out
51       PetscCall(PetscArrayzero(dctx->childIDs, dctx->maxNumChildren));
52       PetscCall(CxxDataCast(dctx)->clear());
53     }
54     dctx->streamType = PETSC_STREAM_DEFAULT_BLOCKING;
55     PetscFunctionReturn(PETSC_SUCCESS);
56   }
57 
58   static PetscErrorCode invalidate_(PetscDeviceContext) noexcept { return PETSC_SUCCESS; }
59 };
60 
61 static Petsc::ObjectPool<_p_PetscDeviceContext, PetscDeviceContextConstructor> contextPool;
62 
63 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
64 /*@C
65   PetscDeviceContextCreate - Creates a `PetscDeviceContext`
66 
67   Not Collective
68 
69   Output Parameter:
70 . dctx - The `PetscDeviceContext`
71 
72   Level: beginner
73 
74   Note:
75   Unlike almost every other PETSc class it is advised that most users use
76   `PetscDeviceContextDuplicate()` rather than this routine to create new contexts. Contexts of
77   different types are incompatible with one another; using `PetscDeviceContextDuplicate()`
78   ensures compatible types.
79 
80   DAG representation:
81 .vb
82   time ->
83 
84   |= CALL =| - dctx ->
85 .ve
86 
87 .N ASYNC_API
88 
89 .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`,
90 `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`,
91 `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextView()`, `PetscDeviceContextDestroy()`
92 @*/
93 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx)
94 {
95   PetscFunctionBegin;
96   PetscAssertPointer(dctx, 1);
97   PetscCall(PetscDeviceInitializePackage());
98   PetscCall(PetscLogEventBegin(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr));
99   PetscCall(contextPool.allocate(dctx));
100   PetscCall(PetscLogEventEnd(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr));
101   PetscFunctionReturn(PETSC_SUCCESS);
102 }
103 
104 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
105 /*@C
106   PetscDeviceContextDestroy - Frees a `PetscDeviceContext`
107 
108   Not Collective
109 
110   Input Parameter:
111 . dctx - The `PetscDeviceContext`
112 
113   Level: beginner
114 
115   Notes:
116   No implicit synchronization occurs due to this routine, all resources are released completely
117   asynchronously w.r.t. the host. If one needs to guarantee access to the data produced on
118   `dctx`'s stream the user is responsible for calling `PetscDeviceContextSynchronize()` before
119   calling this routine.
120 
121   DAG representation:
122 .vb
123   time ->
124 
125   -> dctx - |= CALL =|
126 .ve
127 
128   Developer Notes:
129   `dctx` is never actually "destroyed" in the classical sense. It is returned to an ever
130   growing pool of `PetscDeviceContext`s. There are currently no limits on the size of the pool,
131   this should perhaps be implemented.
132 
133 .N ASYNC_API
134 
135 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
136 `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()`
137 @*/
138 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx)
139 {
140   PetscFunctionBegin;
141   PetscAssertPointer(dctx, 1);
142   if (!*dctx) PetscFunctionReturn(PETSC_SUCCESS);
143   PetscCall(PetscLogEventBegin(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr));
144   if (--(PetscObjectCast(*dctx)->refct) <= 0) {
145     PetscCall(PetscDeviceContextCheckNotOrphaned_Internal(*dctx));
146     PetscCall(contextPool.deallocate(dctx));
147   }
148   PetscCall(PetscLogEventEnd(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr));
149   *dctx = nullptr;
150   PetscFunctionReturn(PETSC_SUCCESS);
151 }
152 
153 /*@C
154   PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a
155   `PetscDeviceContext`
156 
157   Not Collective
158 
159   Input Parameters:
160 + dctx - The `PetscDeviceContext`
161 - type - The `PetscStreamType`
162 
163   Level: beginner
164 
165   Note:
166   See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
167   types and their interactions. If the `PetscDeviceContext` was previously set up and stream
168   type was changed, you must call `PetscDeviceContextSetUp()` again after this routine.
169 
170 .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`,
171 `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()`
172 @*/
173 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type)
174 {
175   PetscFunctionBegin;
176   // do not use getoptionalnullcontext here since we do not want the user to change the stream
177   // type
178   PetscValidDeviceContext(dctx, 1);
179   PetscValidStreamType(type, 2);
180   // only need to do complex swapping if the object has already been setup
181   if (dctx->setup && (dctx->streamType != type)) {
182     dctx->setup = PETSC_FALSE;
183     PetscCall(PetscLogEventBegin(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr));
184     PetscUseTypeMethod(dctx, changestreamtype, type);
185     PetscCall(PetscLogEventEnd(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr));
186   }
187   dctx->streamType = type;
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 /*@C
192   PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a
193   `PetscDeviceContext`
194 
195   Not Collective
196 
197   Input Parameter:
198 . dctx - The `PetscDeviceContext`
199 
200   Output Parameter:
201 . type - The `PetscStreamType`
202 
203   Level: beginner
204 
205   Note:
206   See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
207   types and their interactions
208 
209 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`,
210 `PetscDeviceContextSetFromOptions()`
211 @*/
212 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type)
213 {
214   PetscFunctionBegin;
215   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
216   PetscAssertPointer(type, 2);
217   *type = dctx->streamType;
218   PetscFunctionReturn(PETSC_SUCCESS);
219 }
220 
221 /*
222   Actual function to set the device.
223 
224   1. Repeatedly destroying and recreating internal data structures (like streams and events)
225      for recycled PetscDeviceContexts is not free. If done often, it does add up.
226   2. The vast majority of PetscDeviceContexts are created by PETSc either as children or
227      default contexts. The default contexts *never* change type, and the children are extremely
228      unlikely to (chances are if you fork once, you will fork again very soon).
229   3. The only time this calculus changes is if the user themselves sets the device type. In
230      this case we do not know what the user has changed, so must always wipe the slate clean.
231 
232   Thus we need to keep track whether the user explicitly sets the device contexts device.
233 */
234 static PetscErrorCode PetscDeviceContextSetDevice_Private(PetscDeviceContext dctx, PetscDevice device, PetscBool user_set)
235 {
236   PetscFunctionBegin;
237   // do not use getoptionalnullcontext here since we do not want the user to change its device
238   PetscValidDeviceContext(dctx, 1);
239   PetscValidDevice(device, 2);
240   if (dctx->device && (dctx->device->id == device->id)) PetscFunctionReturn(PETSC_SUCCESS);
241   PetscCall(PetscLogEventBegin(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr));
242   PetscTryTypeMethod(dctx, destroy);
243   PetscCall(PetscDeviceDestroy(&dctx->device));
244   PetscCall(PetscMemzero(dctx->ops, sizeof(*dctx->ops)));
245   PetscCall(PetscDeviceReference_Internal(device));
246   // set it before calling the method
247   dctx->device = device;
248   PetscCall((*device->ops->createcontext)(dctx));
249   PetscCall(PetscLogEventEnd(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr));
250   dctx->setup         = PETSC_FALSE;
251   dctx->usersetdevice = user_set;
252   PetscFunctionReturn(PETSC_SUCCESS);
253 }
254 
255 PetscErrorCode PetscDeviceContextSetDefaultDeviceForType_Internal(PetscDeviceContext dctx, PetscDeviceType type)
256 {
257   PetscDevice device;
258 
259   PetscFunctionBegin;
260   PetscCall(PetscDeviceGetDefaultForType_Internal(type, &device));
261   PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_FALSE));
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 /*@C
266   PetscDeviceContextSetDevice - Set the underlying `PetscDevice` for a `PetscDeviceContext`
267 
268   Not Collective
269 
270   Input Parameters:
271 + dctx   - The `PetscDeviceContext`
272 - device - The `PetscDevice`
273 
274   Level: intermediate
275 
276   Notes:
277   This routine is effectively `PetscDeviceContext`'s "set-type" (so every `PetscDeviceContext` must
278   also have an attached `PetscDevice`). Unlike the usual set-type semantics, it is not strictly
279   necessary to set a contexts device to enable usage, any created `PetscDeviceContext`s will
280   always come equipped with the "default" device.
281 
282   This routine is a no-op if `device` is already attached to `dctx`.
283 
284   This routine may (but is very unlikely to) initialize the backend device and may incur
285   synchronization.
286 
287 .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()`,
288 `PetscDeviceContextGetDeviceType()`
289 @*/
290 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device)
291 {
292   PetscFunctionBegin;
293   PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_TRUE));
294   PetscFunctionReturn(PETSC_SUCCESS);
295 }
296 
297 /*@C
298   PetscDeviceContextGetDevice - Get the underlying `PetscDevice` for a `PetscDeviceContext`
299 
300   Not Collective
301 
302   Input Parameter:
303 . dctx - the `PetscDeviceContext`
304 
305   Output Parameter:
306 . device - The `PetscDevice`
307 
308   Level: intermediate
309 
310   Note:
311   This is a borrowed reference, the user should not destroy `device`.
312 
313 .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice`, `PetscDeviceContextGetDeviceType()`
314 @*/
315 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device)
316 {
317   PetscFunctionBegin;
318   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
319   PetscAssertPointer(device, 2);
320   PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt64_FMT " has no attached PetscDevice to get", PetscObjectCast(dctx)->id);
321   *device = dctx->device;
322   PetscFunctionReturn(PETSC_SUCCESS);
323 }
324 
325 /*@C
326   PetscDeviceContextGetDeviceType - Get the `PetscDeviceType` for a `PetscDeviceContext`
327 
328   Not Collective
329 
330   Input Parameter:
331 . dctx - The `PetscDeviceContext`
332 
333   Output Parameter:
334 . type - The `PetscDeviceType`
335 
336   Level: beginner
337 
338   Note:
339   This routine is a convenience shorthand for `PetscDeviceContextGetDevice()` ->
340   `PetscDeviceGetType()`.
341 
342 .seealso: `PetscDeviceType`, `PetscDeviceContextGetDevice()`, `PetscDeviceGetType()`, `PetscDevice`
343 @*/
344 PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext dctx, PetscDeviceType *type)
345 {
346   PetscDevice device = nullptr;
347 
348   PetscFunctionBegin;
349   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
350   PetscAssertPointer(type, 2);
351   PetscCall(PetscDeviceContextGetDevice(dctx, &device));
352   PetscCall(PetscDeviceGetType(device, type));
353   PetscFunctionReturn(PETSC_SUCCESS);
354 }
355 
356 /*@C
357   PetscDeviceContextSetUp - Prepares a `PetscDeviceContext` for use
358 
359   Not Collective
360 
361   Input Parameter:
362 . dctx - The `PetscDeviceContext`
363 
364   Level: beginner
365 
366   Developer Notes:
367   This routine is usually the stage where a `PetscDeviceContext` acquires device-side data
368   structures such as streams, events, and (possibly) handles.
369 
370 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
371 `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()`
372 @*/
373 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx)
374 {
375   PetscFunctionBegin;
376   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
377   if (dctx->setup) PetscFunctionReturn(PETSC_SUCCESS);
378   if (!dctx->device) {
379     const auto default_dtype = PETSC_DEVICE_DEFAULT();
380 
381     PetscCall(PetscInfo(dctx, "PetscDeviceContext %" PetscInt64_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", PetscObjectCast(dctx)->id, PetscDeviceTypes[default_dtype]));
382     PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, default_dtype));
383   }
384   PetscCall(PetscLogEventBegin(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr));
385   PetscUseTypeMethod(dctx, setup);
386   PetscCall(PetscLogEventEnd(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr));
387   dctx->setup = PETSC_TRUE;
388   PetscFunctionReturn(PETSC_SUCCESS);
389 }
390 
391 static PetscErrorCode PetscDeviceContextDuplicate_Private(PetscDeviceContext dctx, PetscStreamType stype, PetscDeviceContext *dctxdup)
392 {
393   PetscFunctionBegin;
394   PetscCall(PetscLogEventBegin(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr));
395   PetscCall(PetscDeviceContextCreate(dctxdup));
396   PetscCall(PetscDeviceContextSetStreamType(*dctxdup, stype));
397   if (const auto device = dctx->device) PetscCall(PetscDeviceContextSetDevice_Private(*dctxdup, device, dctx->usersetdevice));
398   PetscCall(PetscDeviceContextSetUp(*dctxdup));
399   PetscCall(PetscLogEventEnd(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr));
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
402 
403 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
404 /*@C
405   PetscDeviceContextDuplicate - Duplicates a `PetscDeviceContext` object
406 
407   Not Collective
408 
409   Input Parameter:
410 . dctx - The `PetscDeviceContext` to duplicate
411 
412   Output Parameter:
413 . dctxdup - The duplicated `PetscDeviceContext`
414 
415   Level: beginner
416 
417   Notes:
418   This is a shorthand method for creating a `PetscDeviceContext` with the exact same settings as
419   another. Note however that `dctxdup` does not share any of the underlying data with `dctx`,
420   (including its current stream-state) they are completely separate objects.
421 
422   There is no implied ordering between `dctx` or `dctxdup`.
423 
424   DAG representation:
425 .vb
426   time ->
427 
428   -> dctx - |= CALL =| - dctx ---->
429                        - dctxdup ->
430 .ve
431 
432 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
433 `PetscDeviceContextSetStreamType()`
434 @*/
435 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup)
436 {
437   auto stype = PETSC_STREAM_DEFAULT_BLOCKING;
438 
439   PetscFunctionBegin;
440   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
441   PetscAssertPointer(dctxdup, 2);
442   PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
443   PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, dctxdup));
444   PetscFunctionReturn(PETSC_SUCCESS);
445 }
446 
447 /*@C
448   PetscDeviceContextQueryIdle - Returns whether or not a `PetscDeviceContext` is idle
449 
450   Not Collective
451 
452   Input Parameter:
453 . dctx - The `PetscDeviceContext`
454 
455   Output Parameter:
456 . idle - `PETSC_TRUE` if `dctx` has NO work, `PETSC_FALSE` if it has work
457 
458   Level: intermediate
459 
460   Note:
461   This routine only refers a singular context and does NOT take any of its children into
462   account. That is, if `dctx` is idle but has dependents who do have work this routine still
463   returns `PETSC_TRUE`.
464 
465 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()`
466 @*/
467 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle)
468 {
469   PetscFunctionBegin;
470   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
471   PetscAssertPointer(idle, 2);
472   PetscCall(PetscLogEventBegin(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr));
473   PetscUseTypeMethod(dctx, query, idle);
474   PetscCall(PetscLogEventEnd(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr));
475   PetscCall(PetscInfo(dctx, "PetscDeviceContext ('%s', id %" PetscInt64_FMT ") %s idle\n", PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", PetscObjectCast(dctx)->id, *idle ? "was" : "was not"));
476   PetscFunctionReturn(PETSC_SUCCESS);
477 }
478 
479 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
480 /*@C
481   PetscDeviceContextWaitForContext - Make one context wait for another context to finish
482 
483   Not Collective
484 
485   Input Parameters:
486 + dctxa - The `PetscDeviceContext` object that is waiting
487 - dctxb - The `PetscDeviceContext` object that is being waited on
488 
489   Level: beginner
490 
491   Notes:
492   Serializes two `PetscDeviceContext`s. Serialization is performed asynchronously; the host
493   does not wait for the serialization to actually occur.
494 
495   This routine uses only the state of `dctxb` at the moment this routine was called, so any
496   future work queued will not affect `dctxa`. It is safe to pass the same context to both
497   arguments (in which case this routine does nothing).
498 
499   DAG representation:
500 .vb
501   time ->
502 
503   -> dctxa ---/- |= CALL =| - dctxa ->
504              /
505   -> dctxb -/------------------------>
506 .ve
507 
508 .N ASYNC_API
509 
510 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()`
511 @*/
512 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb)
513 {
514   PetscObject aobj;
515 
516   PetscFunctionBegin;
517   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxa));
518   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxb));
519   PetscCheckCompatibleDeviceContexts(dctxa, 1, dctxb, 2);
520   if (dctxa == dctxb) PetscFunctionReturn(PETSC_SUCCESS);
521   aobj = PetscObjectCast(dctxa);
522   PetscCall(PetscLogEventBegin(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr));
523   PetscUseTypeMethod(dctxa, waitforcontext, dctxb);
524   PetscCallCXX(CxxDataCast(dctxa)->upstream[dctxb] = CxxDataParent(dctxb));
525   PetscCall(PetscLogEventEnd(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr));
526   PetscCall(PetscInfo(dctxa, "dctx %" PetscInt64_FMT " waiting on dctx %" PetscInt64_FMT "\n", aobj->id, PetscObjectCast(dctxb)->id));
527   PetscCall(PetscObjectStateIncrease(aobj));
528   PetscFunctionReturn(PETSC_SUCCESS);
529 }
530 
531 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
532 /*@C
533   PetscDeviceContextForkWithStreamType - Create a set of dependent child contexts from a parent
534   context with a prescribed `PetscStreamType`
535 
536   Not Collective, Asynchronous
537 
538   Input Parameters:
539 + dctx  - The parent `PetscDeviceContext`
540 . stype - The prescribed `PetscStreamType`
541 - n     - The number of children to create
542 
543   Output Parameter:
544 . dsub - The created child context(s)
545 
546   Level: intermediate
547 
548   Notes:
549   This routine creates `n` edges of a DAG from a source node which are causally dependent on the
550   source node. This causal dependency is established as-if by calling
551   `PetscDeviceContextWaitForContext()` on every child.
552 
553   `dsub` is allocated by this routine and has its lifetime bounded by `dctx`. That is, `dctx`
554   expects to free `dsub` (via `PetscDeviceContextJoin()`) before it itself is destroyed.
555 
556   This routine only accounts for work queued on `dctx` up until calling this routine, any
557   subsequent work enqueued on `dctx` has no effect on `dsub`.
558 
559   The `PetscStreamType` of `dctx` does not have to equal `stype`. In fact, it is often the case
560   that they are different. This is useful in cases where a routine can locally exploit stream
561   parallelism without needing to worry about what stream type the incoming `PetscDeviceContext`
562   carries.
563 
564   DAG representation:
565 .vb
566   time ->
567 
568   -> dctx - |= CALL =| -\----> dctx ------>
569                          \---> dsub[0] --->
570                           \--> ... ------->
571                            \-> dsub[n-1] ->
572 .ve
573 
574 .N ASYNC_API
575 
576 .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`,
577 `PetscDeviceContextQueryIdle()`, `PetscDeviceContextWaitForContext()`
578 @*/
579 PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext dctx, PetscStreamType stype, PetscInt n, PetscDeviceContext **dsub)
580 {
581   // debugging only
582   std::string idList;
583   auto        ninput = n;
584 
585   PetscFunctionBegin;
586   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
587   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n);
588   PetscAssertPointer(dsub, 4);
589   *dsub = nullptr;
590   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
591   if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n));
592   PetscCall(PetscLogEventBegin(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr));
593   /* update child totals */
594   dctx->numChildren += n;
595   /* now to find out if we have room */
596   if (dctx->numChildren > dctx->maxNumChildren) {
597     const auto numChildren    = dctx->numChildren;
598     auto      &maxNumChildren = dctx->maxNumChildren;
599     auto       numAllocated   = numChildren;
600 
601     /* no room, either from having too many kids or not having any */
602     if (auto &childIDs = dctx->childIDs) {
603       // the difference is backwards because we have not updated maxNumChildren yet
604       numAllocated -= maxNumChildren;
605       /* have existing children, must reallocate them */
606       PetscCall(PetscRealloc(numChildren * sizeof(*childIDs), &childIDs));
607       /* clear the extra memory since realloc doesn't do it for us */
608       PetscCall(PetscArrayzero(std::next(childIDs, maxNumChildren), numAllocated));
609     } else {
610       /* have no children */
611       PetscCall(PetscCalloc1(numChildren, &childIDs));
612     }
613     /* update total number of children */
614     maxNumChildren = numChildren;
615   }
616   PetscCall(PetscMalloc1(n, dsub));
617   for (PetscInt i = 0; ninput && (i < dctx->numChildren); ++i) {
618     auto &childID = dctx->childIDs[i];
619     /* empty child slot */
620     if (!childID) {
621       auto &childctx = (*dsub)[i];
622 
623       /* create the child context in the image of its parent */
624       PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, &childctx));
625       PetscCall(PetscDeviceContextWaitForContext(childctx, dctx));
626       /* register the child with its parent */
627       PetscCall(PetscObjectGetId(PetscObjectCast(childctx), &childID));
628       if (PetscDefined(USE_DEBUG_AND_INFO)) {
629         PetscCallCXX(idList += std::to_string(childID));
630         if (ninput != 1) PetscCallCXX(idList += ", ");
631       }
632       --ninput;
633     }
634   }
635   PetscCall(PetscLogEventEnd(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr));
636   PetscCall(PetscDebugInfo(dctx, "Forked %" PetscInt_FMT " children from parent %" PetscInt64_FMT " with IDs: %s\n", n, PetscObjectCast(dctx)->id, idList.c_str()));
637   PetscFunctionReturn(PETSC_SUCCESS);
638 }
639 
640 /*@C
641   PetscDeviceContextFork - Create a set of dependent child contexts from a parent context
642 
643   Not Collective, Asynchronous
644 
645   Input Parameters:
646 + dctx - The parent `PetscDeviceContext`
647 - n    - The number of children to create
648 
649   Output Parameter:
650 . dsub - The created child context(s)
651 
652   Level: beginner
653 
654   Notes:
655   Behaves identically to `PetscDeviceContextForkWithStreamType()` except that the prescribed
656   `PetscStreamType` is taken from `dctx`. In effect this routine is shorthand for\:
657 
658 .vb
659   PetscStreamType stype;
660 
661   PetscDeviceContextGetStreamType(dctx, &stype);
662   PetscDeviceContextForkWithStreamType(dctx, stype, ...);
663 .ve
664 
665 .N ASYNC_API
666 
667 .seealso: `PetscDeviceContextForkWithStreamType()`, `PetscDeviceContextJoin()`,
668 `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()`
669 @*/
670 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub)
671 {
672   auto stype = PETSC_STREAM_DEFAULT_BLOCKING;
673 
674   PetscFunctionBegin;
675   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
676   PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
677   PetscCall(PetscDeviceContextForkWithStreamType(dctx, stype, n, dsub));
678   PetscFunctionReturn(PETSC_SUCCESS);
679 }
680 
681 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
682 /*@C
683   PetscDeviceContextJoin - Converge a set of child contexts
684 
685   Not Collective, Asynchronous
686 
687   Input Parameters:
688 + dctx     - A `PetscDeviceContext` to converge on
689 . n        - The number of sub contexts to converge
690 . joinMode - The type of join to perform
691 - dsub     - The sub contexts to converge
692 
693   Level: beginner
694 
695   Notes:
696   If `PetscDeviceContextFork()` creates `n` edges from a source node which all depend on the source
697   node, then this routine is the exact mirror. That is, it creates a node (represented in `dctx`)
698   which receives `n` edges (and optionally destroys them) which is dependent on the completion
699   of all incoming edges.
700 
701   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`. All contexts in `dsub` will be
702   destroyed by this routine. Thus all sub contexts must have been created with the `dctx`
703   passed to this routine.
704 
705   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`. All sub contexts will additionally wait on
706   `dctx` after converging. This has the effect of "synchronizing" the outgoing edges. Note the
707   sync suffix does NOT refer to the host, i.e. this routine does NOT call
708   `PetscDeviceSynchronize()`.
709 
710   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`. `dctx` waits for all sub contexts but
711   the sub contexts do not wait for one another or `dctx` afterwards.
712 
713   DAG representations:
714   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`
715 .vb
716   time ->
717 
718   -> dctx ---------/- |= CALL =| - dctx ->
719   -> dsub[0] -----/
720   ->  ... -------/
721   -> dsub[n-1] -/
722 .ve
723   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`
724 .vb
725   time ->
726 
727   -> dctx ---------/- |= CALL =| -\----> dctx ------>
728   -> dsub[0] -----/                \---> dsub[0] --->
729   ->  ... -------/                  \--> ... ------->
730   -> dsub[n-1] -/                    \-> dsub[n-1] ->
731 .ve
732   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`
733 .vb
734   time ->
735 
736   -> dctx ----------/- |= CALL =| - dctx ->
737   -> dsub[0] ------/----------------------->
738   ->  ... --------/------------------------>
739   -> dsub[n-1] --/------------------------->
740 .ve
741 
742 .N ASYNC_API
743 
744 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextForkWithStreamType()`,
745 `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode`
746 @*/
747 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub)
748 {
749   // debugging only
750   std::string idList;
751 
752   PetscFunctionBegin;
753   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
754   /* validity of dctx is checked in the wait-for loop */
755   PetscAssertPointer(dsub, 4);
756   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n);
757   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
758   if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n));
759   /* first dctx waits on all the incoming edges */
760   PetscCall(PetscLogEventBegin(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr));
761   for (PetscInt i = 0; i < n; ++i) {
762     PetscCheckCompatibleDeviceContexts(dctx, 1, (*dsub)[i], 4);
763     PetscCall(PetscDeviceContextWaitForContext(dctx, (*dsub)[i]));
764     if (PetscDefined(USE_DEBUG_AND_INFO)) {
765       PetscCallCXX(idList += std::to_string(PetscObjectCast((*dsub)[i])->id));
766       if (i + 1 < n) PetscCallCXX(idList += ", ");
767     }
768   }
769 
770   /* now we handle the aftermath */
771   switch (joinMode) {
772   case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: {
773     const auto children = dctx->childIDs;
774     const auto maxchild = dctx->maxNumChildren;
775     auto      &nchild   = dctx->numChildren;
776     PetscInt   j        = 0;
777 
778     PetscCheck(n <= nchild, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent", n, nchild);
779     /* update child count while it's still fresh in memory */
780     nchild -= n;
781     for (PetscInt i = 0; i < maxchild; ++i) {
782       if (children[i] && (children[i] == PetscObjectCast((*dsub)[j])->id)) {
783         /* child is one of ours, can destroy it */
784         PetscCall(PetscDeviceContextDestroy((*dsub) + j));
785         /* reset the child slot */
786         children[i] = 0;
787         if (++j == n) break;
788       }
789     }
790     /* gone through the loop but did not find every child */
791     PetscCheck(j == n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in", n - j);
792     PetscCall(PetscFree(*dsub));
793   } break;
794   case PETSC_DEVICE_CONTEXT_JOIN_SYNC:
795     for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i], dctx));
796   case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC:
797     break;
798   default:
799     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given");
800   }
801   PetscCall(PetscLogEventEnd(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr));
802 
803   PetscCall(PetscDebugInfo(dctx, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt64_FMT ", mode %s with IDs: %s\n", n, PetscObjectCast(dctx)->id, PetscDeviceContextJoinModes[joinMode], idList.c_str()));
804   PetscFunctionReturn(PETSC_SUCCESS);
805 }
806 
807 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
808 /*@C
809   PetscDeviceContextSynchronize - Block the host until all work queued on a
810   `PetscDeviceContext` has finished
811 
812   Not Collective
813 
814   Input Parameter:
815 . dctx - The `PetscDeviceContext` to synchronize
816 
817   Level: beginner
818 
819   Notes:
820   The host will not return from this routine until `dctx` is idle. Any and all memory
821   operations queued on or otherwise associated with (either explicitly or implicitly via
822   dependencies) are guaranteed to have finished and be globally visible on return.
823 
824   In effect, this routine serves as memory and execution barrier.
825 
826   DAG representation:
827 .vb
828   time ->
829 
830   -> dctx - |= CALL =| - dctx ->
831 .ve
832 
833 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()`
834 @*/
835 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx)
836 {
837   PetscFunctionBegin;
838   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
839   PetscCall(PetscLogEventBegin(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr));
840   /* if it isn't setup there is nothing to sync on */
841   if (dctx->setup) {
842     PetscUseTypeMethod(dctx, synchronize);
843     PetscCall(PetscDeviceContextSyncClearMap_Internal(dctx));
844   }
845   PetscCall(PetscLogEventEnd(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr));
846   PetscFunctionReturn(PETSC_SUCCESS);
847 }
848 
849 /* every device type has a vector of null PetscDeviceContexts -- one for each device */
850 static auto nullContexts          = std::array<std::vector<PetscDeviceContext>, PETSC_DEVICE_MAX>{};
851 static auto nullContextsFinalizer = false;
852 
853 static PetscErrorCode PetscDeviceContextGetNullContextForDevice_Private(PetscBool user_set_device, PetscDevice device, PetscDeviceContext *dctx)
854 {
855   PetscInt        devid;
856   PetscDeviceType dtype;
857 
858   PetscFunctionBegin;
859   PetscValidDevice(device, 2);
860   PetscAssertPointer(dctx, 3);
861   if (PetscUnlikely(!nullContextsFinalizer)) {
862     nullContextsFinalizer = true;
863     PetscCall(PetscRegisterFinalize([] {
864       PetscFunctionBegin;
865       for (auto &&dvec : nullContexts) {
866         for (auto &&dctx : dvec) PetscCall(PetscDeviceContextDestroy(&dctx));
867         PetscCallCXX(dvec.clear());
868       }
869       nullContextsFinalizer = false;
870       PetscFunctionReturn(PETSC_SUCCESS);
871     }));
872   }
873   PetscCall(PetscDeviceGetDeviceId(device, &devid));
874   PetscCall(PetscDeviceGetType(device, &dtype));
875   {
876     auto &ctxlist = nullContexts[dtype];
877 
878     PetscCheck(devid >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Device ID (%" PetscInt_FMT ") must be positive", devid);
879     // need to resize the container if not big enough because incrementing the iterator in
880     // std::next() (if we haven't initialized that ctx yet) may cause it to fall outside the
881     // current size of the container.
882     if (static_cast<std::size_t>(devid) >= ctxlist.size()) PetscCallCXX(ctxlist.resize(devid + 1));
883     if (PetscUnlikely(!ctxlist[devid])) {
884       // we have not seen this device before
885       PetscCall(PetscDeviceContextCreate(dctx));
886       PetscCall(PetscInfo(*dctx, "Initializing null PetscDeviceContext (of type %s) for device %" PetscInt_FMT "\n", PetscDeviceTypes[dtype], devid));
887       {
888         const auto pobj   = PetscObjectCast(*dctx);
889         const auto name   = "null context " + std::to_string(devid);
890         const auto prefix = "null_context_" + std::to_string(devid) + '_';
891 
892         PetscCall(PetscObjectSetName(pobj, name.c_str()));
893         PetscCall(PetscObjectSetOptionsPrefix(pobj, prefix.c_str()));
894       }
895       PetscCall(PetscDeviceContextSetStreamType(*dctx, PETSC_STREAM_GLOBAL_BLOCKING));
896       PetscCall(PetscDeviceContextSetDevice_Private(*dctx, device, user_set_device));
897       PetscCall(PetscDeviceContextSetUp(*dctx));
898       // would use ctxlist.cbegin() but GCC 4.8 can't handle const iterator insert!
899       PetscCallCXX(ctxlist.insert(std::next(ctxlist.begin(), devid), *dctx));
900     } else *dctx = ctxlist[devid];
901   }
902   PetscFunctionReturn(PETSC_SUCCESS);
903 }
904 
905 /*
906   Gets the "NULL" context for the current PetscDeviceType and PetscDevice. NULL contexts are
907   guaranteed to always be globally blocking.
908 */
909 PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *dctx)
910 {
911   PetscDeviceContext gctx;
912   PetscDevice        gdev = nullptr;
913 
914   PetscFunctionBegin;
915   PetscAssertPointer(dctx, 1);
916   PetscCall(PetscDeviceContextGetCurrentContext(&gctx));
917   PetscCall(PetscDeviceContextGetDevice(gctx, &gdev));
918   PetscCall(PetscDeviceContextGetNullContextForDevice_Private(gctx->usersetdevice, gdev, dctx));
919   PetscFunctionReturn(PETSC_SUCCESS);
920 }
921 
922 /*@C
923   PetscDeviceContextSetFromOptions - Configure a `PetscDeviceContext` from the options database
924 
925   Collective on `comm` or `dctx`
926 
927   Input Parameters:
928 + comm - MPI communicator on which to query the options database (optional)
929 - dctx - The `PetscDeviceContext` to configure
930 
931   Output Parameter:
932 . dctx - The `PetscDeviceContext`
933 
934   Options Database Keys:
935 + -device_context_stream_type - type of stream to create inside the `PetscDeviceContext` -
936    `PetscDeviceContextSetStreamType()`
937 - -device_context_device_type - the type of `PetscDevice` to attach by default - `PetscDeviceType`
938 
939   Level: beginner
940 
941   Note:
942   The user may pass `MPI_COMM_NULL` for `comm` in which case the communicator of `dctx` is
943   used (which is always `PETSC_COMM_SELF`).
944 
945 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()`,
946 `PetscDeviceContextView()`
947 @*/
948 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, PetscDeviceContext dctx)
949 {
950   const auto pobj     = PetscObjectCast(dctx);
951   auto       dtype    = std::make_pair(PETSC_DEVICE_DEFAULT(), PETSC_FALSE);
952   auto       stype    = std::make_pair(PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE, PETSC_FALSE);
953   MPI_Comm   old_comm = PETSC_COMM_SELF;
954 
955   PetscFunctionBegin;
956   // do not user getoptionalnullcontext here, the user is not allowed to set it from options!
957   PetscValidDeviceContext(dctx, 2);
958   /* set the device type first */
959   if (const auto device = dctx->device) PetscCall(PetscDeviceGetType(device, &dtype.first));
960   PetscCall(PetscDeviceContextGetStreamType(dctx, &stype.first));
961 
962   if (comm == MPI_COMM_NULL) {
963     PetscCall(PetscObjectGetComm(pobj, &comm));
964   } else {
965     // briefly set the communicator for dctx (it is always PETSC_COMM_SELF) so
966     // PetscObjectOptionsBegin() behaves as if dctx had comm
967     old_comm = Petsc::util::exchange(pobj->comm, comm);
968   }
969 
970   PetscObjectOptionsBegin(pobj);
971   PetscCall(PetscDeviceContextQueryOptions_Internal(PetscOptionsObject, dtype, stype));
972   PetscOptionsEnd();
973   // reset the comm (should be PETSC_COMM_SELF)
974   if (comm != MPI_COMM_NULL) pobj->comm = old_comm;
975   if (dtype.second) PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, dtype.first));
976   if (stype.second) PetscCall(PetscDeviceContextSetStreamType(dctx, stype.first));
977   PetscCall(PetscDeviceContextSetUp(dctx));
978   PetscFunctionReturn(PETSC_SUCCESS);
979 }
980 
981 /*@C
982   PetscDeviceContextView - View a `PetscDeviceContext`
983 
984   Collective on `viewer`
985 
986   Input Parameters:
987 + dctx   - The `PetscDeviceContext`
988 - viewer - The `PetscViewer` to view `dctx` with (may be `NULL`)
989 
990   Level: beginner
991 
992   Note:
993   If `viewer` is `NULL`, `PETSC_VIEWER_STDOUT_WORLD` is used instead, in which case this
994   routine is collective on `PETSC_COMM_WORLD`.
995 
996 .seealso: `PetscDeviceContextViewFromOptions()`, `PetscDeviceView()`, `PETSC_VIEWER_STDOUT_WORLD`, `PetscDeviceContextCreate()`
997 @*/
998 PetscErrorCode PetscDeviceContextView(PetscDeviceContext dctx, PetscViewer viewer)
999 {
1000   PetscBool iascii;
1001 
1002   PetscFunctionBegin;
1003   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1004   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer));
1005   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
1006   PetscCall(PetscObjectTypeCompare(PetscObjectCast(viewer), PETSCVIEWERASCII, &iascii));
1007   if (iascii) {
1008     auto        stype = PETSC_STREAM_DEFAULT_BLOCKING;
1009     PetscViewer sub;
1010 
1011     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sub));
1012     PetscCall(PetscObjectPrintClassNamePrefixType(PetscObjectCast(dctx), sub));
1013     PetscCall(PetscViewerASCIIPushTab(sub));
1014     PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
1015     PetscCall(PetscViewerASCIIPrintf(sub, "stream type: %s\n", PetscStreamTypes[stype]));
1016     PetscCall(PetscViewerASCIIPrintf(sub, "children: %" PetscInt_FMT "\n", dctx->numChildren));
1017     if (const auto nchild = dctx->numChildren) {
1018       PetscCall(PetscViewerASCIIPushTab(sub));
1019       for (PetscInt i = 0; i < nchild; ++i) {
1020         if (i == nchild - 1) {
1021           PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT, dctx->childIDs[i]));
1022         } else {
1023           PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT ", ", dctx->childIDs[i]));
1024         }
1025       }
1026     }
1027     PetscCall(PetscViewerASCIIPopTab(sub));
1028     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sub));
1029     PetscCall(PetscViewerFlush(viewer));
1030     PetscCall(PetscViewerASCIIPushTab(viewer));
1031   }
1032   if (const auto device = dctx->device) PetscCall(PetscDeviceView(device, viewer));
1033   if (iascii) PetscCall(PetscViewerASCIIPopTab(viewer));
1034   PetscFunctionReturn(PETSC_SUCCESS);
1035 }
1036 
1037 /*@C
1038   PetscDeviceContextViewFromOptions - View a `PetscDeviceContext` from options
1039 
1040   Input Parameters:
1041 + dctx - The `PetscDeviceContext` to view
1042 . obj  - Optional `PetscObject` to associate (may be `NULL`)
1043 - name - The command line option
1044 
1045   Level: beginner
1046 
1047 .seealso: `PetscDeviceContextView()`, `PetscObjectViewFromOptions()`, `PetscDeviceContextCreate()`
1048 @*/
1049 PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext dctx, PetscObject obj, const char name[])
1050 {
1051   PetscFunctionBegin;
1052   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1053   if (obj) PetscValidHeader(obj, 2);
1054   PetscAssertPointer(name, 3);
1055   PetscCall(PetscObjectViewFromOptions(PetscObjectCast(dctx), obj, name));
1056   PetscFunctionReturn(PETSC_SUCCESS);
1057 }
1058 
1059 /*@C
1060   PetscDeviceContextGetStreamHandle - Return a handle to the underlying stream of the current device context
1061 
1062   Input Parameters:
1063 + dctx   - The `PetscDeviceContext` to get the stream from
1064 - handle - A handle to the stream
1065 
1066   Level: developer
1067 
1068   Note:
1069   This routine is dangerous. It exists only for the most experienced users and
1070   internal PETSc developement.
1071 
1072   There is no way for PETSc's auto-dependency system to track what the caller does with the
1073   stream.
1074 
1075   If the user uses the stream to copy memory that was previously modified by PETSc, or launches
1076   kernels that modify memory with the stream, it is the users responsibility to inform PETSc of
1077   their actions via `PetscDeviceContextMarkIntentFromID()`. Failure to do so may introduce a
1078   race condition. This race condition may manifest in nondeterministic ways.
1079 
1080   Alternatively, the user may synchronize the stream immediately before and after use. This is
1081   the safest option.
1082 
1083   Example Usage:
1084 .vb
1085   PetscDeviceContext dctx;
1086   PetscDeviceType    type;
1087   void               *handle;
1088 
1089   PetscDeviceContextGetCurrentContext(&dctx);
1090   PetscDeviceContextGetStreamHandle(dctx, &handle);
1091   PetscDeviceContextGetDeviceType(dctx, &type);
1092 
1093   if (type == PETSC_DEVICE_CUDA) {
1094     cudsStream_t stream = *(cudaStream_t*)handle;
1095 
1096     my_cuda_kernel<<<1, 2, 3, stream>>>();
1097   }
1098 .ve
1099 
1100 .N ASYNC_API
1101 
1102 .seealso: `PetscDeviceContext`
1103 @*/
1104 PetscErrorCode PetscDeviceContextGetStreamHandle(PetscDeviceContext dctx, void *handle)
1105 {
1106   PetscFunctionBegin;
1107   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1108   PetscAssertPointer(handle, 2);
1109   PetscCall(PetscDeviceContextGetStreamHandle_Internal(dctx, (void **)handle));
1110   PetscFunctionReturn(PETSC_SUCCESS);
1111 }
1112