xref: /petsc/src/sys/objects/device/interface/dcontext.cxx (revision 74df5e01f481fb3fe90b32c3b4345ef0122eb3ce)
1 #include "petscdevice_interface_internal.hpp" /*I <petscdevice.h> I*/
2 #include <petsc/private/viewerimpl.h>         // _p_PetscViewer for PetscObjectCast()
3 
4 #include <petsc/private/cpp/object_pool.hpp>
5 #include <petsc/private/cpp/utility.hpp>
6 #include <petsc/private/cpp/array.hpp>
7 
8 #include <vector>
9 #include <string> // std::to_string among other things
10 
11 /* Define the allocator */
12 class PetscDeviceContextConstructor : public Petsc::ConstructorInterface<_p_PetscDeviceContext, PetscDeviceContextConstructor> {
13 public:
14   PetscErrorCode construct_(PetscDeviceContext dctx) const noexcept
15   {
16     PetscFunctionBegin;
17     PetscCall(PetscArrayzero(dctx, 1));
18     PetscCall(PetscHeaderCreate_Private((PetscObject)dctx, PETSC_DEVICE_CONTEXT_CLASSID, "PetscDeviceContext", "PetscDeviceContext", "Sys", PETSC_COMM_SELF, (PetscObjectDestroyFn *)PetscDeviceContextDestroy, (PetscObjectViewFn *)PetscDeviceContextView));
19     PetscCall(PetscLogObjectCreate((PetscObject)dctx));
20 
21     PetscCallCXX(PetscObjectCast(dctx)->cpp = new CxxData{dctx});
22     PetscCall(underlying().reset(dctx, false));
23     PetscFunctionReturn(PETSC_SUCCESS);
24   }
25 
26   static PetscErrorCode destroy_(PetscDeviceContext dctx) noexcept
27   {
28     PetscFunctionBegin;
29     PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren);
30     PetscTryTypeMethod(dctx, destroy);
31     PetscCall(PetscDeviceDestroy(&dctx->device));
32     PetscCall(PetscFree(dctx->childIDs));
33     delete CxxDataCast(dctx);
34     PetscCall(PetscHeaderDestroy_Private(PetscObjectCast(dctx), PETSC_FALSE));
35     PetscFunctionReturn(PETSC_SUCCESS);
36   }
37 
38   static PetscErrorCode reset_(PetscDeviceContext dctx, bool zero = true) noexcept
39   {
40     PetscFunctionBegin;
41     if (zero) {
42       // reset the device if the user set it
43       if (Petsc::util::exchange(dctx->usersetdevice, PETSC_FALSE)) {
44         PetscTryTypeMethod(dctx, destroy);
45         PetscCall(PetscDeviceDestroy(&dctx->device));
46         PetscCall(PetscArrayzero(dctx->ops, 1));
47         dctx->data = nullptr;
48       }
49       PetscCall(PetscHeaderReset_Internal(PetscObjectCast(dctx)));
50       dctx->numChildren = 0;
51       dctx->setup       = PETSC_FALSE;
52       // don't deallocate the child array, rather just zero it out
53       PetscCall(PetscArrayzero(dctx->childIDs, dctx->maxNumChildren));
54       PetscCall(CxxDataCast(dctx)->clear());
55       PetscCall(CxxDataCast(dctx)->reset_self(dctx));
56     }
57     dctx->streamType = PETSC_STREAM_DEFAULT;
58     PetscFunctionReturn(PETSC_SUCCESS);
59   }
60 
61   static PetscErrorCode invalidate_(PetscDeviceContext dctx) noexcept
62   {
63     PetscFunctionBegin;
64     PetscCall(CxxDataCast(dctx)->reset_self(dctx));
65     PetscFunctionReturn(PETSC_SUCCESS);
66   }
67 };
68 
69 static Petsc::ObjectPool<_p_PetscDeviceContext, PetscDeviceContextConstructor> contextPool;
70 
71 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
72 /*@C
73   PetscDeviceContextCreate - Creates a `PetscDeviceContext`
74 
75   Not Collective
76 
77   Output Parameter:
78 . dctx - The `PetscDeviceContext`
79 
80   Level: beginner
81 
82   Note:
83   Unlike almost every other PETSc class it is advised that most users use
84   `PetscDeviceContextDuplicate()` rather than this routine to create new contexts. Contexts of
85   different types are incompatible with one another; using `PetscDeviceContextDuplicate()`
86   ensures compatible types.
87 
88   DAG representation:
89 .vb
90   time ->
91 
92   |= CALL =| - dctx ->
93 .ve
94 
95 .N ASYNC_API
96 
97 .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`,
98 `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`,
99 `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextView()`, `PetscDeviceContextDestroy()`
100 @*/
101 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx)
102 {
103   PetscFunctionBegin;
104   PetscAssertPointer(dctx, 1);
105   PetscCall(PetscDeviceInitializePackage());
106   PetscCall(PetscLogEventBegin(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr));
107   PetscCall(contextPool.allocate(dctx));
108   PetscCall(PetscLogEventEnd(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
113 /*@C
114   PetscDeviceContextDestroy - Frees a `PetscDeviceContext`
115 
116   Not Collective
117 
118   Input Parameter:
119 . dctx - The `PetscDeviceContext`
120 
121   Level: beginner
122 
123   Notes:
124   No implicit synchronization occurs due to this routine, all resources are released completely
125   asynchronously w.r.t. the host. If one needs to guarantee access to the data produced on
126   `dctx`'s stream the user is responsible for calling `PetscDeviceContextSynchronize()` before
127   calling this routine.
128 
129   DAG representation:
130 .vb
131   time ->
132 
133   -> dctx - |= CALL =|
134 .ve
135 
136   Developer Notes:
137   `dctx` is never actually "destroyed" in the classical sense. It is returned to an ever
138   growing pool of `PetscDeviceContext`s. There are currently no limits on the size of the pool,
139   this should perhaps be implemented.
140 
141 .N ASYNC_API
142 
143 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
144 `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()`
145 @*/
146 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx)
147 {
148   PetscFunctionBegin;
149   PetscAssertPointer(dctx, 1);
150   if (!*dctx) PetscFunctionReturn(PETSC_SUCCESS);
151   PetscCall(PetscLogEventBegin(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr));
152   if (--(PetscObjectCast(*dctx)->refct) <= 0) {
153     PetscCall(PetscDeviceContextCheckNotOrphaned_Internal(*dctx));
154     PetscCall(contextPool.deallocate(dctx));
155   }
156   PetscCall(PetscLogEventEnd(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr));
157   *dctx = nullptr;
158   PetscFunctionReturn(PETSC_SUCCESS);
159 }
160 
161 /*@C
162   PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a
163   `PetscDeviceContext`
164 
165   Not Collective
166 
167   Input Parameters:
168 + dctx - The `PetscDeviceContext`
169 - type - The `PetscStreamType`
170 
171   Level: beginner
172 
173   Note:
174   See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
175   types and their interactions. If the `PetscDeviceContext` was previously set up and stream
176   type was changed, you must call `PetscDeviceContextSetUp()` again after this routine.
177 
178 .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`,
179 `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()`
180 @*/
181 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type)
182 {
183   PetscFunctionBegin;
184   // do not use getoptionalnullcontext here since we do not want the user to change the stream
185   // type
186   PetscValidDeviceContext(dctx, 1);
187   PetscValidStreamType(type, 2);
188   // only need to do complex swapping if the object has already been setup
189   if (dctx->setup && (dctx->streamType != type)) {
190     dctx->setup = PETSC_FALSE;
191     PetscCall(PetscLogEventBegin(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr));
192     PetscUseTypeMethod(dctx, changestreamtype, type);
193     PetscCall(PetscLogEventEnd(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr));
194   }
195   dctx->streamType = type;
196   PetscFunctionReturn(PETSC_SUCCESS);
197 }
198 
199 /*@C
200   PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a
201   `PetscDeviceContext`
202 
203   Not Collective
204 
205   Input Parameter:
206 . dctx - The `PetscDeviceContext`
207 
208   Output Parameter:
209 . type - The `PetscStreamType`
210 
211   Level: beginner
212 
213   Note:
214   See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
215   types and their interactions
216 
217 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`,
218 `PetscDeviceContextSetFromOptions()`
219 @*/
220 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type)
221 {
222   PetscFunctionBegin;
223   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
224   PetscAssertPointer(type, 2);
225   *type = dctx->streamType;
226   PetscFunctionReturn(PETSC_SUCCESS);
227 }
228 
229 /*
230   Actual function to set the device.
231 
232   1. Repeatedly destroying and recreating internal data structures (like streams and events)
233      for recycled PetscDeviceContexts is not free. If done often, it does add up.
234   2. The vast majority of PetscDeviceContexts are created by PETSc either as children or
235      default contexts. The default contexts *never* change type, and the children are extremely
236      unlikely to (chances are if you fork once, you will fork again very soon).
237   3. The only time this calculus changes is if the user themselves sets the device type. In
238      this case we do not know what the user has changed, so must always wipe the slate clean.
239 
240   Thus we need to keep track whether the user explicitly sets the device contexts device.
241 */
242 static PetscErrorCode PetscDeviceContextSetDevice_Private(PetscDeviceContext dctx, PetscDevice device, PetscBool user_set)
243 {
244   PetscFunctionBegin;
245   // do not use getoptionalnullcontext here since we do not want the user to change its device
246   PetscValidDeviceContext(dctx, 1);
247   PetscValidDevice(device, 2);
248   if (dctx->device && (dctx->device->id == device->id)) PetscFunctionReturn(PETSC_SUCCESS);
249   PetscCall(PetscLogEventBegin(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr));
250   PetscTryTypeMethod(dctx, destroy);
251   PetscCall(PetscDeviceDestroy(&dctx->device));
252   PetscCall(PetscMemzero(dctx->ops, sizeof(*dctx->ops)));
253   PetscCall(PetscDeviceReference_Internal(device));
254   // set it before calling the method
255   dctx->device = device;
256   PetscCall((*device->ops->createcontext)(dctx));
257   PetscCall(PetscLogEventEnd(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr));
258   dctx->setup         = PETSC_FALSE;
259   dctx->usersetdevice = user_set;
260   PetscFunctionReturn(PETSC_SUCCESS);
261 }
262 
263 PetscErrorCode PetscDeviceContextSetDefaultDeviceForType_Internal(PetscDeviceContext dctx, PetscDeviceType type)
264 {
265   PetscDevice device;
266 
267   PetscFunctionBegin;
268   PetscCall(PetscDeviceGetDefaultForType_Internal(type, &device));
269   PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_FALSE));
270   PetscFunctionReturn(PETSC_SUCCESS);
271 }
272 
273 /*@C
274   PetscDeviceContextSetDevice - Set the underlying `PetscDevice` for a `PetscDeviceContext`
275 
276   Not Collective
277 
278   Input Parameters:
279 + dctx   - The `PetscDeviceContext`
280 - device - The `PetscDevice`
281 
282   Level: intermediate
283 
284   Notes:
285   This routine is effectively `PetscDeviceContext`'s "set-type" (so every `PetscDeviceContext` must
286   also have an attached `PetscDevice`). Unlike the usual set-type semantics, it is not strictly
287   necessary to set a contexts device to enable usage, any created `PetscDeviceContext`s will
288   always come equipped with the "default" device.
289 
290   This routine is a no-op if `device` is already attached to `dctx`.
291 
292   This routine may (but is very unlikely to) initialize the backend device and may incur
293   synchronization.
294 
295 .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()`,
296 `PetscDeviceContextGetDeviceType()`
297 @*/
298 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device)
299 {
300   PetscFunctionBegin;
301   PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_TRUE));
302   PetscFunctionReturn(PETSC_SUCCESS);
303 }
304 
305 /*@C
306   PetscDeviceContextGetDevice - Get the underlying `PetscDevice` for a `PetscDeviceContext`
307 
308   Not Collective
309 
310   Input Parameter:
311 . dctx - the `PetscDeviceContext`
312 
313   Output Parameter:
314 . device - The `PetscDevice`
315 
316   Level: intermediate
317 
318   Note:
319   This is a borrowed reference, the user should not destroy `device`.
320 
321 .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice`, `PetscDeviceContextGetDeviceType()`
322 @*/
323 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device)
324 {
325   PetscFunctionBegin;
326   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
327   PetscAssertPointer(device, 2);
328   PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt64_FMT " has no attached PetscDevice to get", PetscObjectCast(dctx)->id);
329   *device = dctx->device;
330   PetscFunctionReturn(PETSC_SUCCESS);
331 }
332 
333 /*@C
334   PetscDeviceContextGetDeviceType - Get the `PetscDeviceType` for a `PetscDeviceContext`
335 
336   Not Collective
337 
338   Input Parameter:
339 . dctx - The `PetscDeviceContext`
340 
341   Output Parameter:
342 . type - The `PetscDeviceType`
343 
344   Level: beginner
345 
346   Note:
347   This routine is a convenience shorthand for `PetscDeviceContextGetDevice()` ->
348   `PetscDeviceGetType()`.
349 
350 .seealso: `PetscDeviceType`, `PetscDeviceContextGetDevice()`, `PetscDeviceGetType()`, `PetscDevice`
351 @*/
352 PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext dctx, PetscDeviceType *type)
353 {
354   PetscDevice device = nullptr;
355 
356   PetscFunctionBegin;
357   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
358   PetscAssertPointer(type, 2);
359   PetscCall(PetscDeviceContextGetDevice(dctx, &device));
360   PetscCall(PetscDeviceGetType(device, type));
361   PetscFunctionReturn(PETSC_SUCCESS);
362 }
363 
364 /*@C
365   PetscDeviceContextSetUp - Prepares a `PetscDeviceContext` for use
366 
367   Not Collective
368 
369   Input Parameter:
370 . dctx - The `PetscDeviceContext`
371 
372   Level: beginner
373 
374   Developer Notes:
375   This routine is usually the stage where a `PetscDeviceContext` acquires device-side data
376   structures such as streams, events, and (possibly) handles.
377 
378 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
379 `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()`
380 @*/
381 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx)
382 {
383   PetscFunctionBegin;
384   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
385   if (dctx->setup) PetscFunctionReturn(PETSC_SUCCESS);
386   if (!dctx->device) {
387     const auto default_dtype = PETSC_DEVICE_DEFAULT();
388 
389     PetscCall(PetscInfo(dctx, "PetscDeviceContext %" PetscInt64_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", PetscObjectCast(dctx)->id, PetscDeviceTypes[default_dtype]));
390     PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, default_dtype));
391   }
392   PetscCall(PetscLogEventBegin(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr));
393   PetscUseTypeMethod(dctx, setup);
394   PetscCall(PetscLogEventEnd(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr));
395   dctx->setup = PETSC_TRUE;
396   PetscFunctionReturn(PETSC_SUCCESS);
397 }
398 
399 static PetscErrorCode PetscDeviceContextDuplicate_Private(PetscDeviceContext dctx, PetscStreamType stype, PetscDeviceContext *dctxdup)
400 {
401   PetscFunctionBegin;
402   PetscCall(PetscLogEventBegin(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr));
403   PetscCall(PetscDeviceContextCreate(dctxdup));
404   PetscCall(PetscDeviceContextSetStreamType(*dctxdup, stype));
405   if (const auto device = dctx->device) PetscCall(PetscDeviceContextSetDevice_Private(*dctxdup, device, dctx->usersetdevice));
406   PetscCall(PetscDeviceContextSetUp(*dctxdup));
407   PetscCall(PetscLogEventEnd(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr));
408   PetscFunctionReturn(PETSC_SUCCESS);
409 }
410 
411 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
412 /*@C
413   PetscDeviceContextDuplicate - Duplicates a `PetscDeviceContext` object
414 
415   Not Collective
416 
417   Input Parameter:
418 . dctx - The `PetscDeviceContext` to duplicate
419 
420   Output Parameter:
421 . dctxdup - The duplicated `PetscDeviceContext`
422 
423   Level: beginner
424 
425   Notes:
426   This is a shorthand method for creating a `PetscDeviceContext` with the exact same settings as
427   another. Note however that `dctxdup` does not share any of the underlying data with `dctx`,
428   (including its current stream-state) they are completely separate objects.
429 
430   There is no implied ordering between `dctx` or `dctxdup`.
431 
432   DAG representation:
433 .vb
434   time ->
435 
436   -> dctx - |= CALL =| - dctx ---->
437                        - dctxdup ->
438 .ve
439 
440 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
441 `PetscDeviceContextSetStreamType()`
442 @*/
443 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup)
444 {
445   auto stype = PETSC_STREAM_DEFAULT;
446 
447   PetscFunctionBegin;
448   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
449   PetscAssertPointer(dctxdup, 2);
450   PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
451   PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, dctxdup));
452   PetscFunctionReturn(PETSC_SUCCESS);
453 }
454 
455 /*@C
456   PetscDeviceContextQueryIdle - Returns whether or not a `PetscDeviceContext` is idle
457 
458   Not Collective
459 
460   Input Parameter:
461 . dctx - The `PetscDeviceContext`
462 
463   Output Parameter:
464 . idle - `PETSC_TRUE` if `dctx` has NO work, `PETSC_FALSE` if it has work
465 
466   Level: intermediate
467 
468   Note:
469   This routine only refers a singular context and does NOT take any of its children into
470   account. That is, if `dctx` is idle but has dependents who do have work this routine still
471   returns `PETSC_TRUE`.
472 
473 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()`
474 @*/
475 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle)
476 {
477   PetscFunctionBegin;
478   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
479   PetscAssertPointer(idle, 2);
480   PetscCall(PetscLogEventBegin(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr));
481   PetscUseTypeMethod(dctx, query, idle);
482   PetscCall(PetscLogEventEnd(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr));
483   PetscCall(PetscInfo(dctx, "PetscDeviceContext ('%s', id %" PetscInt64_FMT ") %s idle\n", PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", PetscObjectCast(dctx)->id, *idle ? "was" : "was not"));
484   PetscFunctionReturn(PETSC_SUCCESS);
485 }
486 
487 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
488 /*@C
489   PetscDeviceContextWaitForContext - Make one context wait for another context to finish
490 
491   Not Collective
492 
493   Input Parameters:
494 + dctxa - The `PetscDeviceContext` object that is waiting
495 - dctxb - The `PetscDeviceContext` object that is being waited on
496 
497   Level: beginner
498 
499   Notes:
500   Serializes two `PetscDeviceContext`s. Serialization is performed asynchronously; the host
501   does not wait for the serialization to actually occur.
502 
503   This routine uses only the state of `dctxb` at the moment this routine was called, so any
504   future work queued will not affect `dctxa`. It is safe to pass the same context to both
505   arguments (in which case this routine does nothing).
506 
507   DAG representation:
508 .vb
509   time ->
510 
511   -> dctxa ---/- |= CALL =| - dctxa ->
512              /
513   -> dctxb -/------------------------>
514 .ve
515 
516 .N ASYNC_API
517 
518 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()`
519 @*/
520 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb)
521 {
522   PetscObjectId bid;
523 
524   PetscFunctionBegin;
525   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxa));
526   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxb));
527   PetscCheckCompatibleDeviceContexts(dctxa, 1, dctxb, 2);
528   if (dctxa == dctxb) PetscFunctionReturn(PETSC_SUCCESS);
529   bid = PetscObjectCast(dctxb)->id;
530   PetscCall(PetscLogEventBegin(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr));
531   PetscUseTypeMethod(dctxa, waitforcontext, dctxb);
532   PetscCallCXX(CxxDataCast(dctxa)->upstream()[bid] = CxxDataCast(dctxb)->weak_snapshot());
533   PetscCall(PetscLogEventEnd(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr));
534   PetscCall(PetscInfo(dctxa, "dctx %" PetscInt64_FMT " waiting on dctx %" PetscInt64_FMT "\n", PetscObjectCast(dctxa)->id, bid));
535   PetscCall(PetscObjectStateIncrease(PetscObjectCast(dctxa)));
536   PetscFunctionReturn(PETSC_SUCCESS);
537 }
538 
539 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
540 /*@C
541   PetscDeviceContextForkWithStreamType - Create a set of dependent child contexts from a parent
542   context with a prescribed `PetscStreamType`
543 
544   Not Collective, Asynchronous
545 
546   Input Parameters:
547 + dctx  - The parent `PetscDeviceContext`
548 . stype - The prescribed `PetscStreamType`
549 - n     - The number of children to create
550 
551   Output Parameter:
552 . dsub - The created child context(s)
553 
554   Level: intermediate
555 
556   Notes:
557   This routine creates `n` edges of a DAG from a source node which are causally dependent on the
558   source node. This causal dependency is established as-if by calling
559   `PetscDeviceContextWaitForContext()` on every child.
560 
561   `dsub` is allocated by this routine and has its lifetime bounded by `dctx`. That is, `dctx`
562   expects to free `dsub` (via `PetscDeviceContextJoin()`) before it itself is destroyed.
563 
564   This routine only accounts for work queued on `dctx` up until calling this routine, any
565   subsequent work enqueued on `dctx` has no effect on `dsub`.
566 
567   The `PetscStreamType` of `dctx` does not have to equal `stype`. In fact, it is often the case
568   that they are different. This is useful in cases where a routine can locally exploit stream
569   parallelism without needing to worry about what stream type the incoming `PetscDeviceContext`
570   carries.
571 
572   DAG representation:
573 .vb
574   time ->
575 
576   -> dctx - |= CALL =| -\----> dctx ------>
577                          \---> dsub[0] --->
578                           \--> ... ------->
579                            \-> dsub[n-1] ->
580 .ve
581 
582 .N ASYNC_API
583 
584 .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`,
585 `PetscDeviceContextQueryIdle()`, `PetscDeviceContextWaitForContext()`
586 @*/
587 PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext dctx, PetscStreamType stype, PetscInt n, PetscDeviceContext **dsub)
588 {
589   // debugging only
590   std::string idList;
591   auto        ninput = n;
592 
593   PetscFunctionBegin;
594   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
595   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n);
596   PetscAssertPointer(dsub, 4);
597   *dsub = nullptr;
598   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
599   if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n));
600   PetscCall(PetscLogEventBegin(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr));
601   /* update child totals */
602   dctx->numChildren += n;
603   /* now to find out if we have room */
604   if (dctx->numChildren > dctx->maxNumChildren) {
605     const auto numChildren    = dctx->numChildren;
606     auto      &maxNumChildren = dctx->maxNumChildren;
607     auto       numAllocated   = numChildren;
608 
609     /* no room, either from having too many kids or not having any */
610     if (auto &childIDs = dctx->childIDs) {
611       // the difference is backwards because we have not updated maxNumChildren yet
612       numAllocated -= maxNumChildren;
613       /* have existing children, must reallocate them */
614       PetscCall(PetscRealloc(numChildren * sizeof(*childIDs), &childIDs));
615       /* clear the extra memory since realloc doesn't do it for us */
616       PetscCall(PetscArrayzero(std::next(childIDs, maxNumChildren), numAllocated));
617     } else {
618       /* have no children */
619       PetscCall(PetscCalloc1(numChildren, &childIDs));
620     }
621     /* update total number of children */
622     maxNumChildren = numChildren;
623   }
624   PetscCall(PetscMalloc1(n, dsub));
625   for (PetscInt i = 0; ninput && (i < dctx->numChildren); ++i) {
626     auto &childID = dctx->childIDs[i];
627     /* empty child slot */
628     if (!childID) {
629       auto &childctx = (*dsub)[i];
630 
631       /* create the child context in the image of its parent */
632       PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, &childctx));
633       PetscCall(PetscDeviceContextWaitForContext(childctx, dctx));
634       /* register the child with its parent */
635       PetscCall(PetscObjectGetId(PetscObjectCast(childctx), &childID));
636       if (PetscDefined(USE_DEBUG_AND_INFO)) {
637         PetscCallCXX(idList += std::to_string(childID));
638         if (ninput != 1) PetscCallCXX(idList += ", ");
639       }
640       --ninput;
641     }
642   }
643   PetscCall(PetscLogEventEnd(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr));
644   PetscCall(PetscDebugInfo(dctx, "Forked %" PetscInt_FMT " children from parent %" PetscInt64_FMT " with IDs: %s\n", n, PetscObjectCast(dctx)->id, idList.c_str()));
645   PetscFunctionReturn(PETSC_SUCCESS);
646 }
647 
648 /*@C
649   PetscDeviceContextFork - Create a set of dependent child contexts from a parent context
650 
651   Not Collective, Asynchronous
652 
653   Input Parameters:
654 + dctx - The parent `PetscDeviceContext`
655 - n    - The number of children to create
656 
657   Output Parameter:
658 . dsub - The created child context(s)
659 
660   Level: beginner
661 
662   Notes:
663   Behaves identically to `PetscDeviceContextForkWithStreamType()` except that the prescribed
664   `PetscStreamType` is taken from `dctx`. In effect this routine is shorthand for\:
665 
666 .vb
667   PetscStreamType stype;
668 
669   PetscDeviceContextGetStreamType(dctx, &stype);
670   PetscDeviceContextForkWithStreamType(dctx, stype, ...);
671 .ve
672 
673 .N ASYNC_API
674 
675 .seealso: `PetscDeviceContextForkWithStreamType()`, `PetscDeviceContextJoin()`,
676 `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()`
677 @*/
678 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub)
679 {
680   auto stype = PETSC_STREAM_DEFAULT;
681 
682   PetscFunctionBegin;
683   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
684   PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
685   PetscCall(PetscDeviceContextForkWithStreamType(dctx, stype, n, dsub));
686   PetscFunctionReturn(PETSC_SUCCESS);
687 }
688 
689 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
690 /*@C
691   PetscDeviceContextJoin - Converge a set of child contexts
692 
693   Not Collective, Asynchronous
694 
695   Input Parameters:
696 + dctx     - A `PetscDeviceContext` to converge on
697 . n        - The number of sub contexts to converge
698 . joinMode - The type of join to perform
699 - dsub     - The sub contexts to converge
700 
701   Level: beginner
702 
703   Notes:
704   If `PetscDeviceContextFork()` creates `n` edges from a source node which all depend on the source
705   node, then this routine is the exact mirror. That is, it creates a node (represented in `dctx`)
706   which receives `n` edges (and optionally destroys them) which is dependent on the completion
707   of all incoming edges.
708 
709   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`. All contexts in `dsub` will be
710   destroyed by this routine. Thus all sub contexts must have been created with the `dctx`
711   passed to this routine.
712 
713   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`. All sub contexts will additionally wait on
714   `dctx` after converging. This has the effect of "synchronizing" the outgoing edges. Note the
715   sync suffix does NOT refer to the host, i.e. this routine does NOT call
716   `PetscDeviceSynchronize()`.
717 
718   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`. `dctx` waits for all sub contexts but
719   the sub contexts do not wait for one another or `dctx` afterwards.
720 
721   DAG representations:
722   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`
723 .vb
724   time ->
725 
726   -> dctx ---------/- |= CALL =| - dctx ->
727   -> dsub[0] -----/
728   ->  ... -------/
729   -> dsub[n-1] -/
730 .ve
731   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`
732 .vb
733   time ->
734 
735   -> dctx ---------/- |= CALL =| -\----> dctx ------>
736   -> dsub[0] -----/                \---> dsub[0] --->
737   ->  ... -------/                  \--> ... ------->
738   -> dsub[n-1] -/                    \-> dsub[n-1] ->
739 .ve
740   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`
741 .vb
742   time ->
743 
744   -> dctx ----------/- |= CALL =| - dctx ->
745   -> dsub[0] ------/----------------------->
746   ->  ... --------/------------------------>
747   -> dsub[n-1] --/------------------------->
748 .ve
749 
750 .N ASYNC_API
751 
752 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextForkWithStreamType()`,
753 `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode`
754 @*/
755 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub)
756 {
757   // debugging only
758   std::string idList;
759 
760   PetscFunctionBegin;
761   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
762   /* validity of dctx is checked in the wait-for loop */
763   PetscAssertPointer(dsub, 4);
764   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n);
765   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
766   if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n));
767   /* first dctx waits on all the incoming edges */
768   PetscCall(PetscLogEventBegin(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr));
769   for (PetscInt i = 0; i < n; ++i) {
770     PetscCheckCompatibleDeviceContexts(dctx, 1, (*dsub)[i], 4);
771     PetscCall(PetscDeviceContextWaitForContext(dctx, (*dsub)[i]));
772     if (PetscDefined(USE_DEBUG_AND_INFO)) {
773       PetscCallCXX(idList += std::to_string(PetscObjectCast((*dsub)[i])->id));
774       if (i + 1 < n) PetscCallCXX(idList += ", ");
775     }
776   }
777 
778   /* now we handle the aftermath */
779   switch (joinMode) {
780   case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: {
781     const auto children = dctx->childIDs;
782     const auto maxchild = dctx->maxNumChildren;
783     auto      &nchild   = dctx->numChildren;
784     PetscInt   j        = 0;
785 
786     PetscCheck(n <= nchild, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent", n, nchild);
787     /* update child count while it's still fresh in memory */
788     nchild -= n;
789     for (PetscInt i = 0; i < maxchild; ++i) {
790       if (children[i] && (children[i] == PetscObjectCast((*dsub)[j])->id)) {
791         /* child is one of ours, can destroy it */
792         PetscCall(PetscDeviceContextDestroy((*dsub) + j));
793         /* reset the child slot */
794         children[i] = 0;
795         if (++j == n) break;
796       }
797     }
798     /* gone through the loop but did not find every child */
799     PetscCheck(j == n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out in", n - j);
800     PetscCall(PetscFree(*dsub));
801   } break;
802   case PETSC_DEVICE_CONTEXT_JOIN_SYNC:
803     for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i], dctx));
804   case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC:
805     break;
806   default:
807     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given");
808   }
809   PetscCall(PetscLogEventEnd(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr));
810 
811   PetscCall(PetscDebugInfo(dctx, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt64_FMT ", mode %s with IDs: %s\n", n, PetscObjectCast(dctx)->id, PetscDeviceContextJoinModes[joinMode], idList.c_str()));
812   PetscFunctionReturn(PETSC_SUCCESS);
813 }
814 
815 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
816 /*@C
817   PetscDeviceContextSynchronize - Block the host until all work queued on a
818   `PetscDeviceContext` has finished
819 
820   Not Collective
821 
822   Input Parameter:
823 . dctx - The `PetscDeviceContext` to synchronize
824 
825   Level: beginner
826 
827   Notes:
828   The host will not return from this routine until `dctx` is idle. Any and all memory
829   operations queued on or otherwise associated with (either explicitly or implicitly via
830   dependencies) are guaranteed to have finished and be globally visible on return.
831 
832   In effect, this routine serves as memory and execution barrier.
833 
834   DAG representation:
835 .vb
836   time ->
837 
838   -> dctx - |= CALL =| - dctx ->
839 .ve
840 
841 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()`
842 @*/
843 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx)
844 {
845   PetscFunctionBegin;
846   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
847   PetscCall(PetscLogEventBegin(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr));
848   /* if it isn't setup there is nothing to sync on */
849   if (dctx->setup) {
850     PetscUseTypeMethod(dctx, synchronize);
851     PetscCall(PetscDeviceContextSyncClearMap_Internal(dctx));
852   }
853   PetscCall(PetscLogEventEnd(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr));
854   PetscFunctionReturn(PETSC_SUCCESS);
855 }
856 
857 /* every device type has a vector of null PetscDeviceContexts -- one for each device */
858 static auto nullContexts          = std::array<std::vector<PetscDeviceContext>, PETSC_DEVICE_MAX>{};
859 static auto nullContextsFinalizer = false;
860 
861 static PetscErrorCode PetscDeviceContextGetNullContextForDevice_Private(PetscBool user_set_device, PetscDevice device, PetscDeviceContext *dctx)
862 {
863   PetscInt        devid;
864   PetscDeviceType dtype;
865 
866   PetscFunctionBegin;
867   PetscValidDevice(device, 2);
868   PetscAssertPointer(dctx, 3);
869   if (PetscUnlikely(!nullContextsFinalizer)) {
870     nullContextsFinalizer = true;
871     PetscCall(PetscRegisterFinalize([] {
872       PetscFunctionBegin;
873       for (auto &&dvec : nullContexts) {
874         for (auto &&dctx : dvec) PetscCall(PetscDeviceContextDestroy(&dctx));
875         PetscCallCXX(dvec.clear());
876       }
877       nullContextsFinalizer = false;
878       PetscFunctionReturn(PETSC_SUCCESS);
879     }));
880   }
881   PetscCall(PetscDeviceGetDeviceId(device, &devid));
882   PetscCall(PetscDeviceGetType(device, &dtype));
883   {
884     auto &ctxlist = nullContexts[dtype];
885 
886     PetscCheck(devid >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Device ID (%" PetscInt_FMT ") must be positive", devid);
887     // need to resize the container if not big enough because incrementing the iterator in
888     // std::next() (if we haven't initialized that ctx yet) may cause it to fall outside the
889     // current size of the container.
890     if (static_cast<std::size_t>(devid) >= ctxlist.size()) PetscCallCXX(ctxlist.resize(devid + 1));
891     if (PetscUnlikely(!ctxlist[devid])) {
892       // we have not seen this device before
893       PetscCall(PetscDeviceContextCreate(dctx));
894       PetscCall(PetscInfo(*dctx, "Initializing null PetscDeviceContext (of type %s) for device %" PetscInt_FMT "\n", PetscDeviceTypes[dtype], devid));
895       {
896         const auto pobj   = PetscObjectCast(*dctx);
897         const auto name   = "null context " + std::to_string(devid);
898         const auto prefix = "null_context_" + std::to_string(devid) + '_';
899 
900         PetscCall(PetscObjectSetName(pobj, name.c_str()));
901         PetscCall(PetscObjectSetOptionsPrefix(pobj, prefix.c_str()));
902       }
903       PetscCall(PetscDeviceContextSetStreamType(*dctx, PETSC_STREAM_DEFAULT));
904       PetscCall(PetscDeviceContextSetDevice_Private(*dctx, device, user_set_device));
905       PetscCall(PetscDeviceContextSetUp(*dctx));
906       // would use ctxlist.cbegin() but GCC 4.8 can't handle const iterator insert!
907       PetscCallCXX(ctxlist.insert(std::next(ctxlist.begin(), devid), *dctx));
908     } else *dctx = ctxlist[devid];
909   }
910   PetscFunctionReturn(PETSC_SUCCESS);
911 }
912 
913 /*
914   Gets the "NULL" context for the current PetscDeviceType and PetscDevice. NULL contexts are
915   guaranteed to always be globally blocking.
916 */
917 PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *dctx)
918 {
919   PetscDeviceContext gctx;
920   PetscDevice        gdev = nullptr;
921 
922   PetscFunctionBegin;
923   PetscAssertPointer(dctx, 1);
924   PetscCall(PetscDeviceContextGetCurrentContext(&gctx));
925   PetscCall(PetscDeviceContextGetDevice(gctx, &gdev));
926   PetscCall(PetscDeviceContextGetNullContextForDevice_Private(gctx->usersetdevice, gdev, dctx));
927   PetscFunctionReturn(PETSC_SUCCESS);
928 }
929 
930 /*@C
931   PetscDeviceContextSetFromOptions - Configure a `PetscDeviceContext` from the options database
932 
933   Collective on `comm` or `dctx`
934 
935   Input Parameters:
936 + comm - MPI communicator on which to query the options database (optional)
937 - dctx - The `PetscDeviceContext` to configure
938 
939   Output Parameter:
940 . dctx - The `PetscDeviceContext`
941 
942   Options Database Keys:
943 + -device_context_stream_type - type of stream to create inside the `PetscDeviceContext` -
944    `PetscDeviceContextSetStreamType()`
945 - -device_context_device_type - the type of `PetscDevice` to attach by default - `PetscDeviceType`
946 
947   Level: beginner
948 
949   Note:
950   The user may pass `MPI_COMM_NULL` for `comm` in which case the communicator of `dctx` is
951   used (which is always `PETSC_COMM_SELF`).
952 
953 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()`,
954 `PetscDeviceContextView()`
955 @*/
956 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, PetscDeviceContext dctx)
957 {
958   const auto pobj     = PetscObjectCast(dctx);
959   auto       dtype    = std::make_pair(PETSC_DEVICE_DEFAULT(), PETSC_FALSE);
960   auto       stype    = std::make_pair(PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE, PETSC_FALSE);
961   MPI_Comm   old_comm = PETSC_COMM_SELF;
962 
963   PetscFunctionBegin;
964   // do not user getoptionalnullcontext here, the user is not allowed to set it from options!
965   PetscValidDeviceContext(dctx, 2);
966   /* set the device type first */
967   if (const auto device = dctx->device) PetscCall(PetscDeviceGetType(device, &dtype.first));
968   PetscCall(PetscDeviceContextGetStreamType(dctx, &stype.first));
969 
970   if (comm == MPI_COMM_NULL) {
971     PetscCall(PetscObjectGetComm(pobj, &comm));
972   } else {
973     // briefly set the communicator for dctx (it is always PETSC_COMM_SELF) so
974     // PetscObjectOptionsBegin() behaves as if dctx had comm
975     old_comm = Petsc::util::exchange(pobj->comm, comm);
976   }
977 
978   PetscObjectOptionsBegin(pobj);
979   PetscCall(PetscDeviceContextQueryOptions_Internal(PetscOptionsObject, dtype, stype));
980   PetscOptionsEnd();
981   // reset the comm (should be PETSC_COMM_SELF)
982   if (comm != MPI_COMM_NULL) pobj->comm = old_comm;
983   if (dtype.second) PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, dtype.first));
984   if (stype.second) PetscCall(PetscDeviceContextSetStreamType(dctx, stype.first));
985   PetscCall(PetscDeviceContextSetUp(dctx));
986   PetscFunctionReturn(PETSC_SUCCESS);
987 }
988 
989 /*@
990   PetscDeviceContextView - View a `PetscDeviceContext`
991 
992   Collective on `viewer`
993 
994   Input Parameters:
995 + dctx   - The `PetscDeviceContext`
996 - viewer - The `PetscViewer` to view `dctx` with (may be `NULL`)
997 
998   Level: beginner
999 
1000   Note:
1001   If `viewer` is `NULL`, `PETSC_VIEWER_STDOUT_WORLD` is used instead, in which case this
1002   routine is collective on `PETSC_COMM_WORLD`.
1003 
1004 .seealso: `PetscDeviceContextViewFromOptions()`, `PetscDeviceView()`, `PETSC_VIEWER_STDOUT_WORLD`, `PetscDeviceContextCreate()`
1005 @*/
1006 PetscErrorCode PetscDeviceContextView(PetscDeviceContext dctx, PetscViewer viewer)
1007 {
1008   PetscBool isascii;
1009 
1010   PetscFunctionBegin;
1011   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1012   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer));
1013   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
1014   PetscCall(PetscObjectTypeCompare(PetscObjectCast(viewer), PETSCVIEWERASCII, &isascii));
1015   if (isascii) {
1016     auto        stype = PETSC_STREAM_DEFAULT;
1017     PetscViewer sub;
1018 
1019     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sub));
1020     PetscCall(PetscObjectPrintClassNamePrefixType(PetscObjectCast(dctx), sub));
1021     PetscCall(PetscViewerASCIIPushTab(sub));
1022     PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
1023     PetscCall(PetscViewerASCIIPrintf(sub, "stream type: %s\n", PetscStreamTypes[stype]));
1024     PetscCall(PetscViewerASCIIPrintf(sub, "children: %" PetscInt_FMT "\n", dctx->numChildren));
1025     if (const auto nchild = dctx->numChildren) {
1026       PetscCall(PetscViewerASCIIPushTab(sub));
1027       for (PetscInt i = 0; i < nchild; ++i) {
1028         if (i == nchild - 1) {
1029           PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT, dctx->childIDs[i]));
1030         } else {
1031           PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT ", ", dctx->childIDs[i]));
1032         }
1033       }
1034     }
1035     PetscCall(PetscViewerASCIIPopTab(sub));
1036     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sub));
1037     PetscCall(PetscViewerASCIIPushTab(viewer));
1038   }
1039   if (const auto device = dctx->device) PetscCall(PetscDeviceView(device, viewer));
1040   if (isascii) PetscCall(PetscViewerASCIIPopTab(viewer));
1041   PetscFunctionReturn(PETSC_SUCCESS);
1042 }
1043 
1044 /*@
1045   PetscDeviceContextViewFromOptions - View a `PetscDeviceContext` from options
1046 
1047   Input Parameters:
1048 + dctx - The `PetscDeviceContext` to view
1049 . obj  - Optional `PetscObject` to associate (may be `NULL`)
1050 - name - The command line option
1051 
1052   Level: beginner
1053 
1054 .seealso: `PetscDeviceContextView()`, `PetscObjectViewFromOptions()`, `PetscDeviceContextCreate()`
1055 @*/
1056 PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext dctx, PetscObject obj, const char name[])
1057 {
1058   PetscFunctionBegin;
1059   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1060   if (obj) PetscValidHeader(obj, 2);
1061   PetscAssertPointer(name, 3);
1062   PetscCall(PetscObjectViewFromOptions(PetscObjectCast(dctx), obj, name));
1063   PetscFunctionReturn(PETSC_SUCCESS);
1064 }
1065 
1066 /*@C
1067   PetscDeviceContextGetStreamHandle - Return a handle to the underlying stream of the current device context
1068 
1069   Input Parameter:
1070 . dctx - The `PetscDeviceContext` to get the stream from
1071 
1072   Output Parameter:
1073 . handle - A pointer to the handle to the stream
1074 
1075   Level: developer
1076 
1077   Note:
1078   This routine is dangerous. It exists only for the most experienced users and
1079   internal PETSc development.
1080 
1081   There is no way for PETSc's auto-dependency system to track what the caller does with the
1082   stream.
1083 
1084   If the user uses the stream to copy memory that was previously modified by PETSc, or launches
1085   kernels that modify memory with the stream, it is the users responsibility to inform PETSc of
1086   their actions via `PetscDeviceContextMarkIntentFromID()`. Failure to do so may introduce a
1087   race condition. This race condition may manifest in nondeterministic ways.
1088 
1089   Alternatively, the user may synchronize the stream immediately before and after use. This is
1090   the safest option.
1091 
1092   Example Usage:
1093 .vb
1094   PetscDeviceContext dctx;
1095   PetscDeviceType    type;
1096   void               *handle;
1097 
1098   PetscDeviceContextGetCurrentContext(&dctx);
1099   PetscDeviceContextGetStreamHandle(dctx, &handle);
1100   PetscDeviceContextGetDeviceType(dctx, &type);
1101 
1102   if (type == PETSC_DEVICE_CUDA) {
1103     cudaStream_t stream = *(cudaStream_t *)handle;
1104 
1105     my_cuda_kernel<<<1, 2, 3, stream>>>();
1106   }
1107 .ve
1108   Alternatively, if type of `PetscDeviceContext` is known (for example `PETSC_DEVICE_HIP`), the
1109   user may pass in a pointer to stream handle directly\:
1110 .vb
1111   hipStream_t *stream;
1112 
1113   // note the cast to void **
1114   PetscDeviceContextGetStreamHandle(dctx, (void **)&stream);
1115   // note the dereference
1116   my_hip_kernel<<<1, 2, 3, *stream>>>();
1117 .ve
1118 
1119 .N ASYNC_API
1120 
1121 .seealso: `PetscDeviceContext`
1122 @*/
1123 PetscErrorCode PetscDeviceContextGetStreamHandle(PetscDeviceContext dctx, void **handle)
1124 {
1125   PetscFunctionBegin;
1126   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1127   PetscAssertPointer(handle, 2);
1128   PetscCall(PetscDeviceContextGetStreamHandle_Internal(dctx, handle));
1129   PetscFunctionReturn(PETSC_SUCCESS);
1130 }
1131