xref: /petsc/src/sys/objects/device/interface/dcontext.cxx (revision 47f0ea9ef136739b95e023b641b40b5b205c25bf)
1 #include "petscdevice_interface_internal.hpp" /*I <petscdevice.h> I*/
2 #include <petsc/private/viewerimpl.h>         // _p_PetscViewer for PetscObjectCast()
3 
4 #include <petsc/private/cpp/object_pool.hpp>
5 #include <petsc/private/cpp/utility.hpp>
6 #include <petsc/private/cpp/array.hpp>
7 
8 #include <vector>
9 #include <string> // std::to_string among other things
10 
11 /* Define the allocator */
12 class PetscDeviceContextConstructor : public Petsc::ConstructorInterface<_p_PetscDeviceContext, PetscDeviceContextConstructor> {
13 public:
14   PetscErrorCode construct_(PetscDeviceContext dctx) const noexcept
15   {
16     PetscFunctionBegin;
17     PetscCall(PetscArrayzero(dctx, 1));
18     PetscCall(PetscHeaderCreate_Private((PetscObject)dctx, PETSC_DEVICE_CONTEXT_CLASSID, "PetscDeviceContext", "PetscDeviceContext", "Sys", PETSC_COMM_SELF, (PetscObjectDestroyFunction)PetscDeviceContextDestroy, (PetscObjectViewFunction)PetscDeviceContextView));
19     PetscCall(PetscLogObjectCreate((PetscObject)dctx));
20 
21     PetscCallCXX(PetscObjectCast(dctx)->cpp = new CxxData{dctx});
22     PetscCall(underlying().reset(dctx, false));
23     PetscFunctionReturn(PETSC_SUCCESS);
24   }
25 
26   static PetscErrorCode destroy_(PetscDeviceContext dctx) noexcept
27   {
28     PetscFunctionBegin;
29     PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren);
30     PetscTryTypeMethod(dctx, destroy);
31     PetscCall(PetscDeviceDestroy(&dctx->device));
32     PetscCall(PetscFree(dctx->childIDs));
33     delete CxxDataCast(dctx);
34     PetscCall(PetscHeaderDestroy_Private(PetscObjectCast(dctx), PETSC_FALSE));
35     PetscFunctionReturn(PETSC_SUCCESS);
36   }
37 
38   static PetscErrorCode reset_(PetscDeviceContext dctx, bool zero = true) noexcept
39   {
40     PetscFunctionBegin;
41     if (zero) {
42       // reset the device if the user set it
43       if (Petsc::util::exchange(dctx->usersetdevice, PETSC_FALSE)) {
44         PetscTryTypeMethod(dctx, destroy);
45         PetscCall(PetscDeviceDestroy(&dctx->device));
46         PetscCall(PetscArrayzero(dctx->ops, 1));
47         dctx->data = nullptr;
48       }
49       PetscCall(PetscHeaderReset_Internal(PetscObjectCast(dctx)));
50       dctx->numChildren = 0;
51       dctx->setup       = PETSC_FALSE;
52       // don't deallocate the child array, rather just zero it out
53       PetscCall(PetscArrayzero(dctx->childIDs, dctx->maxNumChildren));
54       PetscCall(CxxDataCast(dctx)->clear());
55       PetscCall(CxxDataCast(dctx)->reset_self(dctx));
56     }
57     dctx->streamType = PETSC_STREAM_DEFAULT_BLOCKING;
58     PetscFunctionReturn(PETSC_SUCCESS);
59   }
60 
61   static PetscErrorCode invalidate_(PetscDeviceContext dctx) noexcept
62   {
63     PetscFunctionBegin;
64     PetscCall(CxxDataCast(dctx)->reset_self(dctx));
65     PetscFunctionReturn(PETSC_SUCCESS);
66   }
67 };
68 
69 static Petsc::ObjectPool<_p_PetscDeviceContext, PetscDeviceContextConstructor> contextPool;
70 
71 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
72 /*@C
73   PetscDeviceContextCreate - Creates a `PetscDeviceContext`
74 
75   Not Collective
76 
77   Output Parameter:
78 . dctx - The `PetscDeviceContext`
79 
80   Level: beginner
81 
82   Note:
83   Unlike almost every other PETSc class it is advised that most users use
84   `PetscDeviceContextDuplicate()` rather than this routine to create new contexts. Contexts of
85   different types are incompatible with one another; using `PetscDeviceContextDuplicate()`
86   ensures compatible types.
87 
88   DAG representation:
89 .vb
90   time ->
91 
92   |= CALL =| - dctx ->
93 .ve
94 
95 .N ASYNC_API
96 
97 .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`,
98 `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`,
99 `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextView()`, `PetscDeviceContextDestroy()`
100 @*/
101 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx)
102 {
103   PetscFunctionBegin;
104   PetscAssertPointer(dctx, 1);
105   PetscCall(PetscDeviceInitializePackage());
106   PetscCall(PetscLogEventBegin(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr));
107   PetscCall(contextPool.allocate(dctx));
108   PetscCall(PetscLogEventEnd(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
113 /*@C
114   PetscDeviceContextDestroy - Frees a `PetscDeviceContext`
115 
116   Not Collective
117 
118   Input Parameter:
119 . dctx - The `PetscDeviceContext`
120 
121   Level: beginner
122 
123   Notes:
124   No implicit synchronization occurs due to this routine, all resources are released completely
125   asynchronously w.r.t. the host. If one needs to guarantee access to the data produced on
126   `dctx`'s stream the user is responsible for calling `PetscDeviceContextSynchronize()` before
127   calling this routine.
128 
129   DAG representation:
130 .vb
131   time ->
132 
133   -> dctx - |= CALL =|
134 .ve
135 
136   Developer Notes:
137   `dctx` is never actually "destroyed" in the classical sense. It is returned to an ever
138   growing pool of `PetscDeviceContext`s. There are currently no limits on the size of the pool,
139   this should perhaps be implemented.
140 
141 .N ASYNC_API
142 
143 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
144 `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()`
145 @*/
146 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx)
147 {
148   PetscFunctionBegin;
149   PetscAssertPointer(dctx, 1);
150   if (!*dctx) PetscFunctionReturn(PETSC_SUCCESS);
151   PetscCall(PetscLogEventBegin(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr));
152   if (--(PetscObjectCast(*dctx)->refct) <= 0) {
153     PetscCall(PetscDeviceContextCheckNotOrphaned_Internal(*dctx));
154     PetscCall(contextPool.deallocate(dctx));
155   }
156   PetscCall(PetscLogEventEnd(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr));
157   *dctx = nullptr;
158   PetscFunctionReturn(PETSC_SUCCESS);
159 }
160 
161 /*@C
162   PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a
163   `PetscDeviceContext`
164 
165   Not Collective
166 
167   Input Parameters:
168 + dctx - The `PetscDeviceContext`
169 - type - The `PetscStreamType`
170 
171   Level: beginner
172 
173   Note:
174   See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
175   types and their interactions. If the `PetscDeviceContext` was previously set up and stream
176   type was changed, you must call `PetscDeviceContextSetUp()` again after this routine.
177 
178 .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`,
179 `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()`
180 @*/
181 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type)
182 {
183   PetscFunctionBegin;
184   // do not use getoptionalnullcontext here since we do not want the user to change the stream
185   // type
186   PetscValidDeviceContext(dctx, 1);
187   PetscValidStreamType(type, 2);
188   // only need to do complex swapping if the object has already been setup
189   if (dctx->setup && (dctx->streamType != type)) {
190     dctx->setup = PETSC_FALSE;
191     PetscCall(PetscLogEventBegin(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr));
192     PetscUseTypeMethod(dctx, changestreamtype, type);
193     PetscCall(PetscLogEventEnd(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr));
194   }
195   dctx->streamType = type;
196   PetscFunctionReturn(PETSC_SUCCESS);
197 }
198 
199 /*@C
200   PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a
201   `PetscDeviceContext`
202 
203   Not Collective
204 
205   Input Parameter:
206 . dctx - The `PetscDeviceContext`
207 
208   Output Parameter:
209 . type - The `PetscStreamType`
210 
211   Level: beginner
212 
213   Note:
214   See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
215   types and their interactions
216 
217 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`,
218 `PetscDeviceContextSetFromOptions()`
219 @*/
220 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type)
221 {
222   PetscFunctionBegin;
223   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
224   PetscAssertPointer(type, 2);
225   *type = dctx->streamType;
226   PetscFunctionReturn(PETSC_SUCCESS);
227 }
228 
229 /*
230   Actual function to set the device.
231 
232   1. Repeatedly destroying and recreating internal data structures (like streams and events)
233      for recycled PetscDeviceContexts is not free. If done often, it does add up.
234   2. The vast majority of PetscDeviceContexts are created by PETSc either as children or
235      default contexts. The default contexts *never* change type, and the children are extremely
236      unlikely to (chances are if you fork once, you will fork again very soon).
237   3. The only time this calculus changes is if the user themselves sets the device type. In
238      this case we do not know what the user has changed, so must always wipe the slate clean.
239 
240   Thus we need to keep track whether the user explicitly sets the device contexts device.
241 */
242 static PetscErrorCode PetscDeviceContextSetDevice_Private(PetscDeviceContext dctx, PetscDevice device, PetscBool user_set)
243 {
244   PetscFunctionBegin;
245   // do not use getoptionalnullcontext here since we do not want the user to change its device
246   PetscValidDeviceContext(dctx, 1);
247   PetscValidDevice(device, 2);
248   if (dctx->device && (dctx->device->id == device->id)) PetscFunctionReturn(PETSC_SUCCESS);
249   PetscCall(PetscLogEventBegin(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr));
250   PetscTryTypeMethod(dctx, destroy);
251   PetscCall(PetscDeviceDestroy(&dctx->device));
252   PetscCall(PetscMemzero(dctx->ops, sizeof(*dctx->ops)));
253   PetscCall(PetscDeviceReference_Internal(device));
254   // set it before calling the method
255   dctx->device = device;
256   PetscCall((*device->ops->createcontext)(dctx));
257   PetscCall(PetscLogEventEnd(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr));
258   dctx->setup         = PETSC_FALSE;
259   dctx->usersetdevice = user_set;
260   PetscFunctionReturn(PETSC_SUCCESS);
261 }
262 
263 PetscErrorCode PetscDeviceContextSetDefaultDeviceForType_Internal(PetscDeviceContext dctx, PetscDeviceType type)
264 {
265   PetscDevice device;
266 
267   PetscFunctionBegin;
268   PetscCall(PetscDeviceGetDefaultForType_Internal(type, &device));
269   PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_FALSE));
270   PetscFunctionReturn(PETSC_SUCCESS);
271 }
272 
273 /*@C
274   PetscDeviceContextSetDevice - Set the underlying `PetscDevice` for a `PetscDeviceContext`
275 
276   Not Collective
277 
278   Input Parameters:
279 + dctx   - The `PetscDeviceContext`
280 - device - The `PetscDevice`
281 
282   Level: intermediate
283 
284   Notes:
285   This routine is effectively `PetscDeviceContext`'s "set-type" (so every `PetscDeviceContext` must
286   also have an attached `PetscDevice`). Unlike the usual set-type semantics, it is not strictly
287   necessary to set a contexts device to enable usage, any created `PetscDeviceContext`s will
288   always come equipped with the "default" device.
289 
290   This routine is a no-op if `device` is already attached to `dctx`.
291 
292   This routine may (but is very unlikely to) initialize the backend device and may incur
293   synchronization.
294 
295 .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()`,
296 `PetscDeviceContextGetDeviceType()`
297 @*/
298 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device)
299 {
300   PetscFunctionBegin;
301   PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_TRUE));
302   PetscFunctionReturn(PETSC_SUCCESS);
303 }
304 
305 /*@C
306   PetscDeviceContextGetDevice - Get the underlying `PetscDevice` for a `PetscDeviceContext`
307 
308   Not Collective
309 
310   Input Parameter:
311 . dctx - the `PetscDeviceContext`
312 
313   Output Parameter:
314 . device - The `PetscDevice`
315 
316   Level: intermediate
317 
318   Note:
319   This is a borrowed reference, the user should not destroy `device`.
320 
321 .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice`, `PetscDeviceContextGetDeviceType()`
322 @*/
323 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device)
324 {
325   PetscFunctionBegin;
326   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
327   PetscAssertPointer(device, 2);
328   PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt64_FMT " has no attached PetscDevice to get", PetscObjectCast(dctx)->id);
329   *device = dctx->device;
330   PetscFunctionReturn(PETSC_SUCCESS);
331 }
332 
333 /*@C
334   PetscDeviceContextGetDeviceType - Get the `PetscDeviceType` for a `PetscDeviceContext`
335 
336   Not Collective
337 
338   Input Parameter:
339 . dctx - The `PetscDeviceContext`
340 
341   Output Parameter:
342 . type - The `PetscDeviceType`
343 
344   Level: beginner
345 
346   Note:
347   This routine is a convenience shorthand for `PetscDeviceContextGetDevice()` ->
348   `PetscDeviceGetType()`.
349 
350 .seealso: `PetscDeviceType`, `PetscDeviceContextGetDevice()`, `PetscDeviceGetType()`, `PetscDevice`
351 @*/
352 PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext dctx, PetscDeviceType *type)
353 {
354   PetscDevice device = nullptr;
355 
356   PetscFunctionBegin;
357   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
358   PetscAssertPointer(type, 2);
359   PetscCall(PetscDeviceContextGetDevice(dctx, &device));
360   PetscCall(PetscDeviceGetType(device, type));
361   PetscFunctionReturn(PETSC_SUCCESS);
362 }
363 
364 /*@C
365   PetscDeviceContextSetUp - Prepares a `PetscDeviceContext` for use
366 
367   Not Collective
368 
369   Input Parameter:
370 . dctx - The `PetscDeviceContext`
371 
372   Level: beginner
373 
374   Developer Notes:
375   This routine is usually the stage where a `PetscDeviceContext` acquires device-side data
376   structures such as streams, events, and (possibly) handles.
377 
378 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
379 `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()`
380 @*/
381 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx)
382 {
383   PetscFunctionBegin;
384   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
385   if (dctx->setup) PetscFunctionReturn(PETSC_SUCCESS);
386   if (!dctx->device) {
387     const auto default_dtype = PETSC_DEVICE_DEFAULT();
388 
389     PetscCall(PetscInfo(dctx, "PetscDeviceContext %" PetscInt64_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", PetscObjectCast(dctx)->id, PetscDeviceTypes[default_dtype]));
390     PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, default_dtype));
391   }
392   PetscCall(PetscLogEventBegin(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr));
393   PetscUseTypeMethod(dctx, setup);
394   PetscCall(PetscLogEventEnd(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr));
395   dctx->setup = PETSC_TRUE;
396   PetscFunctionReturn(PETSC_SUCCESS);
397 }
398 
399 static PetscErrorCode PetscDeviceContextDuplicate_Private(PetscDeviceContext dctx, PetscStreamType stype, PetscDeviceContext *dctxdup)
400 {
401   PetscFunctionBegin;
402   PetscCall(PetscLogEventBegin(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr));
403   PetscCall(PetscDeviceContextCreate(dctxdup));
404   PetscCall(PetscDeviceContextSetStreamType(*dctxdup, stype));
405   if (const auto device = dctx->device) PetscCall(PetscDeviceContextSetDevice_Private(*dctxdup, device, dctx->usersetdevice));
406   PetscCall(PetscDeviceContextSetUp(*dctxdup));
407   PetscCall(PetscLogEventEnd(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr));
408   PetscFunctionReturn(PETSC_SUCCESS);
409 }
410 
411 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
412 /*@C
413   PetscDeviceContextDuplicate - Duplicates a `PetscDeviceContext` object
414 
415   Not Collective
416 
417   Input Parameter:
418 . dctx - The `PetscDeviceContext` to duplicate
419 
420   Output Parameter:
421 . dctxdup - The duplicated `PetscDeviceContext`
422 
423   Level: beginner
424 
425   Notes:
426   This is a shorthand method for creating a `PetscDeviceContext` with the exact same settings as
427   another. Note however that `dctxdup` does not share any of the underlying data with `dctx`,
428   (including its current stream-state) they are completely separate objects.
429 
430   There is no implied ordering between `dctx` or `dctxdup`.
431 
432   DAG representation:
433 .vb
434   time ->
435 
436   -> dctx - |= CALL =| - dctx ---->
437                        - dctxdup ->
438 .ve
439 
440 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
441 `PetscDeviceContextSetStreamType()`
442 @*/
443 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup)
444 {
445   auto stype = PETSC_STREAM_DEFAULT_BLOCKING;
446 
447   PetscFunctionBegin;
448   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
449   PetscAssertPointer(dctxdup, 2);
450   PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
451   PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, dctxdup));
452   PetscFunctionReturn(PETSC_SUCCESS);
453 }
454 
455 /*@C
456   PetscDeviceContextQueryIdle - Returns whether or not a `PetscDeviceContext` is idle
457 
458   Not Collective
459 
460   Input Parameter:
461 . dctx - The `PetscDeviceContext`
462 
463   Output Parameter:
464 . idle - `PETSC_TRUE` if `dctx` has NO work, `PETSC_FALSE` if it has work
465 
466   Level: intermediate
467 
468   Note:
469   This routine only refers a singular context and does NOT take any of its children into
470   account. That is, if `dctx` is idle but has dependents who do have work this routine still
471   returns `PETSC_TRUE`.
472 
473 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()`
474 @*/
475 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle)
476 {
477   PetscFunctionBegin;
478   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
479   PetscAssertPointer(idle, 2);
480   PetscCall(PetscLogEventBegin(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr));
481   PetscUseTypeMethod(dctx, query, idle);
482   PetscCall(PetscLogEventEnd(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr));
483   PetscCall(PetscInfo(dctx, "PetscDeviceContext ('%s', id %" PetscInt64_FMT ") %s idle\n", PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", PetscObjectCast(dctx)->id, *idle ? "was" : "was not"));
484   PetscFunctionReturn(PETSC_SUCCESS);
485 }
486 
487 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
488 /*@C
489   PetscDeviceContextWaitForContext - Make one context wait for another context to finish
490 
491   Not Collective
492 
493   Input Parameters:
494 + dctxa - The `PetscDeviceContext` object that is waiting
495 - dctxb - The `PetscDeviceContext` object that is being waited on
496 
497   Level: beginner
498 
499   Notes:
500   Serializes two `PetscDeviceContext`s. Serialization is performed asynchronously; the host
501   does not wait for the serialization to actually occur.
502 
503   This routine uses only the state of `dctxb` at the moment this routine was called, so any
504   future work queued will not affect `dctxa`. It is safe to pass the same context to both
505   arguments (in which case this routine does nothing).
506 
507   DAG representation:
508 .vb
509   time ->
510 
511   -> dctxa ---/- |= CALL =| - dctxa ->
512              /
513   -> dctxb -/------------------------>
514 .ve
515 
516 .N ASYNC_API
517 
518 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()`
519 @*/
520 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb)
521 {
522   PetscObjectId bid;
523 
524   PetscFunctionBegin;
525   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxa));
526   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxb));
527   PetscCheckCompatibleDeviceContexts(dctxa, 1, dctxb, 2);
528   if (dctxa == dctxb) PetscFunctionReturn(PETSC_SUCCESS);
529   bid = PetscObjectCast(dctxb)->id;
530   PetscCall(PetscLogEventBegin(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr));
531   PetscUseTypeMethod(dctxa, waitforcontext, dctxb);
532   PetscCallCXX(CxxDataCast(dctxa)->upstream()[bid] = CxxDataCast(dctxb)->weak_snapshot());
533   PetscCall(PetscLogEventEnd(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr));
534   PetscCall(PetscInfo(dctxa, "dctx %" PetscInt64_FMT " waiting on dctx %" PetscInt64_FMT "\n", PetscObjectCast(dctxa)->id, bid));
535   PetscCall(PetscObjectStateIncrease(PetscObjectCast(dctxa)));
536   PetscFunctionReturn(PETSC_SUCCESS);
537 }
538 
539 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
540 /*@C
541   PetscDeviceContextForkWithStreamType - Create a set of dependent child contexts from a parent
542   context with a prescribed `PetscStreamType`
543 
544   Not Collective, Asynchronous
545 
546   Input Parameters:
547 + dctx  - The parent `PetscDeviceContext`
548 . stype - The prescribed `PetscStreamType`
549 - n     - The number of children to create
550 
551   Output Parameter:
552 . dsub - The created child context(s)
553 
554   Level: intermediate
555 
556   Notes:
557   This routine creates `n` edges of a DAG from a source node which are causally dependent on the
558   source node. This causal dependency is established as-if by calling
559   `PetscDeviceContextWaitForContext()` on every child.
560 
561   `dsub` is allocated by this routine and has its lifetime bounded by `dctx`. That is, `dctx`
562   expects to free `dsub` (via `PetscDeviceContextJoin()`) before it itself is destroyed.
563 
564   This routine only accounts for work queued on `dctx` up until calling this routine, any
565   subsequent work enqueued on `dctx` has no effect on `dsub`.
566 
567   The `PetscStreamType` of `dctx` does not have to equal `stype`. In fact, it is often the case
568   that they are different. This is useful in cases where a routine can locally exploit stream
569   parallelism without needing to worry about what stream type the incoming `PetscDeviceContext`
570   carries.
571 
572   DAG representation:
573 .vb
574   time ->
575 
576   -> dctx - |= CALL =| -\----> dctx ------>
577                          \---> dsub[0] --->
578                           \--> ... ------->
579                            \-> dsub[n-1] ->
580 .ve
581 
582 .N ASYNC_API
583 
584 .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`,
585 `PetscDeviceContextQueryIdle()`, `PetscDeviceContextWaitForContext()`
586 @*/
587 PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext dctx, PetscStreamType stype, PetscInt n, PetscDeviceContext **dsub)
588 {
589   // debugging only
590   std::string idList;
591   auto        ninput = n;
592 
593   PetscFunctionBegin;
594   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
595   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n);
596   PetscAssertPointer(dsub, 4);
597   *dsub = nullptr;
598   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
599   if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n));
600   PetscCall(PetscLogEventBegin(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr));
601   /* update child totals */
602   dctx->numChildren += n;
603   /* now to find out if we have room */
604   if (dctx->numChildren > dctx->maxNumChildren) {
605     const auto numChildren    = dctx->numChildren;
606     auto      &maxNumChildren = dctx->maxNumChildren;
607     auto       numAllocated   = numChildren;
608 
609     /* no room, either from having too many kids or not having any */
610     if (auto &childIDs = dctx->childIDs) {
611       // the difference is backwards because we have not updated maxNumChildren yet
612       numAllocated -= maxNumChildren;
613       /* have existing children, must reallocate them */
614       PetscCall(PetscRealloc(numChildren * sizeof(*childIDs), &childIDs));
615       /* clear the extra memory since realloc doesn't do it for us */
616       PetscCall(PetscArrayzero(std::next(childIDs, maxNumChildren), numAllocated));
617     } else {
618       /* have no children */
619       PetscCall(PetscCalloc1(numChildren, &childIDs));
620     }
621     /* update total number of children */
622     maxNumChildren = numChildren;
623   }
624   PetscCall(PetscMalloc1(n, dsub));
625   for (PetscInt i = 0; ninput && (i < dctx->numChildren); ++i) {
626     auto &childID = dctx->childIDs[i];
627     /* empty child slot */
628     if (!childID) {
629       auto &childctx = (*dsub)[i];
630 
631       /* create the child context in the image of its parent */
632       PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, &childctx));
633       PetscCall(PetscDeviceContextWaitForContext(childctx, dctx));
634       /* register the child with its parent */
635       PetscCall(PetscObjectGetId(PetscObjectCast(childctx), &childID));
636       if (PetscDefined(USE_DEBUG_AND_INFO)) {
637         PetscCallCXX(idList += std::to_string(childID));
638         if (ninput != 1) PetscCallCXX(idList += ", ");
639       }
640       --ninput;
641     }
642   }
643   PetscCall(PetscLogEventEnd(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr));
644   PetscCall(PetscDebugInfo(dctx, "Forked %" PetscInt_FMT " children from parent %" PetscInt64_FMT " with IDs: %s\n", n, PetscObjectCast(dctx)->id, idList.c_str()));
645   PetscFunctionReturn(PETSC_SUCCESS);
646 }
647 
648 /*@C
649   PetscDeviceContextFork - Create a set of dependent child contexts from a parent context
650 
651   Not Collective, Asynchronous
652 
653   Input Parameters:
654 + dctx - The parent `PetscDeviceContext`
655 - n    - The number of children to create
656 
657   Output Parameter:
658 . dsub - The created child context(s)
659 
660   Level: beginner
661 
662   Notes:
663   Behaves identically to `PetscDeviceContextForkWithStreamType()` except that the prescribed
664   `PetscStreamType` is taken from `dctx`. In effect this routine is shorthand for\:
665 
666 .vb
667   PetscStreamType stype;
668 
669   PetscDeviceContextGetStreamType(dctx, &stype);
670   PetscDeviceContextForkWithStreamType(dctx, stype, ...);
671 .ve
672 
673 .N ASYNC_API
674 
675 .seealso: `PetscDeviceContextForkWithStreamType()`, `PetscDeviceContextJoin()`,
676 `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()`
677 @*/
678 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub)
679 {
680   auto stype = PETSC_STREAM_DEFAULT_BLOCKING;
681 
682   PetscFunctionBegin;
683   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
684   PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
685   PetscCall(PetscDeviceContextForkWithStreamType(dctx, stype, n, dsub));
686   PetscFunctionReturn(PETSC_SUCCESS);
687 }
688 
689 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
690 /*@C
691   PetscDeviceContextJoin - Converge a set of child contexts
692 
693   Not Collective, Asynchronous
694 
695   Input Parameters:
696 + dctx     - A `PetscDeviceContext` to converge on
697 . n        - The number of sub contexts to converge
698 . joinMode - The type of join to perform
699 - dsub     - The sub contexts to converge
700 
701   Level: beginner
702 
703   Notes:
704   If `PetscDeviceContextFork()` creates `n` edges from a source node which all depend on the source
705   node, then this routine is the exact mirror. That is, it creates a node (represented in `dctx`)
706   which receives `n` edges (and optionally destroys them) which is dependent on the completion
707   of all incoming edges.
708 
709   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`. All contexts in `dsub` will be
710   destroyed by this routine. Thus all sub contexts must have been created with the `dctx`
711   passed to this routine.
712 
713   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`. All sub contexts will additionally wait on
714   `dctx` after converging. This has the effect of "synchronizing" the outgoing edges. Note the
715   sync suffix does NOT refer to the host, i.e. this routine does NOT call
716   `PetscDeviceSynchronize()`.
717 
718   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`. `dctx` waits for all sub contexts but
719   the sub contexts do not wait for one another or `dctx` afterwards.
720 
721   DAG representations:
722   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`
723 .vb
724   time ->
725 
726   -> dctx ---------/- |= CALL =| - dctx ->
727   -> dsub[0] -----/
728   ->  ... -------/
729   -> dsub[n-1] -/
730 .ve
731   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`
732 .vb
733   time ->
734 
735   -> dctx ---------/- |= CALL =| -\----> dctx ------>
736   -> dsub[0] -----/                \---> dsub[0] --->
737   ->  ... -------/                  \--> ... ------->
738   -> dsub[n-1] -/                    \-> dsub[n-1] ->
739 .ve
740   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`
741 .vb
742   time ->
743 
744   -> dctx ----------/- |= CALL =| - dctx ->
745   -> dsub[0] ------/----------------------->
746   ->  ... --------/------------------------>
747   -> dsub[n-1] --/------------------------->
748 .ve
749 
750 .N ASYNC_API
751 
752 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextForkWithStreamType()`,
753 `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode`
754 @*/
755 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub)
756 {
757   // debugging only
758   std::string idList;
759 
760   PetscFunctionBegin;
761   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
762   /* validity of dctx is checked in the wait-for loop */
763   PetscAssertPointer(dsub, 4);
764   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n);
765   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
766   if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n));
767   /* first dctx waits on all the incoming edges */
768   PetscCall(PetscLogEventBegin(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr));
769   for (PetscInt i = 0; i < n; ++i) {
770     PetscCheckCompatibleDeviceContexts(dctx, 1, (*dsub)[i], 4);
771     PetscCall(PetscDeviceContextWaitForContext(dctx, (*dsub)[i]));
772     if (PetscDefined(USE_DEBUG_AND_INFO)) {
773       PetscCallCXX(idList += std::to_string(PetscObjectCast((*dsub)[i])->id));
774       if (i + 1 < n) PetscCallCXX(idList += ", ");
775     }
776   }
777 
778   /* now we handle the aftermath */
779   switch (joinMode) {
780   case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: {
781     const auto children = dctx->childIDs;
782     const auto maxchild = dctx->maxNumChildren;
783     auto      &nchild   = dctx->numChildren;
784     PetscInt   j        = 0;
785 
786     PetscCheck(n <= nchild, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent", n, nchild);
787     /* update child count while it's still fresh in memory */
788     nchild -= n;
789     for (PetscInt i = 0; i < maxchild; ++i) {
790       if (children[i] && (children[i] == PetscObjectCast((*dsub)[j])->id)) {
791         /* child is one of ours, can destroy it */
792         PetscCall(PetscDeviceContextDestroy((*dsub) + j));
793         /* reset the child slot */
794         children[i] = 0;
795         if (++j == n) break;
796       }
797     }
798     /* gone through the loop but did not find every child */
799     PetscCheck(j == n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in", n - j);
800     PetscCall(PetscFree(*dsub));
801   } break;
802   case PETSC_DEVICE_CONTEXT_JOIN_SYNC:
803     for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i], dctx));
804   case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC:
805     break;
806   default:
807     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given");
808   }
809   PetscCall(PetscLogEventEnd(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr));
810 
811   PetscCall(PetscDebugInfo(dctx, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt64_FMT ", mode %s with IDs: %s\n", n, PetscObjectCast(dctx)->id, PetscDeviceContextJoinModes[joinMode], idList.c_str()));
812   PetscFunctionReturn(PETSC_SUCCESS);
813 }
814 
815 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
816 /*@C
817   PetscDeviceContextSynchronize - Block the host until all work queued on a
818   `PetscDeviceContext` has finished
819 
820   Not Collective
821 
822   Input Parameter:
823 . dctx - The `PetscDeviceContext` to synchronize
824 
825   Level: beginner
826 
827   Notes:
828   The host will not return from this routine until `dctx` is idle. Any and all memory
829   operations queued on or otherwise associated with (either explicitly or implicitly via
830   dependencies) are guaranteed to have finished and be globally visible on return.
831 
832   In effect, this routine serves as memory and execution barrier.
833 
834   DAG representation:
835 .vb
836   time ->
837 
838   -> dctx - |= CALL =| - dctx ->
839 .ve
840 
841 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()`
842 @*/
843 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx)
844 {
845   PetscFunctionBegin;
846   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
847   PetscCall(PetscLogEventBegin(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr));
848   /* if it isn't setup there is nothing to sync on */
849   if (dctx->setup) {
850     PetscUseTypeMethod(dctx, synchronize);
851     PetscCall(PetscDeviceContextSyncClearMap_Internal(dctx));
852   }
853   PetscCall(PetscLogEventEnd(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr));
854   PetscFunctionReturn(PETSC_SUCCESS);
855 }
856 
857 /* every device type has a vector of null PetscDeviceContexts -- one for each device */
858 static auto nullContexts          = std::array<std::vector<PetscDeviceContext>, PETSC_DEVICE_MAX>{};
859 static auto nullContextsFinalizer = false;
860 
861 static PetscErrorCode PetscDeviceContextGetNullContextForDevice_Private(PetscBool user_set_device, PetscDevice device, PetscDeviceContext *dctx)
862 {
863   PetscInt        devid;
864   PetscDeviceType dtype;
865 
866   PetscFunctionBegin;
867   PetscValidDevice(device, 2);
868   PetscAssertPointer(dctx, 3);
869   if (PetscUnlikely(!nullContextsFinalizer)) {
870     nullContextsFinalizer = true;
871     PetscCall(PetscRegisterFinalize([] {
872       PetscFunctionBegin;
873       for (auto &&dvec : nullContexts) {
874         for (auto &&dctx : dvec) PetscCall(PetscDeviceContextDestroy(&dctx));
875         PetscCallCXX(dvec.clear());
876       }
877       nullContextsFinalizer = false;
878       PetscFunctionReturn(PETSC_SUCCESS);
879     }));
880   }
881   PetscCall(PetscDeviceGetDeviceId(device, &devid));
882   PetscCall(PetscDeviceGetType(device, &dtype));
883   {
884     auto &ctxlist = nullContexts[dtype];
885 
886     PetscCheck(devid >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Device ID (%" PetscInt_FMT ") must be positive", devid);
887     // need to resize the container if not big enough because incrementing the iterator in
888     // std::next() (if we haven't initialized that ctx yet) may cause it to fall outside the
889     // current size of the container.
890     if (static_cast<std::size_t>(devid) >= ctxlist.size()) PetscCallCXX(ctxlist.resize(devid + 1));
891     if (PetscUnlikely(!ctxlist[devid])) {
892       // we have not seen this device before
893       PetscCall(PetscDeviceContextCreate(dctx));
894       PetscCall(PetscInfo(*dctx, "Initializing null PetscDeviceContext (of type %s) for device %" PetscInt_FMT "\n", PetscDeviceTypes[dtype], devid));
895       {
896         const auto pobj   = PetscObjectCast(*dctx);
897         const auto name   = "null context " + std::to_string(devid);
898         const auto prefix = "null_context_" + std::to_string(devid) + '_';
899 
900         PetscCall(PetscObjectSetName(pobj, name.c_str()));
901         PetscCall(PetscObjectSetOptionsPrefix(pobj, prefix.c_str()));
902       }
903       PetscCall(PetscDeviceContextSetStreamType(*dctx, PETSC_STREAM_GLOBAL_BLOCKING));
904       PetscCall(PetscDeviceContextSetDevice_Private(*dctx, device, user_set_device));
905       PetscCall(PetscDeviceContextSetUp(*dctx));
906       // would use ctxlist.cbegin() but GCC 4.8 can't handle const iterator insert!
907       PetscCallCXX(ctxlist.insert(std::next(ctxlist.begin(), devid), *dctx));
908     } else *dctx = ctxlist[devid];
909   }
910   PetscFunctionReturn(PETSC_SUCCESS);
911 }
912 
913 /*
914   Gets the "NULL" context for the current PetscDeviceType and PetscDevice. NULL contexts are
915   guaranteed to always be globally blocking.
916 */
917 PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *dctx)
918 {
919   PetscDeviceContext gctx;
920   PetscDevice        gdev = nullptr;
921 
922   PetscFunctionBegin;
923   PetscAssertPointer(dctx, 1);
924   PetscCall(PetscDeviceContextGetCurrentContext(&gctx));
925   PetscCall(PetscDeviceContextGetDevice(gctx, &gdev));
926   PetscCall(PetscDeviceContextGetNullContextForDevice_Private(gctx->usersetdevice, gdev, dctx));
927   PetscFunctionReturn(PETSC_SUCCESS);
928 }
929 
930 /*@C
931   PetscDeviceContextSetFromOptions - Configure a `PetscDeviceContext` from the options database
932 
933   Collective on `comm` or `dctx`
934 
935   Input Parameters:
936 + comm - MPI communicator on which to query the options database (optional)
937 - dctx - The `PetscDeviceContext` to configure
938 
939   Output Parameter:
940 . dctx - The `PetscDeviceContext`
941 
942   Options Database Keys:
943 + -device_context_stream_type - type of stream to create inside the `PetscDeviceContext` -
944    `PetscDeviceContextSetStreamType()`
945 - -device_context_device_type - the type of `PetscDevice` to attach by default - `PetscDeviceType`
946 
947   Level: beginner
948 
949   Note:
950   The user may pass `MPI_COMM_NULL` for `comm` in which case the communicator of `dctx` is
951   used (which is always `PETSC_COMM_SELF`).
952 
953 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()`,
954 `PetscDeviceContextView()`
955 @*/
956 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, PetscDeviceContext dctx)
957 {
958   const auto pobj     = PetscObjectCast(dctx);
959   auto       dtype    = std::make_pair(PETSC_DEVICE_DEFAULT(), PETSC_FALSE);
960   auto       stype    = std::make_pair(PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE, PETSC_FALSE);
961   MPI_Comm   old_comm = PETSC_COMM_SELF;
962 
963   PetscFunctionBegin;
964   // do not user getoptionalnullcontext here, the user is not allowed to set it from options!
965   PetscValidDeviceContext(dctx, 2);
966   /* set the device type first */
967   if (const auto device = dctx->device) PetscCall(PetscDeviceGetType(device, &dtype.first));
968   PetscCall(PetscDeviceContextGetStreamType(dctx, &stype.first));
969 
970   if (comm == MPI_COMM_NULL) {
971     PetscCall(PetscObjectGetComm(pobj, &comm));
972   } else {
973     // briefly set the communicator for dctx (it is always PETSC_COMM_SELF) so
974     // PetscObjectOptionsBegin() behaves as if dctx had comm
975     old_comm = Petsc::util::exchange(pobj->comm, comm);
976   }
977 
978   PetscObjectOptionsBegin(pobj);
979   PetscCall(PetscDeviceContextQueryOptions_Internal(PetscOptionsObject, dtype, stype));
980   PetscOptionsEnd();
981   // reset the comm (should be PETSC_COMM_SELF)
982   if (comm != MPI_COMM_NULL) pobj->comm = old_comm;
983   if (dtype.second) PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, dtype.first));
984   if (stype.second) PetscCall(PetscDeviceContextSetStreamType(dctx, stype.first));
985   PetscCall(PetscDeviceContextSetUp(dctx));
986   PetscFunctionReturn(PETSC_SUCCESS);
987 }
988 
989 /*@C
990   PetscDeviceContextView - View a `PetscDeviceContext`
991 
992   Collective on `viewer`
993 
994   Input Parameters:
995 + dctx   - The `PetscDeviceContext`
996 - viewer - The `PetscViewer` to view `dctx` with (may be `NULL`)
997 
998   Level: beginner
999 
1000   Note:
1001   If `viewer` is `NULL`, `PETSC_VIEWER_STDOUT_WORLD` is used instead, in which case this
1002   routine is collective on `PETSC_COMM_WORLD`.
1003 
1004 .seealso: `PetscDeviceContextViewFromOptions()`, `PetscDeviceView()`, `PETSC_VIEWER_STDOUT_WORLD`, `PetscDeviceContextCreate()`
1005 @*/
1006 PetscErrorCode PetscDeviceContextView(PetscDeviceContext dctx, PetscViewer viewer)
1007 {
1008   PetscBool iascii;
1009 
1010   PetscFunctionBegin;
1011   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1012   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer));
1013   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
1014   PetscCall(PetscObjectTypeCompare(PetscObjectCast(viewer), PETSCVIEWERASCII, &iascii));
1015   if (iascii) {
1016     auto        stype = PETSC_STREAM_DEFAULT_BLOCKING;
1017     PetscViewer sub;
1018 
1019     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sub));
1020     PetscCall(PetscObjectPrintClassNamePrefixType(PetscObjectCast(dctx), sub));
1021     PetscCall(PetscViewerASCIIPushTab(sub));
1022     PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
1023     PetscCall(PetscViewerASCIIPrintf(sub, "stream type: %s\n", PetscStreamTypes[stype]));
1024     PetscCall(PetscViewerASCIIPrintf(sub, "children: %" PetscInt_FMT "\n", dctx->numChildren));
1025     if (const auto nchild = dctx->numChildren) {
1026       PetscCall(PetscViewerASCIIPushTab(sub));
1027       for (PetscInt i = 0; i < nchild; ++i) {
1028         if (i == nchild - 1) {
1029           PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT, dctx->childIDs[i]));
1030         } else {
1031           PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT ", ", dctx->childIDs[i]));
1032         }
1033       }
1034     }
1035     PetscCall(PetscViewerASCIIPopTab(sub));
1036     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sub));
1037     PetscCall(PetscViewerFlush(viewer));
1038     PetscCall(PetscViewerASCIIPushTab(viewer));
1039   }
1040   if (const auto device = dctx->device) PetscCall(PetscDeviceView(device, viewer));
1041   if (iascii) PetscCall(PetscViewerASCIIPopTab(viewer));
1042   PetscFunctionReturn(PETSC_SUCCESS);
1043 }
1044 
1045 /*@C
1046   PetscDeviceContextViewFromOptions - View a `PetscDeviceContext` from options
1047 
1048   Input Parameters:
1049 + dctx - The `PetscDeviceContext` to view
1050 . obj  - Optional `PetscObject` to associate (may be `NULL`)
1051 - name - The command line option
1052 
1053   Level: beginner
1054 
1055 .seealso: `PetscDeviceContextView()`, `PetscObjectViewFromOptions()`, `PetscDeviceContextCreate()`
1056 @*/
1057 PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext dctx, PetscObject obj, const char name[])
1058 {
1059   PetscFunctionBegin;
1060   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1061   if (obj) PetscValidHeader(obj, 2);
1062   PetscAssertPointer(name, 3);
1063   PetscCall(PetscObjectViewFromOptions(PetscObjectCast(dctx), obj, name));
1064   PetscFunctionReturn(PETSC_SUCCESS);
1065 }
1066 
1067 /*@C
1068   PetscDeviceContextGetStreamHandle - Return a handle to the underlying stream of the current device context
1069 
1070   Input Parameters:
1071 + dctx   - The `PetscDeviceContext` to get the stream from
1072 - handle - A handle to the stream
1073 
1074   Level: developer
1075 
1076   Note:
1077   This routine is dangerous. It exists only for the most experienced users and
1078   internal PETSc developement.
1079 
1080   There is no way for PETSc's auto-dependency system to track what the caller does with the
1081   stream.
1082 
1083   If the user uses the stream to copy memory that was previously modified by PETSc, or launches
1084   kernels that modify memory with the stream, it is the users responsibility to inform PETSc of
1085   their actions via `PetscDeviceContextMarkIntentFromID()`. Failure to do so may introduce a
1086   race condition. This race condition may manifest in nondeterministic ways.
1087 
1088   Alternatively, the user may synchronize the stream immediately before and after use. This is
1089   the safest option.
1090 
1091   Example Usage:
1092 .vb
1093   PetscDeviceContext dctx;
1094   PetscDeviceType    type;
1095   void               *handle;
1096 
1097   PetscDeviceContextGetCurrentContext(&dctx);
1098   PetscDeviceContextGetStreamHandle(dctx, &handle);
1099   PetscDeviceContextGetDeviceType(dctx, &type);
1100 
1101   if (type == PETSC_DEVICE_CUDA) {
1102     cudsStream_t stream = *(cudaStream_t*)handle;
1103 
1104     my_cuda_kernel<<<1, 2, 3, stream>>>();
1105   }
1106 .ve
1107 
1108 .N ASYNC_API
1109 
1110 .seealso: `PetscDeviceContext`
1111 @*/
1112 PetscErrorCode PetscDeviceContextGetStreamHandle(PetscDeviceContext dctx, void *handle)
1113 {
1114   PetscFunctionBegin;
1115   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1116   PetscAssertPointer(handle, 2);
1117   PetscCall(PetscDeviceContextGetStreamHandle_Internal(dctx, (void **)handle));
1118   PetscFunctionReturn(PETSC_SUCCESS);
1119 }
1120