xref: /petsc/src/sys/objects/device/interface/dcontext.cxx (revision dcf958e2725ed5b45a9e2d55931865b30238645c)
1 #include "petscdevice_interface_internal.hpp" /*I <petscdevice.h> I*/
2 #include <petsc/private/viewerimpl.h>         // _p_PetscViewer for PetscObjectCast()
3 
4 #include <petsc/private/cpp/object_pool.hpp>
5 #include <petsc/private/cpp/utility.hpp>
6 #include <petsc/private/cpp/array.hpp>
7 
8 #include <vector>
9 #include <string> // std::to_string among other things
10 
11 /* Define the allocator */
12 class PetscDeviceContextConstructor : public Petsc::ConstructorInterface<_p_PetscDeviceContext, PetscDeviceContextConstructor> {
13 public:
14   PetscErrorCode construct_(PetscDeviceContext dctx) const noexcept
15   {
16     PetscFunctionBegin;
17     PetscCall(PetscArrayzero(dctx, 1));
18     PetscCall(PetscHeaderInitialize_Private(dctx, PETSC_DEVICE_CONTEXT_CLASSID, "PetscDeviceContext", "PetscDeviceContext", "Sys", PETSC_COMM_SELF, PetscDeviceContextDestroy, PetscDeviceContextView));
19     PetscCallCXX(PetscObjectCast(dctx)->cpp = new CxxData{dctx});
20     PetscCall(underlying().reset(dctx, false));
21     PetscFunctionReturn(PETSC_SUCCESS);
22   }
23 
24   static PetscErrorCode destroy_(PetscDeviceContext dctx) noexcept
25   {
26     PetscFunctionBegin;
27     PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren);
28     PetscTryTypeMethod(dctx, destroy);
29     PetscCall(PetscDeviceDestroy(&dctx->device));
30     PetscCall(PetscFree(dctx->childIDs));
31     delete CxxDataCast(dctx);
32     PetscCall(PetscHeaderDestroy_Private(PetscObjectCast(dctx), PETSC_FALSE));
33     PetscFunctionReturn(PETSC_SUCCESS);
34   }
35 
36   static PetscErrorCode reset_(PetscDeviceContext dctx, bool zero = true) noexcept
37   {
38     PetscFunctionBegin;
39     if (zero) {
40       // reset the device if the user set it
41       if (Petsc::util::exchange(dctx->usersetdevice, PETSC_FALSE)) {
42         PetscTryTypeMethod(dctx, destroy);
43         PetscCall(PetscDeviceDestroy(&dctx->device));
44         PetscCall(PetscArrayzero(dctx->ops, 1));
45         dctx->data = nullptr;
46       }
47       PetscCall(PetscHeaderReset_Internal(PetscObjectCast(dctx)));
48       dctx->numChildren = 0;
49       dctx->setup       = PETSC_FALSE;
50       // don't deallocate the child array, rather just zero it out
51       PetscCall(PetscArrayzero(dctx->childIDs, dctx->maxNumChildren));
52       PetscCall(CxxDataCast(dctx)->clear());
53       PetscCall(CxxDataCast(dctx)->reset_self(dctx));
54     }
55     dctx->streamType = PETSC_STREAM_DEFAULT_BLOCKING;
56     PetscFunctionReturn(PETSC_SUCCESS);
57   }
58 
59   static PetscErrorCode invalidate_(PetscDeviceContext dctx) noexcept
60   {
61     PetscFunctionBegin;
62     PetscCall(CxxDataCast(dctx)->reset_self(dctx));
63     PetscFunctionReturn(PETSC_SUCCESS);
64   }
65 };
66 
67 static Petsc::ObjectPool<_p_PetscDeviceContext, PetscDeviceContextConstructor> contextPool;
68 
69 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
70 /*@C
71   PetscDeviceContextCreate - Creates a `PetscDeviceContext`
72 
73   Not Collective
74 
75   Output Parameter:
76 . dctx - The `PetscDeviceContext`
77 
78   Level: beginner
79 
80   Note:
81   Unlike almost every other PETSc class it is advised that most users use
82   `PetscDeviceContextDuplicate()` rather than this routine to create new contexts. Contexts of
83   different types are incompatible with one another; using `PetscDeviceContextDuplicate()`
84   ensures compatible types.
85 
86   DAG representation:
87 .vb
88   time ->
89 
90   |= CALL =| - dctx ->
91 .ve
92 
93 .N ASYNC_API
94 
95 .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`,
96 `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`,
97 `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextView()`, `PetscDeviceContextDestroy()`
98 @*/
99 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx)
100 {
101   PetscFunctionBegin;
102   PetscAssertPointer(dctx, 1);
103   PetscCall(PetscDeviceInitializePackage());
104   PetscCall(PetscLogEventBegin(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr));
105   PetscCall(contextPool.allocate(dctx));
106   PetscCall(PetscLogEventEnd(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr));
107   PetscFunctionReturn(PETSC_SUCCESS);
108 }
109 
110 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
111 /*@C
112   PetscDeviceContextDestroy - Frees a `PetscDeviceContext`
113 
114   Not Collective
115 
116   Input Parameter:
117 . dctx - The `PetscDeviceContext`
118 
119   Level: beginner
120 
121   Notes:
122   No implicit synchronization occurs due to this routine, all resources are released completely
123   asynchronously w.r.t. the host. If one needs to guarantee access to the data produced on
124   `dctx`'s stream the user is responsible for calling `PetscDeviceContextSynchronize()` before
125   calling this routine.
126 
127   DAG representation:
128 .vb
129   time ->
130 
131   -> dctx - |= CALL =|
132 .ve
133 
134   Developer Notes:
135   `dctx` is never actually "destroyed" in the classical sense. It is returned to an ever
136   growing pool of `PetscDeviceContext`s. There are currently no limits on the size of the pool,
137   this should perhaps be implemented.
138 
139 .N ASYNC_API
140 
141 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
142 `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()`
143 @*/
144 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx)
145 {
146   PetscFunctionBegin;
147   PetscAssertPointer(dctx, 1);
148   if (!*dctx) PetscFunctionReturn(PETSC_SUCCESS);
149   PetscCall(PetscLogEventBegin(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr));
150   if (--(PetscObjectCast(*dctx)->refct) <= 0) {
151     PetscCall(PetscDeviceContextCheckNotOrphaned_Internal(*dctx));
152     PetscCall(contextPool.deallocate(dctx));
153   }
154   PetscCall(PetscLogEventEnd(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr));
155   *dctx = nullptr;
156   PetscFunctionReturn(PETSC_SUCCESS);
157 }
158 
159 /*@C
160   PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a
161   `PetscDeviceContext`
162 
163   Not Collective
164 
165   Input Parameters:
166 + dctx - The `PetscDeviceContext`
167 - type - The `PetscStreamType`
168 
169   Level: beginner
170 
171   Note:
172   See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
173   types and their interactions. If the `PetscDeviceContext` was previously set up and stream
174   type was changed, you must call `PetscDeviceContextSetUp()` again after this routine.
175 
176 .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`,
177 `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()`
178 @*/
179 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type)
180 {
181   PetscFunctionBegin;
182   // do not use getoptionalnullcontext here since we do not want the user to change the stream
183   // type
184   PetscValidDeviceContext(dctx, 1);
185   PetscValidStreamType(type, 2);
186   // only need to do complex swapping if the object has already been setup
187   if (dctx->setup && (dctx->streamType != type)) {
188     dctx->setup = PETSC_FALSE;
189     PetscCall(PetscLogEventBegin(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr));
190     PetscUseTypeMethod(dctx, changestreamtype, type);
191     PetscCall(PetscLogEventEnd(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr));
192   }
193   dctx->streamType = type;
194   PetscFunctionReturn(PETSC_SUCCESS);
195 }
196 
197 /*@C
198   PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a
199   `PetscDeviceContext`
200 
201   Not Collective
202 
203   Input Parameter:
204 . dctx - The `PetscDeviceContext`
205 
206   Output Parameter:
207 . type - The `PetscStreamType`
208 
209   Level: beginner
210 
211   Note:
212   See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
213   types and their interactions
214 
215 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`,
216 `PetscDeviceContextSetFromOptions()`
217 @*/
218 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type)
219 {
220   PetscFunctionBegin;
221   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
222   PetscAssertPointer(type, 2);
223   *type = dctx->streamType;
224   PetscFunctionReturn(PETSC_SUCCESS);
225 }
226 
227 /*
228   Actual function to set the device.
229 
230   1. Repeatedly destroying and recreating internal data structures (like streams and events)
231      for recycled PetscDeviceContexts is not free. If done often, it does add up.
232   2. The vast majority of PetscDeviceContexts are created by PETSc either as children or
233      default contexts. The default contexts *never* change type, and the children are extremely
234      unlikely to (chances are if you fork once, you will fork again very soon).
235   3. The only time this calculus changes is if the user themselves sets the device type. In
236      this case we do not know what the user has changed, so must always wipe the slate clean.
237 
238   Thus we need to keep track whether the user explicitly sets the device contexts device.
239 */
240 static PetscErrorCode PetscDeviceContextSetDevice_Private(PetscDeviceContext dctx, PetscDevice device, PetscBool user_set)
241 {
242   PetscFunctionBegin;
243   // do not use getoptionalnullcontext here since we do not want the user to change its device
244   PetscValidDeviceContext(dctx, 1);
245   PetscValidDevice(device, 2);
246   if (dctx->device && (dctx->device->id == device->id)) PetscFunctionReturn(PETSC_SUCCESS);
247   PetscCall(PetscLogEventBegin(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr));
248   PetscTryTypeMethod(dctx, destroy);
249   PetscCall(PetscDeviceDestroy(&dctx->device));
250   PetscCall(PetscMemzero(dctx->ops, sizeof(*dctx->ops)));
251   PetscCall(PetscDeviceReference_Internal(device));
252   // set it before calling the method
253   dctx->device = device;
254   PetscCall((*device->ops->createcontext)(dctx));
255   PetscCall(PetscLogEventEnd(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr));
256   dctx->setup         = PETSC_FALSE;
257   dctx->usersetdevice = user_set;
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
261 PetscErrorCode PetscDeviceContextSetDefaultDeviceForType_Internal(PetscDeviceContext dctx, PetscDeviceType type)
262 {
263   PetscDevice device;
264 
265   PetscFunctionBegin;
266   PetscCall(PetscDeviceGetDefaultForType_Internal(type, &device));
267   PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_FALSE));
268   PetscFunctionReturn(PETSC_SUCCESS);
269 }
270 
271 /*@C
272   PetscDeviceContextSetDevice - Set the underlying `PetscDevice` for a `PetscDeviceContext`
273 
274   Not Collective
275 
276   Input Parameters:
277 + dctx   - The `PetscDeviceContext`
278 - device - The `PetscDevice`
279 
280   Level: intermediate
281 
282   Notes:
283   This routine is effectively `PetscDeviceContext`'s "set-type" (so every `PetscDeviceContext` must
284   also have an attached `PetscDevice`). Unlike the usual set-type semantics, it is not strictly
285   necessary to set a contexts device to enable usage, any created `PetscDeviceContext`s will
286   always come equipped with the "default" device.
287 
288   This routine is a no-op if `device` is already attached to `dctx`.
289 
290   This routine may (but is very unlikely to) initialize the backend device and may incur
291   synchronization.
292 
293 .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()`,
294 `PetscDeviceContextGetDeviceType()`
295 @*/
296 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device)
297 {
298   PetscFunctionBegin;
299   PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_TRUE));
300   PetscFunctionReturn(PETSC_SUCCESS);
301 }
302 
303 /*@C
304   PetscDeviceContextGetDevice - Get the underlying `PetscDevice` for a `PetscDeviceContext`
305 
306   Not Collective
307 
308   Input Parameter:
309 . dctx - the `PetscDeviceContext`
310 
311   Output Parameter:
312 . device - The `PetscDevice`
313 
314   Level: intermediate
315 
316   Note:
317   This is a borrowed reference, the user should not destroy `device`.
318 
319 .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice`, `PetscDeviceContextGetDeviceType()`
320 @*/
321 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device)
322 {
323   PetscFunctionBegin;
324   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
325   PetscAssertPointer(device, 2);
326   PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt64_FMT " has no attached PetscDevice to get", PetscObjectCast(dctx)->id);
327   *device = dctx->device;
328   PetscFunctionReturn(PETSC_SUCCESS);
329 }
330 
331 /*@C
332   PetscDeviceContextGetDeviceType - Get the `PetscDeviceType` for a `PetscDeviceContext`
333 
334   Not Collective
335 
336   Input Parameter:
337 . dctx - The `PetscDeviceContext`
338 
339   Output Parameter:
340 . type - The `PetscDeviceType`
341 
342   Level: beginner
343 
344   Note:
345   This routine is a convenience shorthand for `PetscDeviceContextGetDevice()` ->
346   `PetscDeviceGetType()`.
347 
348 .seealso: `PetscDeviceType`, `PetscDeviceContextGetDevice()`, `PetscDeviceGetType()`, `PetscDevice`
349 @*/
350 PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext dctx, PetscDeviceType *type)
351 {
352   PetscDevice device = nullptr;
353 
354   PetscFunctionBegin;
355   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
356   PetscAssertPointer(type, 2);
357   PetscCall(PetscDeviceContextGetDevice(dctx, &device));
358   PetscCall(PetscDeviceGetType(device, type));
359   PetscFunctionReturn(PETSC_SUCCESS);
360 }
361 
362 /*@C
363   PetscDeviceContextSetUp - Prepares a `PetscDeviceContext` for use
364 
365   Not Collective
366 
367   Input Parameter:
368 . dctx - The `PetscDeviceContext`
369 
370   Level: beginner
371 
372   Developer Notes:
373   This routine is usually the stage where a `PetscDeviceContext` acquires device-side data
374   structures such as streams, events, and (possibly) handles.
375 
376 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
377 `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()`
378 @*/
379 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx)
380 {
381   PetscFunctionBegin;
382   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
383   if (dctx->setup) PetscFunctionReturn(PETSC_SUCCESS);
384   if (!dctx->device) {
385     const auto default_dtype = PETSC_DEVICE_DEFAULT();
386 
387     PetscCall(PetscInfo(dctx, "PetscDeviceContext %" PetscInt64_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", PetscObjectCast(dctx)->id, PetscDeviceTypes[default_dtype]));
388     PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, default_dtype));
389   }
390   PetscCall(PetscLogEventBegin(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr));
391   PetscUseTypeMethod(dctx, setup);
392   PetscCall(PetscLogEventEnd(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr));
393   dctx->setup = PETSC_TRUE;
394   PetscFunctionReturn(PETSC_SUCCESS);
395 }
396 
397 static PetscErrorCode PetscDeviceContextDuplicate_Private(PetscDeviceContext dctx, PetscStreamType stype, PetscDeviceContext *dctxdup)
398 {
399   PetscFunctionBegin;
400   PetscCall(PetscLogEventBegin(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr));
401   PetscCall(PetscDeviceContextCreate(dctxdup));
402   PetscCall(PetscDeviceContextSetStreamType(*dctxdup, stype));
403   if (const auto device = dctx->device) PetscCall(PetscDeviceContextSetDevice_Private(*dctxdup, device, dctx->usersetdevice));
404   PetscCall(PetscDeviceContextSetUp(*dctxdup));
405   PetscCall(PetscLogEventEnd(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr));
406   PetscFunctionReturn(PETSC_SUCCESS);
407 }
408 
409 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
410 /*@C
411   PetscDeviceContextDuplicate - Duplicates a `PetscDeviceContext` object
412 
413   Not Collective
414 
415   Input Parameter:
416 . dctx - The `PetscDeviceContext` to duplicate
417 
418   Output Parameter:
419 . dctxdup - The duplicated `PetscDeviceContext`
420 
421   Level: beginner
422 
423   Notes:
424   This is a shorthand method for creating a `PetscDeviceContext` with the exact same settings as
425   another. Note however that `dctxdup` does not share any of the underlying data with `dctx`,
426   (including its current stream-state) they are completely separate objects.
427 
428   There is no implied ordering between `dctx` or `dctxdup`.
429 
430   DAG representation:
431 .vb
432   time ->
433 
434   -> dctx - |= CALL =| - dctx ---->
435                        - dctxdup ->
436 .ve
437 
438 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
439 `PetscDeviceContextSetStreamType()`
440 @*/
441 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup)
442 {
443   auto stype = PETSC_STREAM_DEFAULT_BLOCKING;
444 
445   PetscFunctionBegin;
446   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
447   PetscAssertPointer(dctxdup, 2);
448   PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
449   PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, dctxdup));
450   PetscFunctionReturn(PETSC_SUCCESS);
451 }
452 
453 /*@C
454   PetscDeviceContextQueryIdle - Returns whether or not a `PetscDeviceContext` is idle
455 
456   Not Collective
457 
458   Input Parameter:
459 . dctx - The `PetscDeviceContext`
460 
461   Output Parameter:
462 . idle - `PETSC_TRUE` if `dctx` has NO work, `PETSC_FALSE` if it has work
463 
464   Level: intermediate
465 
466   Note:
467   This routine only refers a singular context and does NOT take any of its children into
468   account. That is, if `dctx` is idle but has dependents who do have work this routine still
469   returns `PETSC_TRUE`.
470 
471 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()`
472 @*/
473 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle)
474 {
475   PetscFunctionBegin;
476   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
477   PetscAssertPointer(idle, 2);
478   PetscCall(PetscLogEventBegin(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr));
479   PetscUseTypeMethod(dctx, query, idle);
480   PetscCall(PetscLogEventEnd(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr));
481   PetscCall(PetscInfo(dctx, "PetscDeviceContext ('%s', id %" PetscInt64_FMT ") %s idle\n", PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", PetscObjectCast(dctx)->id, *idle ? "was" : "was not"));
482   PetscFunctionReturn(PETSC_SUCCESS);
483 }
484 
485 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
486 /*@C
487   PetscDeviceContextWaitForContext - Make one context wait for another context to finish
488 
489   Not Collective
490 
491   Input Parameters:
492 + dctxa - The `PetscDeviceContext` object that is waiting
493 - dctxb - The `PetscDeviceContext` object that is being waited on
494 
495   Level: beginner
496 
497   Notes:
498   Serializes two `PetscDeviceContext`s. Serialization is performed asynchronously; the host
499   does not wait for the serialization to actually occur.
500 
501   This routine uses only the state of `dctxb` at the moment this routine was called, so any
502   future work queued will not affect `dctxa`. It is safe to pass the same context to both
503   arguments (in which case this routine does nothing).
504 
505   DAG representation:
506 .vb
507   time ->
508 
509   -> dctxa ---/- |= CALL =| - dctxa ->
510              /
511   -> dctxb -/------------------------>
512 .ve
513 
514 .N ASYNC_API
515 
516 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()`
517 @*/
518 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb)
519 {
520   PetscObjectId bid;
521 
522   PetscFunctionBegin;
523   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxa));
524   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxb));
525   PetscCheckCompatibleDeviceContexts(dctxa, 1, dctxb, 2);
526   if (dctxa == dctxb) PetscFunctionReturn(PETSC_SUCCESS);
527   bid = PetscObjectCast(dctxb)->id;
528   PetscCall(PetscLogEventBegin(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr));
529   PetscUseTypeMethod(dctxa, waitforcontext, dctxb);
530   PetscCallCXX(CxxDataCast(dctxa)->upstream()[bid] = CxxDataCast(dctxb)->weak_snapshot());
531   PetscCall(PetscLogEventEnd(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr));
532   PetscCall(PetscInfo(dctxa, "dctx %" PetscInt64_FMT " waiting on dctx %" PetscInt64_FMT "\n", PetscObjectCast(dctxa)->id, bid));
533   PetscCall(PetscObjectStateIncrease(PetscObjectCast(dctxa)));
534   PetscFunctionReturn(PETSC_SUCCESS);
535 }
536 
537 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
538 /*@C
539   PetscDeviceContextForkWithStreamType - Create a set of dependent child contexts from a parent
540   context with a prescribed `PetscStreamType`
541 
542   Not Collective, Asynchronous
543 
544   Input Parameters:
545 + dctx  - The parent `PetscDeviceContext`
546 . stype - The prescribed `PetscStreamType`
547 - n     - The number of children to create
548 
549   Output Parameter:
550 . dsub - The created child context(s)
551 
552   Level: intermediate
553 
554   Notes:
555   This routine creates `n` edges of a DAG from a source node which are causally dependent on the
556   source node. This causal dependency is established as-if by calling
557   `PetscDeviceContextWaitForContext()` on every child.
558 
559   `dsub` is allocated by this routine and has its lifetime bounded by `dctx`. That is, `dctx`
560   expects to free `dsub` (via `PetscDeviceContextJoin()`) before it itself is destroyed.
561 
562   This routine only accounts for work queued on `dctx` up until calling this routine, any
563   subsequent work enqueued on `dctx` has no effect on `dsub`.
564 
565   The `PetscStreamType` of `dctx` does not have to equal `stype`. In fact, it is often the case
566   that they are different. This is useful in cases where a routine can locally exploit stream
567   parallelism without needing to worry about what stream type the incoming `PetscDeviceContext`
568   carries.
569 
570   DAG representation:
571 .vb
572   time ->
573 
574   -> dctx - |= CALL =| -\----> dctx ------>
575                          \---> dsub[0] --->
576                           \--> ... ------->
577                            \-> dsub[n-1] ->
578 .ve
579 
580 .N ASYNC_API
581 
582 .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`,
583 `PetscDeviceContextQueryIdle()`, `PetscDeviceContextWaitForContext()`
584 @*/
585 PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext dctx, PetscStreamType stype, PetscInt n, PetscDeviceContext **dsub)
586 {
587   // debugging only
588   std::string idList;
589   auto        ninput = n;
590 
591   PetscFunctionBegin;
592   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
593   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n);
594   PetscAssertPointer(dsub, 4);
595   *dsub = nullptr;
596   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
597   if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n));
598   PetscCall(PetscLogEventBegin(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr));
599   /* update child totals */
600   dctx->numChildren += n;
601   /* now to find out if we have room */
602   if (dctx->numChildren > dctx->maxNumChildren) {
603     const auto numChildren    = dctx->numChildren;
604     auto      &maxNumChildren = dctx->maxNumChildren;
605     auto       numAllocated   = numChildren;
606 
607     /* no room, either from having too many kids or not having any */
608     if (auto &childIDs = dctx->childIDs) {
609       // the difference is backwards because we have not updated maxNumChildren yet
610       numAllocated -= maxNumChildren;
611       /* have existing children, must reallocate them */
612       PetscCall(PetscRealloc(numChildren * sizeof(*childIDs), &childIDs));
613       /* clear the extra memory since realloc doesn't do it for us */
614       PetscCall(PetscArrayzero(std::next(childIDs, maxNumChildren), numAllocated));
615     } else {
616       /* have no children */
617       PetscCall(PetscCalloc1(numChildren, &childIDs));
618     }
619     /* update total number of children */
620     maxNumChildren = numChildren;
621   }
622   PetscCall(PetscMalloc1(n, dsub));
623   for (PetscInt i = 0; ninput && (i < dctx->numChildren); ++i) {
624     auto &childID = dctx->childIDs[i];
625     /* empty child slot */
626     if (!childID) {
627       auto &childctx = (*dsub)[i];
628 
629       /* create the child context in the image of its parent */
630       PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, &childctx));
631       PetscCall(PetscDeviceContextWaitForContext(childctx, dctx));
632       /* register the child with its parent */
633       PetscCall(PetscObjectGetId(PetscObjectCast(childctx), &childID));
634       if (PetscDefined(USE_DEBUG_AND_INFO)) {
635         PetscCallCXX(idList += std::to_string(childID));
636         if (ninput != 1) PetscCallCXX(idList += ", ");
637       }
638       --ninput;
639     }
640   }
641   PetscCall(PetscLogEventEnd(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr));
642   PetscCall(PetscDebugInfo(dctx, "Forked %" PetscInt_FMT " children from parent %" PetscInt64_FMT " with IDs: %s\n", n, PetscObjectCast(dctx)->id, idList.c_str()));
643   PetscFunctionReturn(PETSC_SUCCESS);
644 }
645 
646 /*@C
647   PetscDeviceContextFork - Create a set of dependent child contexts from a parent context
648 
649   Not Collective, Asynchronous
650 
651   Input Parameters:
652 + dctx - The parent `PetscDeviceContext`
653 - n    - The number of children to create
654 
655   Output Parameter:
656 . dsub - The created child context(s)
657 
658   Level: beginner
659 
660   Notes:
661   Behaves identically to `PetscDeviceContextForkWithStreamType()` except that the prescribed
662   `PetscStreamType` is taken from `dctx`. In effect this routine is shorthand for\:
663 
664 .vb
665   PetscStreamType stype;
666 
667   PetscDeviceContextGetStreamType(dctx, &stype);
668   PetscDeviceContextForkWithStreamType(dctx, stype, ...);
669 .ve
670 
671 .N ASYNC_API
672 
673 .seealso: `PetscDeviceContextForkWithStreamType()`, `PetscDeviceContextJoin()`,
674 `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()`
675 @*/
676 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub)
677 {
678   auto stype = PETSC_STREAM_DEFAULT_BLOCKING;
679 
680   PetscFunctionBegin;
681   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
682   PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
683   PetscCall(PetscDeviceContextForkWithStreamType(dctx, stype, n, dsub));
684   PetscFunctionReturn(PETSC_SUCCESS);
685 }
686 
687 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
688 /*@C
689   PetscDeviceContextJoin - Converge a set of child contexts
690 
691   Not Collective, Asynchronous
692 
693   Input Parameters:
694 + dctx     - A `PetscDeviceContext` to converge on
695 . n        - The number of sub contexts to converge
696 . joinMode - The type of join to perform
697 - dsub     - The sub contexts to converge
698 
699   Level: beginner
700 
701   Notes:
702   If `PetscDeviceContextFork()` creates `n` edges from a source node which all depend on the source
703   node, then this routine is the exact mirror. That is, it creates a node (represented in `dctx`)
704   which receives `n` edges (and optionally destroys them) which is dependent on the completion
705   of all incoming edges.
706 
707   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`. All contexts in `dsub` will be
708   destroyed by this routine. Thus all sub contexts must have been created with the `dctx`
709   passed to this routine.
710 
711   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`. All sub contexts will additionally wait on
712   `dctx` after converging. This has the effect of "synchronizing" the outgoing edges. Note the
713   sync suffix does NOT refer to the host, i.e. this routine does NOT call
714   `PetscDeviceSynchronize()`.
715 
716   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`. `dctx` waits for all sub contexts but
717   the sub contexts do not wait for one another or `dctx` afterwards.
718 
719   DAG representations:
720   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`
721 .vb
722   time ->
723 
724   -> dctx ---------/- |= CALL =| - dctx ->
725   -> dsub[0] -----/
726   ->  ... -------/
727   -> dsub[n-1] -/
728 .ve
729   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`
730 .vb
731   time ->
732 
733   -> dctx ---------/- |= CALL =| -\----> dctx ------>
734   -> dsub[0] -----/                \---> dsub[0] --->
735   ->  ... -------/                  \--> ... ------->
736   -> dsub[n-1] -/                    \-> dsub[n-1] ->
737 .ve
738   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`
739 .vb
740   time ->
741 
742   -> dctx ----------/- |= CALL =| - dctx ->
743   -> dsub[0] ------/----------------------->
744   ->  ... --------/------------------------>
745   -> dsub[n-1] --/------------------------->
746 .ve
747 
748 .N ASYNC_API
749 
750 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextForkWithStreamType()`,
751 `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode`
752 @*/
753 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub)
754 {
755   // debugging only
756   std::string idList;
757 
758   PetscFunctionBegin;
759   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
760   /* validity of dctx is checked in the wait-for loop */
761   PetscAssertPointer(dsub, 4);
762   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n);
763   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
764   if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n));
765   /* first dctx waits on all the incoming edges */
766   PetscCall(PetscLogEventBegin(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr));
767   for (PetscInt i = 0; i < n; ++i) {
768     PetscCheckCompatibleDeviceContexts(dctx, 1, (*dsub)[i], 4);
769     PetscCall(PetscDeviceContextWaitForContext(dctx, (*dsub)[i]));
770     if (PetscDefined(USE_DEBUG_AND_INFO)) {
771       PetscCallCXX(idList += std::to_string(PetscObjectCast((*dsub)[i])->id));
772       if (i + 1 < n) PetscCallCXX(idList += ", ");
773     }
774   }
775 
776   /* now we handle the aftermath */
777   switch (joinMode) {
778   case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: {
779     const auto children = dctx->childIDs;
780     const auto maxchild = dctx->maxNumChildren;
781     auto      &nchild   = dctx->numChildren;
782     PetscInt   j        = 0;
783 
784     PetscCheck(n <= nchild, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent", n, nchild);
785     /* update child count while it's still fresh in memory */
786     nchild -= n;
787     for (PetscInt i = 0; i < maxchild; ++i) {
788       if (children[i] && (children[i] == PetscObjectCast((*dsub)[j])->id)) {
789         /* child is one of ours, can destroy it */
790         PetscCall(PetscDeviceContextDestroy((*dsub) + j));
791         /* reset the child slot */
792         children[i] = 0;
793         if (++j == n) break;
794       }
795     }
796     /* gone through the loop but did not find every child */
797     PetscCheck(j == n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in", n - j);
798     PetscCall(PetscFree(*dsub));
799   } break;
800   case PETSC_DEVICE_CONTEXT_JOIN_SYNC:
801     for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i], dctx));
802   case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC:
803     break;
804   default:
805     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given");
806   }
807   PetscCall(PetscLogEventEnd(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr));
808 
809   PetscCall(PetscDebugInfo(dctx, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt64_FMT ", mode %s with IDs: %s\n", n, PetscObjectCast(dctx)->id, PetscDeviceContextJoinModes[joinMode], idList.c_str()));
810   PetscFunctionReturn(PETSC_SUCCESS);
811 }
812 
813 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
814 /*@C
815   PetscDeviceContextSynchronize - Block the host until all work queued on a
816   `PetscDeviceContext` has finished
817 
818   Not Collective
819 
820   Input Parameter:
821 . dctx - The `PetscDeviceContext` to synchronize
822 
823   Level: beginner
824 
825   Notes:
826   The host will not return from this routine until `dctx` is idle. Any and all memory
827   operations queued on or otherwise associated with (either explicitly or implicitly via
828   dependencies) are guaranteed to have finished and be globally visible on return.
829 
830   In effect, this routine serves as memory and execution barrier.
831 
832   DAG representation:
833 .vb
834   time ->
835 
836   -> dctx - |= CALL =| - dctx ->
837 .ve
838 
839 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()`
840 @*/
841 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx)
842 {
843   PetscFunctionBegin;
844   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
845   PetscCall(PetscLogEventBegin(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr));
846   /* if it isn't setup there is nothing to sync on */
847   if (dctx->setup) {
848     PetscUseTypeMethod(dctx, synchronize);
849     PetscCall(PetscDeviceContextSyncClearMap_Internal(dctx));
850   }
851   PetscCall(PetscLogEventEnd(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr));
852   PetscFunctionReturn(PETSC_SUCCESS);
853 }
854 
855 /* every device type has a vector of null PetscDeviceContexts -- one for each device */
856 static auto nullContexts          = std::array<std::vector<PetscDeviceContext>, PETSC_DEVICE_MAX>{};
857 static auto nullContextsFinalizer = false;
858 
859 static PetscErrorCode PetscDeviceContextGetNullContextForDevice_Private(PetscBool user_set_device, PetscDevice device, PetscDeviceContext *dctx)
860 {
861   PetscInt        devid;
862   PetscDeviceType dtype;
863 
864   PetscFunctionBegin;
865   PetscValidDevice(device, 2);
866   PetscAssertPointer(dctx, 3);
867   if (PetscUnlikely(!nullContextsFinalizer)) {
868     nullContextsFinalizer = true;
869     PetscCall(PetscRegisterFinalize([] {
870       PetscFunctionBegin;
871       for (auto &&dvec : nullContexts) {
872         for (auto &&dctx : dvec) PetscCall(PetscDeviceContextDestroy(&dctx));
873         PetscCallCXX(dvec.clear());
874       }
875       nullContextsFinalizer = false;
876       PetscFunctionReturn(PETSC_SUCCESS);
877     }));
878   }
879   PetscCall(PetscDeviceGetDeviceId(device, &devid));
880   PetscCall(PetscDeviceGetType(device, &dtype));
881   {
882     auto &ctxlist = nullContexts[dtype];
883 
884     PetscCheck(devid >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Device ID (%" PetscInt_FMT ") must be positive", devid);
885     // need to resize the container if not big enough because incrementing the iterator in
886     // std::next() (if we haven't initialized that ctx yet) may cause it to fall outside the
887     // current size of the container.
888     if (static_cast<std::size_t>(devid) >= ctxlist.size()) PetscCallCXX(ctxlist.resize(devid + 1));
889     if (PetscUnlikely(!ctxlist[devid])) {
890       // we have not seen this device before
891       PetscCall(PetscDeviceContextCreate(dctx));
892       PetscCall(PetscInfo(*dctx, "Initializing null PetscDeviceContext (of type %s) for device %" PetscInt_FMT "\n", PetscDeviceTypes[dtype], devid));
893       {
894         const auto pobj   = PetscObjectCast(*dctx);
895         const auto name   = "null context " + std::to_string(devid);
896         const auto prefix = "null_context_" + std::to_string(devid) + '_';
897 
898         PetscCall(PetscObjectSetName(pobj, name.c_str()));
899         PetscCall(PetscObjectSetOptionsPrefix(pobj, prefix.c_str()));
900       }
901       PetscCall(PetscDeviceContextSetStreamType(*dctx, PETSC_STREAM_GLOBAL_BLOCKING));
902       PetscCall(PetscDeviceContextSetDevice_Private(*dctx, device, user_set_device));
903       PetscCall(PetscDeviceContextSetUp(*dctx));
904       // would use ctxlist.cbegin() but GCC 4.8 can't handle const iterator insert!
905       PetscCallCXX(ctxlist.insert(std::next(ctxlist.begin(), devid), *dctx));
906     } else *dctx = ctxlist[devid];
907   }
908   PetscFunctionReturn(PETSC_SUCCESS);
909 }
910 
911 /*
912   Gets the "NULL" context for the current PetscDeviceType and PetscDevice. NULL contexts are
913   guaranteed to always be globally blocking.
914 */
915 PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *dctx)
916 {
917   PetscDeviceContext gctx;
918   PetscDevice        gdev = nullptr;
919 
920   PetscFunctionBegin;
921   PetscAssertPointer(dctx, 1);
922   PetscCall(PetscDeviceContextGetCurrentContext(&gctx));
923   PetscCall(PetscDeviceContextGetDevice(gctx, &gdev));
924   PetscCall(PetscDeviceContextGetNullContextForDevice_Private(gctx->usersetdevice, gdev, dctx));
925   PetscFunctionReturn(PETSC_SUCCESS);
926 }
927 
928 /*@C
929   PetscDeviceContextSetFromOptions - Configure a `PetscDeviceContext` from the options database
930 
931   Collective on `comm` or `dctx`
932 
933   Input Parameters:
934 + comm - MPI communicator on which to query the options database (optional)
935 - dctx - The `PetscDeviceContext` to configure
936 
937   Output Parameter:
938 . dctx - The `PetscDeviceContext`
939 
940   Options Database Keys:
941 + -device_context_stream_type - type of stream to create inside the `PetscDeviceContext` -
942    `PetscDeviceContextSetStreamType()`
943 - -device_context_device_type - the type of `PetscDevice` to attach by default - `PetscDeviceType`
944 
945   Level: beginner
946 
947   Note:
948   The user may pass `MPI_COMM_NULL` for `comm` in which case the communicator of `dctx` is
949   used (which is always `PETSC_COMM_SELF`).
950 
951 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()`,
952 `PetscDeviceContextView()`
953 @*/
954 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, PetscDeviceContext dctx)
955 {
956   const auto pobj     = PetscObjectCast(dctx);
957   auto       dtype    = std::make_pair(PETSC_DEVICE_DEFAULT(), PETSC_FALSE);
958   auto       stype    = std::make_pair(PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE, PETSC_FALSE);
959   MPI_Comm   old_comm = PETSC_COMM_SELF;
960 
961   PetscFunctionBegin;
962   // do not user getoptionalnullcontext here, the user is not allowed to set it from options!
963   PetscValidDeviceContext(dctx, 2);
964   /* set the device type first */
965   if (const auto device = dctx->device) PetscCall(PetscDeviceGetType(device, &dtype.first));
966   PetscCall(PetscDeviceContextGetStreamType(dctx, &stype.first));
967 
968   if (comm == MPI_COMM_NULL) {
969     PetscCall(PetscObjectGetComm(pobj, &comm));
970   } else {
971     // briefly set the communicator for dctx (it is always PETSC_COMM_SELF) so
972     // PetscObjectOptionsBegin() behaves as if dctx had comm
973     old_comm = Petsc::util::exchange(pobj->comm, comm);
974   }
975 
976   PetscObjectOptionsBegin(pobj);
977   PetscCall(PetscDeviceContextQueryOptions_Internal(PetscOptionsObject, dtype, stype));
978   PetscOptionsEnd();
979   // reset the comm (should be PETSC_COMM_SELF)
980   if (comm != MPI_COMM_NULL) pobj->comm = old_comm;
981   if (dtype.second) PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, dtype.first));
982   if (stype.second) PetscCall(PetscDeviceContextSetStreamType(dctx, stype.first));
983   PetscCall(PetscDeviceContextSetUp(dctx));
984   PetscFunctionReturn(PETSC_SUCCESS);
985 }
986 
987 /*@C
988   PetscDeviceContextView - View a `PetscDeviceContext`
989 
990   Collective on `viewer`
991 
992   Input Parameters:
993 + dctx   - The `PetscDeviceContext`
994 - viewer - The `PetscViewer` to view `dctx` with (may be `NULL`)
995 
996   Level: beginner
997 
998   Note:
999   If `viewer` is `NULL`, `PETSC_VIEWER_STDOUT_WORLD` is used instead, in which case this
1000   routine is collective on `PETSC_COMM_WORLD`.
1001 
1002 .seealso: `PetscDeviceContextViewFromOptions()`, `PetscDeviceView()`, `PETSC_VIEWER_STDOUT_WORLD`, `PetscDeviceContextCreate()`
1003 @*/
1004 PetscErrorCode PetscDeviceContextView(PetscDeviceContext dctx, PetscViewer viewer)
1005 {
1006   PetscBool iascii;
1007 
1008   PetscFunctionBegin;
1009   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1010   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer));
1011   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
1012   PetscCall(PetscObjectTypeCompare(PetscObjectCast(viewer), PETSCVIEWERASCII, &iascii));
1013   if (iascii) {
1014     auto        stype = PETSC_STREAM_DEFAULT_BLOCKING;
1015     PetscViewer sub;
1016 
1017     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sub));
1018     PetscCall(PetscObjectPrintClassNamePrefixType(PetscObjectCast(dctx), sub));
1019     PetscCall(PetscViewerASCIIPushTab(sub));
1020     PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
1021     PetscCall(PetscViewerASCIIPrintf(sub, "stream type: %s\n", PetscStreamTypes[stype]));
1022     PetscCall(PetscViewerASCIIPrintf(sub, "children: %" PetscInt_FMT "\n", dctx->numChildren));
1023     if (const auto nchild = dctx->numChildren) {
1024       PetscCall(PetscViewerASCIIPushTab(sub));
1025       for (PetscInt i = 0; i < nchild; ++i) {
1026         if (i == nchild - 1) {
1027           PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT, dctx->childIDs[i]));
1028         } else {
1029           PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT ", ", dctx->childIDs[i]));
1030         }
1031       }
1032     }
1033     PetscCall(PetscViewerASCIIPopTab(sub));
1034     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sub));
1035     PetscCall(PetscViewerFlush(viewer));
1036     PetscCall(PetscViewerASCIIPushTab(viewer));
1037   }
1038   if (const auto device = dctx->device) PetscCall(PetscDeviceView(device, viewer));
1039   if (iascii) PetscCall(PetscViewerASCIIPopTab(viewer));
1040   PetscFunctionReturn(PETSC_SUCCESS);
1041 }
1042 
1043 /*@C
1044   PetscDeviceContextViewFromOptions - View a `PetscDeviceContext` from options
1045 
1046   Input Parameters:
1047 + dctx - The `PetscDeviceContext` to view
1048 . obj  - Optional `PetscObject` to associate (may be `NULL`)
1049 - name - The command line option
1050 
1051   Level: beginner
1052 
1053 .seealso: `PetscDeviceContextView()`, `PetscObjectViewFromOptions()`, `PetscDeviceContextCreate()`
1054 @*/
1055 PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext dctx, PetscObject obj, const char name[])
1056 {
1057   PetscFunctionBegin;
1058   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1059   if (obj) PetscValidHeader(obj, 2);
1060   PetscAssertPointer(name, 3);
1061   PetscCall(PetscObjectViewFromOptions(PetscObjectCast(dctx), obj, name));
1062   PetscFunctionReturn(PETSC_SUCCESS);
1063 }
1064 
1065 /*@C
1066   PetscDeviceContextGetStreamHandle - Return a handle to the underlying stream of the current device context
1067 
1068   Input Parameters:
1069 + dctx   - The `PetscDeviceContext` to get the stream from
1070 - handle - A handle to the stream
1071 
1072   Level: developer
1073 
1074   Note:
1075   This routine is dangerous. It exists only for the most experienced users and
1076   internal PETSc developement.
1077 
1078   There is no way for PETSc's auto-dependency system to track what the caller does with the
1079   stream.
1080 
1081   If the user uses the stream to copy memory that was previously modified by PETSc, or launches
1082   kernels that modify memory with the stream, it is the users responsibility to inform PETSc of
1083   their actions via `PetscDeviceContextMarkIntentFromID()`. Failure to do so may introduce a
1084   race condition. This race condition may manifest in nondeterministic ways.
1085 
1086   Alternatively, the user may synchronize the stream immediately before and after use. This is
1087   the safest option.
1088 
1089   Example Usage:
1090 .vb
1091   PetscDeviceContext dctx;
1092   PetscDeviceType    type;
1093   void               *handle;
1094 
1095   PetscDeviceContextGetCurrentContext(&dctx);
1096   PetscDeviceContextGetStreamHandle(dctx, &handle);
1097   PetscDeviceContextGetDeviceType(dctx, &type);
1098 
1099   if (type == PETSC_DEVICE_CUDA) {
1100     cudsStream_t stream = *(cudaStream_t*)handle;
1101 
1102     my_cuda_kernel<<<1, 2, 3, stream>>>();
1103   }
1104 .ve
1105 
1106 .N ASYNC_API
1107 
1108 .seealso: `PetscDeviceContext`
1109 @*/
1110 PetscErrorCode PetscDeviceContextGetStreamHandle(PetscDeviceContext dctx, void *handle)
1111 {
1112   PetscFunctionBegin;
1113   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1114   PetscAssertPointer(handle, 2);
1115   PetscCall(PetscDeviceContextGetStreamHandle_Internal(dctx, (void **)handle));
1116   PetscFunctionReturn(PETSC_SUCCESS);
1117 }
1118