xref: /petsc/src/sys/objects/device/interface/dcontext.cxx (revision fbf9dbe564678ed6eff1806adbc4c4f01b9743f4)
1 #include "petscdevice_interface_internal.hpp" /*I <petscdevice.h> I*/
2 #include <petsc/private/viewerimpl.h>         // _p_PetscViewer for PetscObjectCast()
3 
4 #include <petsc/private/cpp/object_pool.hpp>
5 #include <petsc/private/cpp/utility.hpp>
6 #include <petsc/private/cpp/array.hpp>
7 
8 #include <vector>
9 #include <string> // std::to_string among other things
10 
11 /* Define the allocator */
12 class PetscDeviceContextConstructor : public Petsc::ConstructorInterface<_p_PetscDeviceContext, PetscDeviceContextConstructor> {
13 public:
14   PetscErrorCode construct_(PetscDeviceContext dctx) const noexcept
15   {
16     PetscFunctionBegin;
17     PetscCall(PetscArrayzero(dctx, 1));
18     PetscCall(PetscHeaderInitialize_Private(dctx, PETSC_DEVICE_CONTEXT_CLASSID, "PetscDeviceContext", "PetscDeviceContext", "Sys", PETSC_COMM_SELF, PetscDeviceContextDestroy, PetscDeviceContextView));
19     PetscCallCXX(PetscObjectCast(dctx)->cpp = new CxxData());
20     PetscCall(underlying().reset(dctx, false));
21     PetscFunctionReturn(PETSC_SUCCESS);
22   }
23 
24   static PetscErrorCode destroy_(PetscDeviceContext dctx) noexcept
25   {
26     PetscFunctionBegin;
27     PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren);
28     PetscTryTypeMethod(dctx, destroy);
29     PetscCall(PetscDeviceDestroy(&dctx->device));
30     PetscCall(PetscFree(dctx->childIDs));
31     delete CxxDataCast(dctx);
32     PetscCall(PetscHeaderDestroy_Private(PetscObjectCast(dctx), PETSC_FALSE));
33     PetscFunctionReturn(PETSC_SUCCESS);
34   }
35 
36   static PetscErrorCode reset_(PetscDeviceContext dctx, bool zero = true) noexcept
37   {
38     PetscFunctionBegin;
39     if (zero) {
40       // reset the device if the user set it
41       if (Petsc::util::exchange(dctx->usersetdevice, PETSC_FALSE)) {
42         PetscTryTypeMethod(dctx, destroy);
43         PetscCall(PetscDeviceDestroy(&dctx->device));
44         PetscCall(PetscArrayzero(dctx->ops, 1));
45         dctx->data = nullptr;
46       }
47       PetscCall(PetscHeaderReset_Internal(PetscObjectCast(dctx)));
48       dctx->numChildren = 0;
49       dctx->setup       = PETSC_FALSE;
50       // don't deallocate the child array, rather just zero it out
51       PetscCall(PetscArrayzero(dctx->childIDs, dctx->maxNumChildren));
52       PetscCall(CxxDataCast(dctx)->clear());
53     }
54     dctx->streamType = PETSC_STREAM_DEFAULT_BLOCKING;
55     PetscFunctionReturn(PETSC_SUCCESS);
56   }
57 
58   static PetscErrorCode invalidate_(PetscDeviceContext) noexcept { return PETSC_SUCCESS; }
59 };
60 
61 static Petsc::ObjectPool<_p_PetscDeviceContext, PetscDeviceContextConstructor> contextPool;
62 
63 /*@C
64   PetscDeviceContextCreate - Creates a `PetscDeviceContext`
65 
66   Not Collective
67 
68   Output Parameter:
69 . dctx - The `PetscDeviceContext`
70 
71   Level: beginner
72 
73   Note:
74   Unlike almost every other PETSc class it is advised that most users use
75   `PetscDeviceContextDuplicate()` rather than this routine to create new contexts. Contexts of
76   different types are incompatible with one another; using `PetscDeviceContextDuplicate()`
77   ensures compatible types.
78 
79   DAG representation:
80 .vb
81   time ->
82 
83   |= CALL =| - dctx ->
84 .ve
85 
86 .N ASYNC_API
87 
88 .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`,
89 `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`,
90 `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextView()`, `PetscDeviceContextDestroy()`
91 @*/
92 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx)
93 {
94   PetscFunctionBegin;
95   PetscValidPointer(dctx, 1);
96   PetscCall(PetscDeviceInitializePackage());
97   PetscCall(PetscLogEventBegin(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr));
98   PetscCall(contextPool.allocate(dctx));
99   PetscCall(PetscLogEventEnd(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr));
100   PetscFunctionReturn(PETSC_SUCCESS);
101 }
102 
103 /*@C
104   PetscDeviceContextDestroy - Frees a `PetscDeviceContext`
105 
106   Not Collective
107 
108   Input Parameter:
109 . dctx - The `PetscDeviceContext`
110 
111   Level: beginner
112 
113   Notes:
114   No implicit synchronization occurs due to this routine, all resources are released completely
115   asynchronously w.r.t. the host. If one needs to guarantee access to the data produced on
116   `dctx`'s stream the user is responsible for calling `PetscDeviceContextSynchronize()` before
117   calling this routine.
118 
119   DAG representation:
120 .vb
121   time ->
122 
123   -> dctx - |= CALL =|
124 .ve
125 
126   Developer Notes:
127   `dctx` is never actually "destroyed" in the classical sense. It is returned to an ever
128   growing pool of `PetscDeviceContext`s. There are currently no limits on the size of the pool,
129   this should perhaps be implemented.
130 
131 .N ASYNC_API
132 
133 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
134 `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()`
135 @*/
136 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx)
137 {
138   PetscFunctionBegin;
139   PetscValidPointer(dctx, 1);
140   if (!*dctx) PetscFunctionReturn(PETSC_SUCCESS);
141   PetscCall(PetscLogEventBegin(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr));
142   if (--(PetscObjectCast(*dctx)->refct) <= 0) {
143     PetscCall(PetscDeviceContextCheckNotOrphaned_Internal(*dctx));
144     PetscCall(contextPool.deallocate(dctx));
145   }
146   PetscCall(PetscLogEventEnd(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr));
147   *dctx = nullptr;
148   PetscFunctionReturn(PETSC_SUCCESS);
149 }
150 
151 /*@C
152   PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a
153   `PetscDeviceContext`
154 
155   Not Collective
156 
157   Input Parameters:
158 + dctx - The `PetscDeviceContext`
159 - type - The `PetscStreamType`
160 
161   Level: beginner
162 
163   Note:
164   See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
165   types and their interactions. If the `PetscDeviceContext` was previously set up and stream
166   type was changed, you must call `PetscDeviceContextSetUp()` again after this routine.
167 
168 .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`,
169 `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()`
170 @*/
171 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type)
172 {
173   PetscFunctionBegin;
174   // do not use getoptionalnullcontext here since we do not want the user to change the stream
175   // type
176   PetscValidDeviceContext(dctx, 1);
177   PetscValidStreamType(type, 2);
178   // only need to do complex swapping if the object has already been setup
179   if (dctx->setup && (dctx->streamType != type)) {
180     dctx->setup = PETSC_FALSE;
181     PetscCall(PetscLogEventBegin(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr));
182     PetscUseTypeMethod(dctx, changestreamtype, type);
183     PetscCall(PetscLogEventEnd(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr));
184   }
185   dctx->streamType = type;
186   PetscFunctionReturn(PETSC_SUCCESS);
187 }
188 
189 /*@C
190   PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a
191   `PetscDeviceContext`
192 
193   Not Collective
194 
195   Input Parameter:
196 . dctx - The `PetscDeviceContext`
197 
198   Output Parameter:
199 . type - The `PetscStreamType`
200 
201   Level: beginner
202 
203   Note:
204   See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
205   types and their interactions
206 
207 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`,
208 `PetscDeviceContextSetFromOptions()`
209 @*/
210 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type)
211 {
212   PetscFunctionBegin;
213   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
214   PetscValidIntPointer(type, 2);
215   *type = dctx->streamType;
216   PetscFunctionReturn(PETSC_SUCCESS);
217 }
218 
219 /*
220   Actual function to set the device.
221 
222   1. Repeatedly destroying and recreating internal data structures (like streams and events)
223      for recycled PetscDeviceContexts is not free. If done often, it does add up.
224   2. The vast majority of PetscDeviceContexts are created by PETSc either as children or
225      default contexts. The default contexts *never* change type, and the children are extremely
226      unlikely to (chances are if you fork once, you will fork again very soon).
227   3. The only time this calculus changes is if the user themselves sets the device type. In
228      this case we do not know what the user has changed, so must always wipe the slate clean.
229 
230   Thus we need to keep track whether the user explicitly sets the device contexts device.
231 */
232 static PetscErrorCode PetscDeviceContextSetDevice_Private(PetscDeviceContext dctx, PetscDevice device, PetscBool user_set)
233 {
234   PetscFunctionBegin;
235   // do not use getoptionalnullcontext here since we do not want the user to change its device
236   PetscValidDeviceContext(dctx, 1);
237   PetscValidDevice(device, 2);
238   if (dctx->device && (dctx->device->id == device->id)) PetscFunctionReturn(PETSC_SUCCESS);
239   PetscCall(PetscLogEventBegin(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr));
240   PetscTryTypeMethod(dctx, destroy);
241   PetscCall(PetscDeviceDestroy(&dctx->device));
242   PetscCall(PetscMemzero(dctx->ops, sizeof(*dctx->ops)));
243   PetscCall(PetscDeviceReference_Internal(device));
244   // set it before calling the method
245   dctx->device = device;
246   PetscCall((*device->ops->createcontext)(dctx));
247   PetscCall(PetscLogEventEnd(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr));
248   dctx->setup         = PETSC_FALSE;
249   dctx->usersetdevice = user_set;
250   PetscFunctionReturn(PETSC_SUCCESS);
251 }
252 
253 PetscErrorCode PetscDeviceContextSetDefaultDeviceForType_Internal(PetscDeviceContext dctx, PetscDeviceType type)
254 {
255   PetscDevice device;
256 
257   PetscFunctionBegin;
258   PetscCall(PetscDeviceGetDefaultForType_Internal(type, &device));
259   PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_FALSE));
260   PetscFunctionReturn(PETSC_SUCCESS);
261 }
262 
263 /*@C
264   PetscDeviceContextSetDevice - Set the underlying `PetscDevice` for a `PetscDeviceContext`
265 
266   Not Collective
267 
268   Input Parameters:
269 + dctx   - The `PetscDeviceContext`
270 - device - The `PetscDevice`
271 
272   Level: intermediate
273 
274   Notes:
275   This routine is effectively `PetscDeviceContext`'s "set-type" (so every `PetscDeviceContext` must
276   also have an attached `PetscDevice`). Unlike the usual set-type semantics, it is not strictly
277   necessary to set a contexts device to enable usage, any created `PetscDeviceContext`s will
278   always come equipped with the "default" device.
279 
280   This routine is a no-op if `device` is already attached to `dctx`.
281 
282   This routine may (but is very unlikely to) initialize the backend device and may incur
283   synchronization.
284 
285 .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()`,
286 `PetscDeviceContextGetDeviceType()`
287 @*/
288 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device)
289 {
290   PetscFunctionBegin;
291   PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_TRUE));
292   PetscFunctionReturn(PETSC_SUCCESS);
293 }
294 
295 /*@C
296   PetscDeviceContextGetDevice - Get the underlying `PetscDevice` for a `PetscDeviceContext`
297 
298   Not Collective
299 
300   Input Parameter:
301 . dctx - the `PetscDeviceContext`
302 
303   Output Parameter:
304 . device - The `PetscDevice`
305 
306   Level: intermediate
307 
308   Note:
309   This is a borrowed reference, the user should not destroy `device`.
310 
311 .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice`, `PetscDeviceContextGetDeviceType()`
312 @*/
313 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device)
314 {
315   PetscFunctionBegin;
316   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
317   PetscValidPointer(device, 2);
318   PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt64_FMT " has no attached PetscDevice to get", PetscObjectCast(dctx)->id);
319   *device = dctx->device;
320   PetscFunctionReturn(PETSC_SUCCESS);
321 }
322 
323 /*@C
324   PetscDeviceContextGetDeviceType - Get the `PetscDeviceType` for a `PetscDeviceContext`
325 
326   Not Collective
327 
328   Input Parameter:
329 . dctx - The `PetscDeviceContext`
330 
331   Output Parameter:
332 . type - The `PetscDeviceType`
333 
334   Level: beginner
335 
336   Note:
337   This routine is a convenience shorthand for `PetscDeviceContextGetDevice()` ->
338   `PetscDeviceGetType()`.
339 
340 .seealso: `PetscDeviceType`, `PetscDeviceContextGetDevice()`, `PetscDeviceGetType()`, `PetscDevice`
341 @*/
342 PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext dctx, PetscDeviceType *type)
343 {
344   PetscDevice device = nullptr;
345 
346   PetscFunctionBegin;
347   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
348   PetscValidPointer(type, 2);
349   PetscCall(PetscDeviceContextGetDevice(dctx, &device));
350   PetscCall(PetscDeviceGetType(device, type));
351   PetscFunctionReturn(PETSC_SUCCESS);
352 }
353 
354 /*@C
355   PetscDeviceContextSetUp - Prepares a `PetscDeviceContext` for use
356 
357   Not Collective
358 
359   Input Parameter:
360 . dctx - The `PetscDeviceContext`
361 
362   Level: beginner
363 
364   Developer Note:
365   This routine is usually the stage where a `PetscDeviceContext` acquires device-side data
366   structures such as streams, events, and (possibly) handles.
367 
368 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
369 `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()`
370 @*/
371 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx)
372 {
373   PetscFunctionBegin;
374   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
375   if (dctx->setup) PetscFunctionReturn(PETSC_SUCCESS);
376   if (!dctx->device) {
377     const auto default_dtype = PETSC_DEVICE_DEFAULT();
378 
379     PetscCall(PetscInfo(dctx, "PetscDeviceContext %" PetscInt64_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", PetscObjectCast(dctx)->id, PetscDeviceTypes[default_dtype]));
380     PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, default_dtype));
381   }
382   PetscCall(PetscLogEventBegin(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr));
383   PetscUseTypeMethod(dctx, setup);
384   PetscCall(PetscLogEventEnd(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr));
385   dctx->setup = PETSC_TRUE;
386   PetscFunctionReturn(PETSC_SUCCESS);
387 }
388 
389 static PetscErrorCode PetscDeviceContextDuplicate_Private(PetscDeviceContext dctx, PetscStreamType stype, PetscDeviceContext *dctxdup)
390 {
391   PetscFunctionBegin;
392   PetscCall(PetscLogEventBegin(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr));
393   PetscCall(PetscDeviceContextCreate(dctxdup));
394   PetscCall(PetscDeviceContextSetStreamType(*dctxdup, stype));
395   if (const auto device = dctx->device) PetscCall(PetscDeviceContextSetDevice_Private(*dctxdup, device, dctx->usersetdevice));
396   PetscCall(PetscDeviceContextSetUp(*dctxdup));
397   PetscCall(PetscLogEventEnd(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr));
398   PetscFunctionReturn(PETSC_SUCCESS);
399 }
400 
401 /*@C
402   PetscDeviceContextDuplicate - Duplicates a `PetscDeviceContext` object
403 
404   Not Collective
405 
406   Input Parameter:
407 . dctx - The `PetscDeviceContext` to duplicate
408 
409   Output Parameter:
410 . dctxdup - The duplicated `PetscDeviceContext`
411 
412   Level: beginner
413 
414   Notes:
415   This is a shorthand method for creating a `PetscDeviceContext` with the exact same settings as
416   another. Note however that `dctxdup` does not share any of the underlying data with `dctx`,
417   (including its current stream-state) they are completely separate objects.
418 
419   There is no implied ordering between `dctx` or `dctxdup`.
420 
421   DAG representation:
422 .vb
423   time ->
424 
425   -> dctx - |= CALL =| - dctx ---->
426                        - dctxdup ->
427 .ve
428 
429 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
430 `PetscDeviceContextSetStreamType()`
431 @*/
432 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup)
433 {
434   auto stype = PETSC_STREAM_DEFAULT_BLOCKING;
435 
436   PetscFunctionBegin;
437   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
438   PetscValidPointer(dctxdup, 2);
439   PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
440   PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, dctxdup));
441   PetscFunctionReturn(PETSC_SUCCESS);
442 }
443 
444 /*@C
445   PetscDeviceContextQueryIdle - Returns whether or not a `PetscDeviceContext` is idle
446 
447   Not Collective
448 
449   Input Parameter:
450 . dctx - The `PetscDeviceContext`
451 
452   Output Parameter:
453 . idle - `PETSC_TRUE` if `dctx` has NO work, `PETSC_FALSE` if it has work
454 
455   Level: intermediate
456 
457   Note:
458   This routine only refers a singular context and does NOT take any of its children into
459   account. That is, if `dctx` is idle but has dependents who do have work this routine still
460   returns `PETSC_TRUE`.
461 
462 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()`
463 @*/
464 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle)
465 {
466   PetscFunctionBegin;
467   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
468   PetscValidBoolPointer(idle, 2);
469   PetscCall(PetscLogEventBegin(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr));
470   PetscUseTypeMethod(dctx, query, idle);
471   PetscCall(PetscLogEventEnd(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr));
472   PetscCall(PetscInfo(dctx, "PetscDeviceContext ('%s', id %" PetscInt64_FMT ") %s idle\n", PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", PetscObjectCast(dctx)->id, *idle ? "was" : "was not"));
473   PetscFunctionReturn(PETSC_SUCCESS);
474 }
475 
476 /*@C
477   PetscDeviceContextWaitForContext - Make one context wait for another context to finish
478 
479   Not Collective
480 
481   Input Parameters:
482 + dctxa - The `PetscDeviceContext` object that is waiting
483 - dctxb - The `PetscDeviceContext` object that is being waited on
484 
485   Level: beginner
486 
487   Notes:
488   Serializes two `PetscDeviceContext`s. Serialization is performed asynchronously; the host
489   does not wait for the serialization to actually occur.
490 
491   This routine uses only the state of `dctxb` at the moment this routine was called, so any
492   future work queued will not affect `dctxa`. It is safe to pass the same context to both
493   arguments (in which case this routine does nothing).
494 
495   DAG representation:
496 .vb
497   time ->
498 
499   -> dctxa ---/- |= CALL =| - dctxa ->
500              /
501   -> dctxb -/------------------------>
502 .ve
503 
504 .N ASYNC_API
505 
506 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()`
507 @*/
508 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb)
509 {
510   PetscObject aobj;
511 
512   PetscFunctionBegin;
513   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxa));
514   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxb));
515   PetscCheckCompatibleDeviceContexts(dctxa, 1, dctxb, 2);
516   if (dctxa == dctxb) PetscFunctionReturn(PETSC_SUCCESS);
517   aobj = PetscObjectCast(dctxa);
518   PetscCall(PetscLogEventBegin(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr));
519   PetscUseTypeMethod(dctxa, waitforcontext, dctxb);
520   PetscCallCXX(CxxDataCast(dctxa)->upstream[dctxb] = CxxDataParent(dctxb));
521   PetscCall(PetscLogEventEnd(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr));
522   PetscCall(PetscInfo(dctxa, "dctx %" PetscInt64_FMT " waiting on dctx %" PetscInt64_FMT "\n", aobj->id, PetscObjectCast(dctxb)->id));
523   PetscCall(PetscObjectStateIncrease(aobj));
524   PetscFunctionReturn(PETSC_SUCCESS);
525 }
526 
527 /*@C
528   PetscDeviceContextForkWithStreamType - Create a set of dependent child contexts from a parent
529   context with a prescribed `PetscStreamType`
530 
531   Not Collective, Asynchronous
532 
533   Input Parameters:
534 + dctx  - The parent `PetscDeviceContext`
535 . stype - The prescribed `PetscStreamType`
536 - n     - The number of children to create
537 
538   Output Parameter:
539 . dsub - The created child context(s)
540 
541   Level: intermediate
542 
543   Notes:
544   This routine creates `n` edges of a DAG from a source node which are causally dependent on the
545   source node. This causal dependency is established as-if by calling
546   `PetscDeviceContextWaitForContext()` on every child.
547 
548   `dsub` is allocated by this routine and has its lifetime bounded by `dctx`. That is, `dctx`
549   expects to free `dsub` (via `PetscDeviceContextJoin()`) before it itself is destroyed.
550 
551   This routine only accounts for work queued on `dctx` up until calling this routine, any
552   subsequent work enqueued on `dctx` has no effect on `dsub`.
553 
554   The `PetscStreamType` of `dctx` does not have to equal `stype`. In fact, it is often the case
555   that they are different. This is useful in cases where a routine can locally exploit stream
556   parallelism without needing to worry about what stream type the incoming `PetscDeviceContext`
557   carries.
558 
559   DAG representation:
560 .vb
561   time ->
562 
563   -> dctx - |= CALL =| -\----> dctx ------>
564                          \---> dsub[0] --->
565                           \--> ... ------->
566                            \-> dsub[n-1] ->
567 .ve
568 
569 .N ASYNC_API
570 
571 .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`,
572 `PetscDeviceContextQueryIdle()`, `PetscDeviceContextWaitForContext()`
573 @*/
574 PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext dctx, PetscStreamType stype, PetscInt n, PetscDeviceContext **dsub)
575 {
576   // debugging only
577   std::string idList;
578   auto        ninput = n;
579 
580   PetscFunctionBegin;
581   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
582   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n);
583   PetscValidPointer(dsub, 4);
584   *dsub = nullptr;
585   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
586   if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n));
587   PetscCall(PetscLogEventBegin(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr));
588   /* update child totals */
589   dctx->numChildren += n;
590   /* now to find out if we have room */
591   if (dctx->numChildren > dctx->maxNumChildren) {
592     const auto numChildren    = dctx->numChildren;
593     auto      &maxNumChildren = dctx->maxNumChildren;
594     auto       numAllocated   = numChildren;
595 
596     /* no room, either from having too many kids or not having any */
597     if (auto &childIDs = dctx->childIDs) {
598       // the difference is backwards because we have not updated maxNumChildren yet
599       numAllocated -= maxNumChildren;
600       /* have existing children, must reallocate them */
601       PetscCall(PetscRealloc(numChildren * sizeof(*childIDs), &childIDs));
602       /* clear the extra memory since realloc doesn't do it for us */
603       PetscCall(PetscArrayzero(std::next(childIDs, maxNumChildren), numAllocated));
604     } else {
605       /* have no children */
606       PetscCall(PetscCalloc1(numChildren, &childIDs));
607     }
608     /* update total number of children */
609     maxNumChildren = numChildren;
610   }
611   PetscCall(PetscMalloc1(n, dsub));
612   for (PetscInt i = 0; ninput && (i < dctx->numChildren); ++i) {
613     auto &childID = dctx->childIDs[i];
614     /* empty child slot */
615     if (!childID) {
616       auto &childctx = (*dsub)[i];
617 
618       /* create the child context in the image of its parent */
619       PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, &childctx));
620       PetscCall(PetscDeviceContextWaitForContext(childctx, dctx));
621       /* register the child with its parent */
622       PetscCall(PetscObjectGetId(PetscObjectCast(childctx), &childID));
623       if (PetscDefined(USE_DEBUG_AND_INFO)) {
624         PetscCallCXX(idList += std::to_string(childID));
625         if (ninput != 1) PetscCallCXX(idList += ", ");
626       }
627       --ninput;
628     }
629   }
630   PetscCall(PetscLogEventEnd(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr));
631   PetscCall(PetscDebugInfo(dctx, "Forked %" PetscInt_FMT " children from parent %" PetscInt64_FMT " with IDs: %s\n", n, PetscObjectCast(dctx)->id, idList.c_str()));
632   PetscFunctionReturn(PETSC_SUCCESS);
633 }
634 
635 /*@C
636   PetscDeviceContextFork - Create a set of dependent child contexts from a parent context
637 
638   Not Collective, Asynchronous
639 
640   Input Parameters:
641 + dctx - The parent `PetscDeviceContext`
642 - n    - The number of children to create
643 
644   Output Parameter:
645 . dsub - The created child context(s)
646 
647   Level: beginner
648 
649   Notes:
650   Behaves identically to `PetscDeviceContextForkWithStreamType()` except that the prescribed
651   `PetscStreamType` is taken from `dctx`. In effect this routine is shorthand for\:
652 
653 .vb
654   PetscStreamType stype;
655 
656   PetscDeviceContextGetStreamType(dctx, &stype);
657   PetscDeviceContextForkWithStreamType(dctx, stype, ...);
658 .ve
659 
660 .N ASYNC_API
661 
662 .seealso: `PetscDeviceContextForkWithStreamType()`, `PetscDeviceContextJoin()`,
663 `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()`
664 @*/
665 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub)
666 {
667   auto stype = PETSC_STREAM_DEFAULT_BLOCKING;
668 
669   PetscFunctionBegin;
670   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
671   PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
672   PetscCall(PetscDeviceContextForkWithStreamType(dctx, stype, n, dsub));
673   PetscFunctionReturn(PETSC_SUCCESS);
674 }
675 
676 /*@C
677   PetscDeviceContextJoin - Converge a set of child contexts
678 
679   Not Collective, Asynchronous
680 
681   Input Parameters:
682 + dctx         - A `PetscDeviceContext` to converge on
683 . n            - The number of sub contexts to converge
684 . joinMode     - The type of join to perform
685 - dsub         - The sub contexts to converge
686 
687   Level: beginner
688 
689   Notes:
690   If `PetscDeviceContextFork()` creates `n` edges from a source node which all depend on the source
691   node, then this routine is the exact mirror. That is, it creates a node (represented in `dctx`)
692   which receives `n` edges (and optionally destroys them) which is dependent on the completion
693   of all incoming edges.
694 
695   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`. All contexts in `dsub` will be
696   destroyed by this routine. Thus all sub contexts must have been created with the `dctx`
697   passed to this routine.
698 
699   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`. All sub contexts will additionally wait on
700   `dctx` after converging. This has the effect of "synchronizing" the outgoing edges. Note the
701   sync suffix does NOT refer to the host, i.e. this routine does NOT call
702   `PetscDeviceSynchronize()`.
703 
704   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`. `dctx` waits for all sub contexts but
705   the sub contexts do not wait for one another or `dctx` afterwards.
706 
707   DAG representations:
708   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`
709 .vb
710   time ->
711 
712   -> dctx ---------/- |= CALL =| - dctx ->
713   -> dsub[0] -----/
714   ->  ... -------/
715   -> dsub[n-1] -/
716 .ve
717   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`
718 .vb
719   time ->
720 
721   -> dctx ---------/- |= CALL =| -\----> dctx ------>
722   -> dsub[0] -----/                \---> dsub[0] --->
723   ->  ... -------/                  \--> ... ------->
724   -> dsub[n-1] -/                    \-> dsub[n-1] ->
725 .ve
726   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`
727 .vb
728   time ->
729 
730   -> dctx ----------/- |= CALL =| - dctx ->
731   -> dsub[0] ------/----------------------->
732   ->  ... --------/------------------------>
733   -> dsub[n-1] --/------------------------->
734 .ve
735 
736 .N ASYNC_API
737 
738 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextForkWithStreamType()`,
739 `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode`
740 @*/
741 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub)
742 {
743   // debugging only
744   std::string idList;
745 
746   PetscFunctionBegin;
747   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
748   /* validity of dctx is checked in the wait-for loop */
749   PetscValidPointer(dsub, 4);
750   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n);
751   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
752   if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n));
753   /* first dctx waits on all the incoming edges */
754   PetscCall(PetscLogEventBegin(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr));
755   for (PetscInt i = 0; i < n; ++i) {
756     PetscCheckCompatibleDeviceContexts(dctx, 1, (*dsub)[i], 4);
757     PetscCall(PetscDeviceContextWaitForContext(dctx, (*dsub)[i]));
758     if (PetscDefined(USE_DEBUG_AND_INFO)) {
759       PetscCallCXX(idList += std::to_string(PetscObjectCast((*dsub)[i])->id));
760       if (i + 1 < n) PetscCallCXX(idList += ", ");
761     }
762   }
763 
764   /* now we handle the aftermath */
765   switch (joinMode) {
766   case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: {
767     const auto children = dctx->childIDs;
768     const auto maxchild = dctx->maxNumChildren;
769     auto      &nchild   = dctx->numChildren;
770     PetscInt   j        = 0;
771 
772     PetscCheck(n <= nchild, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent", n, nchild);
773     /* update child count while it's still fresh in memory */
774     nchild -= n;
775     for (PetscInt i = 0; i < maxchild; ++i) {
776       if (children[i] && (children[i] == PetscObjectCast((*dsub)[j])->id)) {
777         /* child is one of ours, can destroy it */
778         PetscCall(PetscDeviceContextDestroy((*dsub) + j));
779         /* reset the child slot */
780         children[i] = 0;
781         if (++j == n) break;
782       }
783     }
784     /* gone through the loop but did not find every child */
785     PetscCheck(j == n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in", n - j);
786     PetscCall(PetscFree(*dsub));
787   } break;
788   case PETSC_DEVICE_CONTEXT_JOIN_SYNC:
789     for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i], dctx));
790   case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC:
791     break;
792   default:
793     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given");
794   }
795   PetscCall(PetscLogEventEnd(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr));
796 
797   PetscCall(PetscDebugInfo(dctx, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt64_FMT ", mode %s with IDs: %s\n", n, PetscObjectCast(dctx)->id, PetscDeviceContextJoinModes[joinMode], idList.c_str()));
798   PetscFunctionReturn(PETSC_SUCCESS);
799 }
800 
801 /*@C
802   PetscDeviceContextSynchronize - Block the host until all work queued on a
803   `PetscDeviceContext` has finished
804 
805   Not Collective
806 
807   Input Parameter:
808 . dctx - The `PetscDeviceContext` to synchronize
809 
810   Level: beginner
811 
812   Notes:
813   The host will not return from this routine until `dctx` is idle. Any and all memory
814   operations queued on or otherwise associated with (either explicitly or implicitly via
815   dependencies) are guaranteed to have finished and be globally visible on return.
816 
817   In effect, this routine serves as memory and execution barrier.
818 
819   DAG representation:
820 .vb
821   time ->
822 
823   -> dctx - |= CALL =| - dctx ->
824 .ve
825 
826 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()`
827 @*/
828 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx)
829 {
830   PetscFunctionBegin;
831   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
832   PetscCall(PetscLogEventBegin(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr));
833   /* if it isn't setup there is nothing to sync on */
834   if (dctx->setup) {
835     PetscUseTypeMethod(dctx, synchronize);
836     PetscCall(PetscDeviceContextSyncClearMap_Internal(dctx));
837   }
838   PetscCall(PetscLogEventEnd(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr));
839   PetscFunctionReturn(PETSC_SUCCESS);
840 }
841 
842 /* every device type has a vector of null PetscDeviceContexts -- one for each device */
843 static auto nullContexts          = std::array<std::vector<PetscDeviceContext>, PETSC_DEVICE_MAX>{};
844 static auto nullContextsFinalizer = false;
845 
846 static PetscErrorCode PetscDeviceContextGetNullContextForDevice_Private(PetscBool user_set_device, PetscDevice device, PetscDeviceContext *dctx)
847 {
848   PetscInt        devid;
849   PetscDeviceType dtype;
850 
851   PetscFunctionBegin;
852   PetscValidDevice(device, 2);
853   PetscValidPointer(dctx, 3);
854   if (PetscUnlikely(!nullContextsFinalizer)) {
855     const auto finalizer = [] {
856       PetscFunctionBegin;
857       for (auto &&dvec : nullContexts) {
858         for (auto &&dctx : dvec) PetscCall(PetscDeviceContextDestroy(&dctx));
859         PetscCallCXX(dvec.clear());
860       }
861       nullContextsFinalizer = false;
862       PetscFunctionReturn(PETSC_SUCCESS);
863     };
864 
865     nullContextsFinalizer = true;
866     PetscCall(PetscRegisterFinalize(std::move(finalizer)));
867   }
868   PetscCall(PetscDeviceGetDeviceId(device, &devid));
869   PetscCall(PetscDeviceGetType(device, &dtype));
870   {
871     auto &ctxlist = nullContexts[dtype];
872 
873     PetscCheck(devid >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Device ID (%" PetscInt_FMT ") must be positive", devid);
874     // need to resize the container if not big enough because incrementing the iterator in
875     // std::next() (if we haven't initialized that ctx yet) may cause it to fall outside the
876     // current size of the container.
877     if (static_cast<std::size_t>(devid) >= ctxlist.size()) PetscCallCXX(ctxlist.resize(devid + 1));
878     if (PetscUnlikely(!ctxlist[devid])) {
879       // we have not seen this device before
880       PetscCall(PetscDeviceContextCreate(dctx));
881       PetscCall(PetscInfo(*dctx, "Initializing null PetscDeviceContext (of type %s) for device %" PetscInt_FMT "\n", PetscDeviceTypes[dtype], devid));
882       {
883         const auto pobj   = PetscObjectCast(*dctx);
884         const auto name   = "null context " + std::to_string(devid);
885         const auto prefix = "null_context_" + std::to_string(devid) + '_';
886 
887         PetscCall(PetscObjectSetName(pobj, name.c_str()));
888         PetscCall(PetscObjectSetOptionsPrefix(pobj, prefix.c_str()));
889       }
890       PetscCall(PetscDeviceContextSetStreamType(*dctx, PETSC_STREAM_GLOBAL_BLOCKING));
891       PetscCall(PetscDeviceContextSetDevice_Private(*dctx, device, user_set_device));
892       PetscCall(PetscDeviceContextSetUp(*dctx));
893       // would use ctxlist.cbegin() but GCC 4.8 can't handle const iterator insert!
894       PetscCallCXX(ctxlist.insert(std::next(ctxlist.begin(), devid), *dctx));
895     } else *dctx = ctxlist[devid];
896   }
897   PetscFunctionReturn(PETSC_SUCCESS);
898 }
899 
900 /*
901   Gets the "NULL" context for the current PetscDeviceType and PetscDevice. NULL contexts are
902   guaranteed to always be globally blocking.
903 */
904 PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *dctx)
905 {
906   PetscDeviceContext gctx;
907   PetscDevice        gdev = nullptr;
908 
909   PetscFunctionBegin;
910   PetscValidPointer(dctx, 1);
911   PetscCall(PetscDeviceContextGetCurrentContext(&gctx));
912   PetscCall(PetscDeviceContextGetDevice(gctx, &gdev));
913   PetscCall(PetscDeviceContextGetNullContextForDevice_Private(gctx->usersetdevice, gdev, dctx));
914   PetscFunctionReturn(PETSC_SUCCESS);
915 }
916 
917 /*@C
918   PetscDeviceContextSetFromOptions - Configure a `PetscDeviceContext` from the options database
919 
920   Collective on `comm` or `dctx`
921 
922   Input Parameters:
923 + comm - MPI communicator on which to query the options database (optional)
924 - dctx - The `PetscDeviceContext` to configure
925 
926   Output Parameter:
927 . dctx - The `PetscDeviceContext`
928 
929   Options Database Keys:
930 + -device_context_stream_type - type of stream to create inside the `PetscDeviceContext` -
931    `PetscDeviceContextSetStreamType()`
932 - -device_context_device_type - the type of `PetscDevice` to attach by default - `PetscDeviceType`
933 
934   Level: beginner
935 
936   Note:
937   The user may pass `MPI_COMM_NULL` for `comm` in which case the communicator of `dctx` is
938   used (which is always `PETSC_COMM_SELF`).
939 
940 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()`,
941 `PetscDeviceContextView()`
942 @*/
943 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, PetscDeviceContext dctx)
944 {
945   const auto pobj     = PetscObjectCast(dctx);
946   auto       dtype    = std::make_pair(PETSC_DEVICE_DEFAULT(), PETSC_FALSE);
947   auto       stype    = std::make_pair(PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE, PETSC_FALSE);
948   MPI_Comm   old_comm = PETSC_COMM_SELF;
949 
950   PetscFunctionBegin;
951   // do not user getoptionalnullcontext here, the user is not allowed to set it from options!
952   PetscValidDeviceContext(dctx, 2);
953   /* set the device type first */
954   if (const auto device = dctx->device) PetscCall(PetscDeviceGetType(device, &dtype.first));
955   PetscCall(PetscDeviceContextGetStreamType(dctx, &stype.first));
956 
957   if (comm == MPI_COMM_NULL) {
958     PetscCall(PetscObjectGetComm(pobj, &comm));
959   } else {
960     // briefly set the communicator for dctx (it is always PETSC_COMM_SELF) so
961     // PetscObjectOptionsBegin() behaves as if dctx had comm
962     old_comm = Petsc::util::exchange(pobj->comm, comm);
963   }
964 
965   PetscObjectOptionsBegin(pobj);
966   PetscCall(PetscDeviceContextQueryOptions_Internal(PetscOptionsObject, dtype, stype));
967   PetscOptionsEnd();
968   // reset the comm (should be PETSC_COMM_SELF)
969   if (comm != MPI_COMM_NULL) pobj->comm = old_comm;
970   if (dtype.second) PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, dtype.first));
971   if (stype.second) PetscCall(PetscDeviceContextSetStreamType(dctx, stype.first));
972   PetscCall(PetscDeviceContextSetUp(dctx));
973   PetscFunctionReturn(PETSC_SUCCESS);
974 }
975 
976 /*@C
977   PetscDeviceContextView - View a `PetscDeviceContext`
978 
979   Collective on `viewer`
980 
981   Input Parameters:
982 + dctx - The `PetscDeviceContext`
983 - viewer - The `PetscViewer` to view `dctx` with (may be `NULL`)
984 
985   Level: beginner
986 
987   Note:
988   If `viewer` is `NULL`, `PETSC_VIEWER_STDOUT_WORLD` is used instead, in which case this
989   routine is collective on `PETSC_COMM_WORLD`.
990 
991 .seealso: `PetscDeviceContextViewFromOptions()`, `PetscDeviceView()`, `PETSC_VIEWER_STDOUT_WORLD`, `PetscDeviceContextCreate()`
992 @*/
993 PetscErrorCode PetscDeviceContextView(PetscDeviceContext dctx, PetscViewer viewer)
994 {
995   PetscBool iascii;
996 
997   PetscFunctionBegin;
998   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
999   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer));
1000   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
1001   PetscCall(PetscObjectTypeCompare(PetscObjectCast(viewer), PETSCVIEWERASCII, &iascii));
1002   if (iascii) {
1003     auto        stype = PETSC_STREAM_DEFAULT_BLOCKING;
1004     PetscViewer sub;
1005 
1006     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sub));
1007     PetscCall(PetscObjectPrintClassNamePrefixType(PetscObjectCast(dctx), sub));
1008     PetscCall(PetscViewerASCIIPushTab(sub));
1009     PetscCall(PetscDeviceContextGetStreamType(dctx, &stype));
1010     PetscCall(PetscViewerASCIIPrintf(sub, "stream type: %s\n", PetscStreamTypes[stype]));
1011     PetscCall(PetscViewerASCIIPrintf(sub, "children: %" PetscInt_FMT "\n", dctx->numChildren));
1012     if (const auto nchild = dctx->numChildren) {
1013       PetscCall(PetscViewerASCIIPushTab(sub));
1014       for (PetscInt i = 0; i < nchild; ++i) {
1015         if (i == nchild - 1) {
1016           PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT, dctx->childIDs[i]));
1017         } else {
1018           PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT ", ", dctx->childIDs[i]));
1019         }
1020       }
1021     }
1022     PetscCall(PetscViewerASCIIPopTab(sub));
1023     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sub));
1024     PetscCall(PetscViewerFlush(viewer));
1025     PetscCall(PetscViewerASCIIPushTab(viewer));
1026   }
1027   if (const auto device = dctx->device) PetscCall(PetscDeviceView(device, viewer));
1028   if (iascii) PetscCall(PetscViewerASCIIPopTab(viewer));
1029   PetscFunctionReturn(PETSC_SUCCESS);
1030 }
1031 
1032 /*@C
1033   PetscDeviceContextViewFromOptions - View a `PetscDeviceContext` from options
1034 
1035   Input Parameters:
1036 + dctx - The `PetscDeviceContext` to view
1037 . obj  - Optional `PetscObject` to associate (may be `NULL`)
1038 - name - The command line option
1039 
1040   Level: beginner
1041 
1042 .seealso: `PetscDeviceContextView()`, `PetscObjectViewFromOptions()`, `PetscDeviceContextCreate()`
1043 @*/
1044 PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext dctx, PetscObject obj, const char name[])
1045 {
1046   PetscFunctionBegin;
1047   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1048   if (obj) PetscValidHeader(obj, 2);
1049   PetscValidCharPointer(name, 3);
1050   PetscCall(PetscObjectViewFromOptions(PetscObjectCast(dctx), obj, name));
1051   PetscFunctionReturn(PETSC_SUCCESS);
1052 }
1053 
1054 /*@C
1055   PetscDeviceContextGetStreamHandle - Return a handle to the underlying stream of the current device context
1056 
1057   Input Parameters:
1058 + dctx   - The `PetscDeviceContext` to get the stream from
1059 - handle - A handle to the stream
1060 
1061   Level: developer
1062 
1063   Note:
1064   This routine is dangerous. It exists only for the most experienced users and
1065   internal PETSc developement.
1066 
1067   There is no way for PETSc's auto-dependency system to track what the caller does with the
1068   stream.
1069 
1070   If the user uses the stream to copy memory that was previously modified by PETSc, or launches
1071   kernels that modify memory with the stream, it is the users responsibility to inform PETSc of
1072   their actions via `PetscDeviceContextMarkIntentFromID()`. Failure to do so may introduce a
1073   race condition. This race condition may manifest in nondeterministic ways.
1074 
1075   Alternatively, the user may synchronize the stream immediately before and after use. This is
1076   the safest option.
1077 
1078   Example Usage:
1079 .vb
1080   PetscDeviceContext dctx;
1081   PetscDeviceType    type;
1082   void               *handle;
1083 
1084   PetscDeviceContextGetCurrentContext(&dctx);
1085   PetscDeviceContextGetStreamHandle(dctx, &handle);
1086   PetscDeviceContextGetDeviceType(dctx, &type);
1087 
1088   if (type == PETSC_DEVICE_CUDA) {
1089     cudsStream_t stream = *(cudaStream_t*)handle;
1090 
1091     my_cuda_kernel<<<1, 2, 3, stream>>>();
1092   }
1093 .ve
1094 
1095 .N ASYNC_API
1096 
1097 .seealso: `PetscDeviceContext`
1098 @*/
1099 PetscErrorCode PetscDeviceContextGetStreamHandle(PetscDeviceContext dctx, void *handle)
1100 {
1101   PetscFunctionBegin;
1102   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
1103   PetscValidPointer(handle, 2);
1104   PetscCall(PetscDeviceContextGetStreamHandle_Internal(dctx, (void **)handle));
1105   PetscFunctionReturn(PETSC_SUCCESS);
1106 }
1107