xref: /petsc/src/sys/objects/device/interface/dcontext.cxx (revision eea86af358b811c970ff889f944b9dbcebd75538)
1 #include <petsc/private/deviceimpl.h> /*I "petscdevice.h" I*/
2 #include "objpool.hpp"
3 
/* Name table for PetscStreamType, indexed by the enum value: the selectable names come first, then the
 * enum's type name, the common enumerator prefix and a terminating null entry */
const char *const PetscStreamTypes[] = {
  "global_blocking",
  "default_blocking",
  "global_nonblocking",
  "max",
  "PetscStreamType",
  "PETSC_STREAM_",
  nullptr,
};
5 
/* Name table for PetscDeviceContextJoinMode, indexed by the enum value: the mode names come first, then
 * the enum's type name, the common enumerator prefix and a terminating null entry */
const char *const PetscDeviceContextJoinModes[] = {
  "destroy",
  "sync",
  "no_sync",
  "PetscDeviceContextJoinMode",
  "PETSC_DEVICE_CONTEXT_JOIN_",
  nullptr,
};
7 
8 /* Define the allocator */
9 struct PetscDeviceContextAllocator : Petsc::AllocatorBase<PetscDeviceContext> {
10   static PetscInt PetscDeviceContextID;
11 
12   PETSC_NODISCARD static PetscErrorCode create(PetscDeviceContext *dctx) noexcept {
13     PetscDeviceContext dc;
14 
15     PetscFunctionBegin;
16     PetscCall(PetscNew(&dc));
17     dc->id         = PetscDeviceContextID++;
18     dc->streamType = PETSC_STREAM_DEFAULT_BLOCKING;
19     *dctx          = dc;
20     PetscFunctionReturn(0);
21   }
22 
23   PETSC_NODISCARD static PetscErrorCode destroy(PetscDeviceContext dctx) noexcept {
24     PetscFunctionBegin;
25     PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren);
26     PetscTryTypeMethod(dctx, destroy);
27     PetscCall(PetscDeviceDestroy(&dctx->device));
28     PetscCall(PetscFree(dctx->childIDs));
29     PetscCall(PetscFree(dctx));
30     PetscFunctionReturn(0);
31   }
32 
33   PETSC_NODISCARD static PetscErrorCode reset(PetscDeviceContext dctx) noexcept {
34     PetscFunctionBegin;
35     /* don't deallocate the child array, rather just zero it out */
36     PetscCall(PetscArrayzero(dctx->childIDs, dctx->maxNumChildren));
37     dctx->setup       = PETSC_FALSE;
38     dctx->numChildren = 0;
39     dctx->streamType  = PETSC_STREAM_DEFAULT_BLOCKING;
40     PetscFunctionReturn(0);
41   }
42 
43   PETSC_NODISCARD static constexpr PetscErrorCode finalize() noexcept { return 0; }
44 };
45 /* an ID = 0 is invalid */
46 PetscInt PetscDeviceContextAllocator::PetscDeviceContextID = 1;
47 
/* File-local pool of device contexts: PetscDeviceContextCreate() obtains contexts from it via get() and
 * PetscDeviceContextDestroy() hands them back via reclaim(); PetscDeviceContextAllocator provides the
 * create/destroy/reset hooks */
48 static Petsc::ObjectPool<PetscDeviceContext, PetscDeviceContextAllocator> contextPool;
49 
50 /*@C
51   PetscDeviceContextCreate - Creates a `PetscDeviceContext`
52 
53   Not Collective, Asynchronous
54 
55   Output Parameter:
56 . dctx - The `PetscDeviceContext`
57 
58   Note:
59   Unlike almost every other PETSc class it is advised that most users use
60   `PetscDeviceContextDuplicate()` rather than this routine to create new contexts. Contexts
61   of different types are incompatible with one another; using
62   `PetscDeviceContextDuplicate()` ensures compatible types.
63 
64   Level: beginner
65 
66 .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`,
67           `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`,
68           `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextDestroy()`
69 @*/
70 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx) {
71   PetscFunctionBegin;
72   PetscValidPointer(dctx, 1);
73   PetscCall(PetscDeviceInitializePackage());
74   PetscCall(contextPool.get(*dctx));
75   PetscFunctionReturn(0);
76 }
77 
78 /*@C
79   PetscDeviceContextDestroy - Frees a `PetscDeviceContext`
80 
81   Not Collective, Asynchronous
82 
83   Input Parameter:
84 . dctx - The `PetscDeviceContext`
85 
86   Note:
87   No implicit synchronization occurs due to this routine, all resources are released completely asynchronously
88   w.r.t. the host. If one needs to guarantee access to the data produced on this context's stream one should perform the
89   appropriate synchronization before calling this routine.
90 
91   Developer Note:
92   The context is never actually "destroyed", only returned to an ever growing pool of
93   contexts. There are currently no safeguards on the size of the pool, this should perhaps
94   be implemented.
95 
96   Level: beginner
97 
98 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()`
99 @*/
100 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx) {
101   PetscFunctionBegin;
102   if (!*dctx) PetscFunctionReturn(0);
103   PetscCall(contextPool.reclaim(std::move(*dctx)));
104   *dctx = nullptr;
105   PetscFunctionReturn(0);
106 }
107 
108 /*@C
109   PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a `PetscDeviceContext`
110 
111   Not Collective, Asynchronous
112 
113   Input Parameters:
114 + dctx - The `PetscDeviceContext`
115 - type - The `PetscStreamType`
116 
117   Notes:
118   See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
119   types and their interactions.
120 
121   If the `PetscDeviceContext` was previously set up and stream
122   type was changed, you must call `PetscDeviceContextSetUp()` again after this routine.
123 
124   Level: intermediate
125 
126 .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`, `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()`
127 @*/
128 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type) {
129   PetscFunctionBegin;
130   PetscValidDeviceContext(dctx, 1);
131   PetscValidStreamType(type, 2);
132   /* only need to do complex swapping if the object has already been setup */
133   if (dctx->setup && (dctx->streamType != type)) {
134     PetscUseTypeMethod(dctx, changestreamtype, type);
135     dctx->setup = PETSC_FALSE;
136   }
137   dctx->streamType = type;
138   PetscFunctionReturn(0);
139 }
140 
141 /*@C
142   PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a `PetscDeviceContext`
143 
144   Not Collective, Asynchronous
145 
146   Input Parameter:
147 . dctx - The `PetscDeviceContext`
148 
149   Output Parameter:
150 . type - The `PetscStreamType`
151 
152   Note:
153   See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available types and their interactions
154 
155   Level: intermediate
156 
157 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`, `PetscDeviceContextSetFromOptions()`
158 @*/
159 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type) {
160   PetscFunctionBegin;
161   PetscValidDeviceContext(dctx, 1);
162   PetscValidIntPointer(type, 2);
163   *type = dctx->streamType;
164   PetscFunctionReturn(0);
165 }
166 
167 /*@C
168   PetscDeviceContextSetDevice - Set the underlying device for the `PetscDeviceContext`
169 
170   Not Collective, Possibly Synchronous
171 
172   Input Parameters:
173 + dctx   - The `PetscDeviceContext`
174 - device - The `PetscDevice`
175 
176   Notes:
177   This routine is effectively `PetscDeviceContext`'s "set-type" (so every `PetscDeviceContext`
178   must also have an attached `PetscDevice`). Unlike the usual set-type semantics, it is
179   not strictly necessary to set a context's device to enable usage, any created device
180   contexts will always come equipped with the "default" device.
181 
182   This routine is a no-op if dctx is already attached to device.
183 
184   This routine may initialize the backend device and incur synchronization.
185 
186   Level: intermediate
187 
188 .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()`
189 @*/
190 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device) {
191   PetscFunctionBegin;
192   PetscValidDeviceContext(dctx, 1);
193   PetscValidDevice(device, 2);
194   if (dctx->device) {
195     /* can't do a strict pointer equality check since PetscDevice's are reused */
196     if (dctx->device->ops->createcontext == device->ops->createcontext) PetscFunctionReturn(0);
197   }
198   PetscCall(PetscDeviceDestroy(&dctx->device));
199   PetscTryTypeMethod(dctx, destroy);
200   PetscCall(PetscMemzero(dctx->ops, sizeof(*dctx->ops)));
201   PetscCall((*device->ops->createcontext)(dctx));
202   PetscCall(PetscDeviceReference_Internal(device));
203   dctx->device = device;
204   dctx->setup  = PETSC_FALSE;
205   PetscFunctionReturn(0);
206 }
207 
208 /*@C
209   PetscDeviceContextGetDevice - Get the underlying `PetscDevice` for a `PetscDeviceContext`
210 
211   Not Collective, Asynchronous
212 
213   Input Parameter:
214 . dctx - the `PetscDeviceContext`
215 
216   Output Parameter:
217 . device - The `PetscDevice`
218 
219   Note:
220   This is a borrowed reference, the user should not destroy `device`.
221 
222   Level: intermediate
223 
224 .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice`
225 @*/
226 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device) {
227   PetscFunctionBegin;
228   PetscValidDeviceContext(dctx, 1);
229   PetscValidPointer(device, 2);
230   PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt_FMT " has no attached PetscDevice to get", dctx->id);
231   *device = dctx->device;
232   PetscFunctionReturn(0);
233 }
234 
235 /*@C
236   PetscDeviceContextSetUp - Prepares a `PetscDeviceContext` for use
237 
238   Not Collective, Asynchronous
239 
240   Input Parameter:
241 . dctx - The `PetscDeviceContext`
242 
243   Developer Note:
244   This routine is usually the stage where a `PetscDeviceContext` acquires device-side data structures such as streams,
245   events, and (possibly) handles.
246 
247   Level: beginner
248 
249 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()`
250 @*/
251 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx) {
252   PetscFunctionBegin;
253   PetscValidDeviceContext(dctx, 1);
254   if (!dctx->device) {
255     PetscCall(PetscInfo(nullptr, "PetscDeviceContext %" PetscInt_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", dctx->id, PetscDeviceTypes[PETSC_DEVICE_DEFAULT]));
256     PetscCall(PetscDeviceContextSetDefaultDevice_Internal(dctx));
257   }
258   if (dctx->setup) PetscFunctionReturn(0);
259   PetscUseTypeMethod(dctx, setup);
260   dctx->setup = PETSC_TRUE;
261   PetscFunctionReturn(0);
262 }
263 
264 /*@C
265   PetscDeviceContextDuplicate - Duplicates a `PetscDeviceContext` object
266 
267   Not Collective, Asynchronous
268 
269   Input Parameter:
270 . dctx - The `PetscDeviceContext` to duplicate
271 
272   Output Parameter:
273 . dctxdup - The duplicated `PetscDeviceContext`
274 
275   Note:
276   This is a shorthand method for creating a `PetscDeviceContext` with the exact same
277   settings as another. However the `dctxdup` does not "share"
278   any of the underlying data with the original, (including its current stream-state) they
279   are completely separate objects.
280 
281   Level: beginner
282 
283 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, `PetscDeviceContextSetStreamType()`
284 @*/
285 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup) {
286   PetscDeviceContext dup;
287 
288   PetscFunctionBegin;
289   PetscValidDeviceContext(dctx, 1);
290   PetscValidPointer(dctxdup, 2);
291   PetscCall(PetscDeviceContextCreate(&dup));
292   PetscCall(PetscDeviceContextSetStreamType(dup, dctx->streamType));
293   if (dctx->device) PetscCall(PetscDeviceContextSetDevice(dup, dctx->device));
294   PetscCall(PetscDeviceContextSetUp(dup));
295   *dctxdup = dup;
296   PetscFunctionReturn(0);
297 }
298 
299 /*@C
300   PetscDeviceContextQueryIdle - Returns whether or not a `PetscDeviceContext` is idle
301 
302   Not Collective, Asynchronous
303 
304   Input Parameter:
305 . dctx - The `PetscDeviceContext` object
306 
307   Output Parameter:
308 . idle - `PETSC_TRUE` if `PetscDeviceContext` has NO work, `PETSC_FALSE` if it has work
309 
310   Note:
311   This routine only refers to a single context and does NOT take any of its children into
312   account. That is, if `dctx` is idle but has dependents who do have work, this routine still
313   returns `PETSC_TRUE`.
314 
315   Level: intermediate
316 
317 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()`
318 @*/
319 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle) {
320   PetscFunctionBegin;
321   PetscValidDeviceContext(dctx, 1);
322   PetscValidBoolPointer(idle, 2);
323   PetscUseTypeMethod(dctx, query, idle);
324   PetscCall(PetscInfo(nullptr, "PetscDeviceContext id %" PetscInt_FMT " %s idle\n", dctx->id, *idle ? "was" : "was not"));
325   PetscFunctionReturn(0);
326 }
327 
328 /*@C
329   PetscDeviceContextWaitForContext - Make one context wait for another context to finish
330 
331   Not Collective, Asynchronous
332 
333   Input Parameters:
334 + dctxa - The `PetscDeviceContext` object that is waiting
335 - dctxb - The `PetscDeviceContext` object that is being waited on
336 
337   Notes:
338   Serializes two `PetscDeviceContexts`. This routine uses only the state of `dctxb` at the moment this routine was
339   called, so any future work queued will not affect `dctxa`.
340 
341   It is safe to pass the same context to both arguments.
342 
343   Level: beginner
344 
345 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()`
346 @*/
347 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb) {
348   PetscFunctionBegin;
349   PetscCheckCompatibleDeviceContexts(dctxa, 1, dctxb, 2);
350   if (dctxa == dctxb) PetscFunctionReturn(0);
351   PetscUseTypeMethod(dctxa, waitforcontext, dctxb);
352   PetscFunctionReturn(0);
353 }
354 
/* True when both debugging and info logging are enabled. It guards the std::string bookkeeping that
 * PetscDeviceContextFork() uses to list child ids in its PetscInfo() message, which is why <string>
 * is only pulled in under the same condition. */
355 #define PETSC_USE_DEBUG_AND_INFO (PetscDefined(USE_DEBUG) && PetscDefined(USE_INFO))
356 #if PETSC_USE_DEBUG_AND_INFO
357 #include <string>
358 #endif
359 /*@C
360   PetscDeviceContextFork - Create a set of dependent child contexts from a parent context
361 
362   Not Collective, Asynchronous
363 
364   Input Parameters:
365 + dctx - The parent `PetscDeviceContext`
366 - n    - The number of children to create
367 
368   Output Parameter:
369 . dsub - The created child context(s)
370 
371   Notes:
372   This routine creates n edges of a DAG from a source node which are causally dependent on the source node, meaning
373   that work queued on child contexts will not start until the parent context finishes its work. This accounts for work
374   queued on the parent up until calling this function, any subsequent work enqueued on the parent has no effect on the children.
375 
376   Any children created with this routine have their lifetimes bounded by the parent. That is, the parent context expects
377   to free all of its children (and ONLY its children) before it is itself freed.
378 
379   DAG representation:
380 .vb
381   time ->
382 
383   -> dctx \----> dctx ------>
384            \---> dsub[0] --->
385             \--> ... ------->
386              \-> dsub[n-1] ->
387 .ve
388 
389   Level: intermediate
390 
391 .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()`
392 @*/
393 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub) {
394 #if PETSC_USE_DEBUG_AND_INFO
395   const PetscInt     nBefore = n;
396   static std::string idList;
397 #endif
398   PetscDeviceContext *dsubTmp = nullptr;
399   PetscInt            i       = 0;
400 
401   PetscFunctionBegin;
402   PetscValidDeviceContext(dctx, 1);
403   PetscValidPointer(dsub, 3);
404   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n);
405 #if PETSC_USE_DEBUG_AND_INFO
406   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
407   idList.reserve(4 * n);
408 #endif
409   /* update child totals */
410   dctx->numChildren += n;
411   /* now to find out if we have room */
412   if (dctx->numChildren > dctx->maxNumChildren) {
413     /* no room, either from having too many kids or not having any */
414     if (dctx->childIDs) {
415       /* have existing children, must reallocate them */
416       PetscCall(PetscRealloc(dctx->numChildren * sizeof(*dctx->childIDs), &dctx->childIDs));
417       /* clear the extra memory since realloc doesn't do it for us */
418       PetscCall(PetscArrayzero((dctx->childIDs) + (dctx->maxNumChildren), (dctx->numChildren) - (dctx->maxNumChildren)));
419     } else {
420       /* have no children */
421       PetscCall(PetscCalloc1(dctx->numChildren, &dctx->childIDs));
422     }
423     /* update total number of children */
424     dctx->maxNumChildren = dctx->numChildren;
425   }
426   PetscCall(PetscMalloc1(n, &dsubTmp));
427   while (n) {
428     /* empty child slot */
429     if (!(dctx->childIDs[i])) {
430       /* create the child context in the image of its parent */
431       PetscCall(PetscDeviceContextDuplicate(dctx, dsubTmp + i));
432       PetscCall(PetscDeviceContextWaitForContext(dsubTmp[i], dctx));
433       /* register the child with its parent */
434       dctx->childIDs[i] = dsubTmp[i]->id;
435 #if PETSC_USE_DEBUG_AND_INFO
436       idList += std::to_string(dsubTmp[i]->id);
437       if (n != 1) idList += ", ";
438 #endif
439       --n;
440     }
441     ++i;
442   }
443 #if PETSC_USE_DEBUG_AND_INFO
444   PetscCall(PetscInfo(nullptr, "Forked %" PetscInt_FMT " children from parent %" PetscInt_FMT " with IDs: %s\n", nBefore, dctx->id, idList.c_str()));
445   /* resets the size but doesn't deallocate the memory */
446   idList.clear();
447 #endif
448   /* pass the children back to caller */
449   *dsub = dsubTmp;
450   PetscFunctionReturn(0);
451 }
452 
453 /*@C
454   PetscDeviceContextJoin - Converge a set of child contexts
455 
456   Not Collective, Asynchronous
457 
458   Input Parameters:
459 + dctx         - A `PetscDeviceContext` to converge on
460 . n            - The number of sub contexts to converge
461 . joinMode     - The type of join to perform
462 - dsub         - The sub contexts to converge
463 
464   Notes:
465   If `PetscDeviceContextFork()` creates `n` edges from a source node which all depend on the
466   source node, then this routine is the exact mirror. That is, it creates a node
467   (represented in `dctx`) which receives `n` edges (and optionally destroys them) which is
468   dependent on the completion of all incoming edges.
469 
470   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY` all contexts in `dsub` will be destroyed
471   by this routine. Thus all sub contexts must have been created with the `dctx` passed to
472   this routine.
473 
474   if `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC` `dctx` waits for all sub contexts but the
475   sub contexts do not wait for one another afterwards.
476 
477   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC` all sub contexts will additionally
478   wait on `dctx` after converging. This has the effect of "synchronizing" the outgoing
479   edges.
480 
481   DAG representations:
482   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`
483 .vb
484   time ->
485 
486   -> dctx ---------/- dctx ->
487   -> dsub[0] -----/
488   ->  ... -------/
489   -> dsub[n-1] -/
490 .ve
491   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`
492 .vb
493   time ->
494 
495   -> dctx ---------/- dctx ->
496   -> dsub[0] -----/--------->
497   ->  ... -------/---------->
498   -> dsub[n-1] -/----------->
499 .ve
500   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`
501 .vb
502   time ->
503 
504   -> dctx ---------/- dctx -\----> dctx ------>
505   -> dsub[0] -----/          \---> dsub[0] --->
506   ->  ... -------/            \--> ... ------->
507   -> dsub[n-1] -/              \-> dsub[n-1] ->
508 .ve
509 
510   Level: intermediate
511 
512 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode`
513 @*/
514 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub) {
515 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
516   static std::string idList;
517 #endif
518 
519   PetscFunctionBegin;
520   /* validity of dctx is checked in the wait-for loop */
521   PetscValidPointer(dsub, 4);
522   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n);
523 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
524   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
525   idList.reserve(4 * n);
526 #endif
527   /* first dctx waits on all the incoming edges */
528   for (PetscInt i = 0; i < n; ++i) {
529     PetscCheckCompatibleDeviceContexts(dctx, 1, (*dsub)[i], 4);
530     PetscCall(PetscDeviceContextWaitForContext(dctx, (*dsub)[i]));
531 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
532     idList += std::to_string((*dsub)[i]->id);
533     if (i + 1 < n) idList += ", ";
534 #endif
535   }
536 
537   /* now we handle the aftermath */
538   switch (joinMode) {
539   case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: {
540     PetscInt j = 0;
541 
542     PetscAssert(n <= dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent", n, dctx->numChildren);
543     /* update child count while it's still fresh in memory */
544     dctx->numChildren -= n;
545     for (PetscInt i = 0; i < dctx->maxNumChildren; ++i) {
546       if (dctx->childIDs[i] && (dctx->childIDs[i] == (*dsub)[j]->id)) {
547         /* child is one of ours, can destroy it */
548         PetscCall(PetscDeviceContextDestroy((*dsub) + j));
549         /* reset the child slot */
550         dctx->childIDs[i] = 0;
551         if (++j == n) break;
552       }
553     }
554     /* gone through the loop but did not find every child, if this triggers (or well, doesn't) on perf-builds we leak the remaining contexts memory */
555     PetscAssert(j == n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in.", n - j);
556     PetscCall(PetscFree(*dsub));
557   } break;
558   case PETSC_DEVICE_CONTEXT_JOIN_SYNC:
559     for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i], dctx));
560   case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC: break;
561   default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given");
562   }
563 
564 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
565   PetscCall(PetscInfo(nullptr, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt_FMT ", mode %s with IDs: %s\n", n, dctx->id, PetscDeviceContextJoinModes[joinMode], idList.c_str()));
566   idList.clear();
567 #endif
568   PetscFunctionReturn(0);
569 }
570 
571 /*@C
572   PetscDeviceContextSynchronize - Block the host until all work queued on or associated with a `PetscDeviceContext` has finished
573 
574   Not Collective, Synchronous
575 
576   Input Parameter:
577 . dctx - The `PetscDeviceContext` to synchronize
578 
579   Level: beginner
580 
581 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()`
582 @*/
583 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx) {
584   PetscFunctionBegin;
585   PetscValidDeviceContext(dctx, 1);
586   /* if it isn't setup there is nothing to sync on */
587   if (dctx->setup) PetscUseTypeMethod(dctx, synchronize);
588   PetscFunctionReturn(0);
589 }
590 
/* Default device and stream type of the root (global) context. The root types below start out with
 * these values and the finalizer registered by PetscDeviceContextSetupGlobalContext_Private() puts
 * them back. */
591 #define PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE PETSC_DEVICE_DEFAULT
592 // REMOVE ME (change)
593 #define PETSC_DEVICE_CONTEXT_DEFAULT_STREAM PETSC_STREAM_GLOBAL_BLOCKING
594 
/* device type used when the global context is created; can be overridden through
 * PetscDeviceContextSetRootDeviceType_Internal() */
595 static PetscDeviceType    rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE;
/* stream type used when the global context is created */
596 static PetscStreamType    rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM;
/* the current global context; null until it is first requested or explicitly set */
597 static PetscDeviceContext globalContext  = nullptr;
598 
599 /* when PetscDevice initializes PetscDeviceContext eagerly the type of device created should
600  * match whatever device is eagerly initialized */
601 PetscErrorCode PetscDeviceContextSetRootDeviceType_Internal(PetscDeviceType type) {
602   PetscFunctionBegin;
603   PetscValidDeviceType(type, 1);
604   rootDeviceType = type;
605   PetscFunctionReturn(0);
606 }
607 
#if 0
/* Currently unused (and compiled out): stream-type counterpart of
 * PetscDeviceContextSetRootDeviceType_Internal(), records the stream type that the global context is
 * given when it is set up */
PetscErrorCode PetscDeviceContextSetRootStreamType_Internal(PetscStreamType type) {
  PetscFunctionBegin;
  PetscValidStreamType(type, 1);
  rootStreamType = type;
  PetscFunctionReturn(0);
}
#endif
618 
619 static PetscErrorCode PetscDeviceContextSetupGlobalContext_Private(void) {
620   static const auto PetscDeviceContextFinalizer = []() -> PetscErrorCode {
621     PetscFunctionBegin;
622     PetscCall(PetscDeviceContextDestroy(&globalContext));
623     rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE;
624     rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM;
625     PetscFunctionReturn(0);
626   };
627 
628   PetscFunctionBegin;
629   if (globalContext) PetscFunctionReturn(0);
630   /* this exists purely as a valid device check. */
631   PetscCall(PetscDeviceInitializePackage());
632   PetscCall(PetscRegisterFinalize(PetscDeviceContextFinalizer));
633   PetscCall(PetscInfo(nullptr, "Initializing global PetscDeviceContext\n"));
634   /* we call the allocator directly here since the ObjectPool creates a PetscContainer which
635    * eventually tries to call logging functions. However, this routine may be purposefully
636    * called __before__ logging is initialized, so the logging function would PETSCABORT */
637   PetscCall(contextPool.allocator().create(&globalContext));
638   PetscCall(PetscDeviceContextSetStreamType(globalContext, rootStreamType));
639   PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(globalContext, rootDeviceType));
640   PetscCall(PetscDeviceContextSetUp(globalContext));
641   PetscFunctionReturn(0);
642 }
643 
644 /*@C
645   PetscDeviceContextGetCurrentContext - Get the current active `PetscDeviceContext`
646 
647   Not Collective, Asynchronous
648 
649   Output Parameter:
650 . dctx - The `PetscDeviceContext`
651 
652   Note:
653   The user generally should not destroy contexts retrieved with this routine unless they
654   themselves have created them. There exists no protection against destroying the root
655   context.
656 
657   Developer Note:
658   Unless the user has set their own, this routine creates the "root" context the first time it
659   is called, registering its destructor to `PetscFinalize()`.
660 
661   Level: beginner
662 
663 .seealso: `PetscDeviceContextSetCurrentContext()`, `PetscDeviceContextFork()`,
664           `PetscDeviceContextJoin()`, `PetscDeviceContextCreate()`
665 @*/
666 PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *dctx) {
667   PetscFunctionBegin;
668   PetscValidPointer(dctx, 1);
669   PetscCall(PetscDeviceContextSetupGlobalContext_Private());
670   /* while the static analyzer can find global variables, it will throw a warning about not
671    * being able to connect this back to the function arguments */
672   PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscValidDeviceContext(globalContext, -1));
673   *dctx = globalContext;
674   PetscFunctionReturn(0);
675 }
676 
677 /*@C
678   PetscDeviceContextSetCurrentContext - Set the current active `PetscDeviceContext`
679 
680   Not Collective, Asynchronous
681 
682   Input Parameter:
683 . dctx - The `PetscDeviceContext`
684 
685   Notes:
686   This routine can be used to set the defacto "root" `PetscDeviceContext` to a user-defined
687   implementation by calling this routine immediately after `PetscInitialize()` and ensuring that
688   `PetscDevice` is not eagerly initialized. In this case the user is responsible for destroying
689   their `PetscDeviceContext` before `PetscFinalize()` returns.
690 
691   The old context is not stored in any way by this routine; if one is overriding a context that
692   they themselves do not control, one should take care to temporarily store it by calling
693   `PetscDeviceContextGetCurrentContext()` before calling this routine.
694 
695   Level: beginner
696 
697 .seealso: `PetscDeviceContextGetCurrentContext()`, `PetscDeviceContextFork()`,
698           `PetscDeviceContextJoin()`, `PetscDeviceContextCreate()`
699 @*/
700 PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext dctx) {
701   PetscFunctionBegin;
702   PetscValidDeviceContext(dctx, 1);
703   PetscAssert(dctx->setup, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt_FMT " must be set up before being set as global context", dctx->id);
704   globalContext = dctx;
705   PetscCall(PetscInfo(nullptr, "Set global PetscDeviceContext id %" PetscInt_FMT "\n", dctx->id));
706   PetscFunctionReturn(0);
707 }
708 
709 /*@C
710   PetscDeviceContextSetFromOptions - Configure a `PetscDeviceContext` from the options database
711 
712   Collective on comm, Asynchronous
713 
714   Input Parameters:
715 + comm   - MPI communicator on which to query the options database
716 . prefix - prefix to prepend to all options database queries, NULL if not needed
717 - dctx   - The `PetscDeviceContext` to configure
718 
719   Output Parameter:
720 . dctx - The `PetscDeviceContext`
721 
722   Options Database Keys:
723 + -device_context_stream_type - type of stream to create inside the `PetscDeviceContext` - `PetscDeviceContextSetStreamType()`
724 - -device_context_device_type - the type of `PetscDevice` to attach by default - `PetscDeviceType`
725 
726   Level: beginner
727 
728 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()`
729 @*/
730 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, const char prefix[], PetscDeviceContext dctx) {
731   PetscBool flag;
732   PetscInt  stype, dtype;
733 
734   PetscFunctionBegin;
735   if (prefix) PetscValidCharPointer(prefix, 2);
736   PetscValidDeviceContext(dctx, 3);
737   PetscOptionsBegin(comm, prefix, "PetscDeviceContext Options", "Sys");
738   PetscCall(PetscOptionsEList("-device_context_stream_type", "PetscDeviceContext PetscStreamType", "PetscDeviceContextSetStreamType", PetscStreamTypes, PETSC_STREAM_MAX, PetscStreamTypes[dctx->streamType], &stype, &flag));
739   if (flag) PetscCall(PetscDeviceContextSetStreamType(dctx, static_cast<PetscStreamType>(stype)));
740   PetscCall(PetscOptionsEList("-device_context_device_type", "Underlying PetscDevice", "PetscDeviceContextSetDevice", PetscDeviceTypes + 1, PETSC_DEVICE_MAX - 1, dctx->device ? PetscDeviceTypes[dctx->device->type] : PetscDeviceTypes[PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE], &dtype, &flag));
741   if (flag) PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, static_cast<PetscDeviceType>(dtype + 1)));
742   PetscOptionsEnd();
743   PetscFunctionReturn(0);
744 }
745