xref: /petsc/src/sys/objects/device/interface/dcontext.cxx (revision a69119a591a03a9d906b29c0a4e9802e4d7c9795)
1 #include <petsc/private/deviceimpl.h> /*I "petscdevice.h" I*/
2 #include "objpool.hpp"
3 
/* String table for PetscStreamType, indexed by enum value (see its use with dctx->streamType in
 * PetscDeviceContextSetFromOptions()). The value names are followed by the enum's type name,
 * the common enumerator prefix and a terminating nullptr. */
const char *const PetscStreamTypes[] = {"global_blocking", "default_blocking", "global_nonblocking", "max", "PetscStreamType", "PETSC_STREAM_", nullptr};

/* String table for PetscDeviceContextJoinMode, laid out the same way: value names, then the enum's
 * type name, the enumerator prefix and a terminating nullptr. */
const char *const PetscDeviceContextJoinModes[] = {"destroy", "sync", "no_sync", "PetscDeviceContextJoinMode", "PETSC_DEVICE_CONTEXT_JOIN_", nullptr};
7 
8 /* Define the allocator */
9 struct PetscDeviceContextAllocator : Petsc::AllocatorBase<PetscDeviceContext> {
10   static PetscInt PetscDeviceContextID;
11 
12   PETSC_NODISCARD static PetscErrorCode create(PetscDeviceContext *dctx) noexcept {
13     PetscDeviceContext dc;
14 
15     PetscFunctionBegin;
16     PetscCall(PetscNew(&dc));
17     dc->id         = PetscDeviceContextID++;
18     dc->streamType = PETSC_STREAM_DEFAULT_BLOCKING;
19     *dctx          = dc;
20     PetscFunctionReturn(0);
21   }
22 
23   PETSC_NODISCARD static PetscErrorCode destroy(PetscDeviceContext dctx) noexcept {
24     PetscFunctionBegin;
25     PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren);
26     PetscTryTypeMethod(dctx, destroy);
27     PetscCall(PetscDeviceDestroy(&dctx->device));
28     PetscCall(PetscFree(dctx->childIDs));
29     PetscCall(PetscFree(dctx));
30     PetscFunctionReturn(0);
31   }
32 
33   PETSC_NODISCARD static PetscErrorCode reset(PetscDeviceContext dctx) noexcept {
34     PetscFunctionBegin;
35     /* don't deallocate the child array, rather just zero it out */
36     PetscCall(PetscArrayzero(dctx->childIDs, dctx->maxNumChildren));
37     dctx->setup       = PETSC_FALSE;
38     dctx->numChildren = 0;
39     dctx->streamType  = PETSC_STREAM_DEFAULT_BLOCKING;
40     PetscFunctionReturn(0);
41   }
42 
43   PETSC_NODISCARD static constexpr PetscErrorCode finalize() noexcept { return 0; }
44 };
/* an ID = 0 is invalid (PetscDeviceContextFork() treats a zero entry in childIDs[] as an empty
 * slot), so numbering starts at 1 */
PetscInt PetscDeviceContextAllocator::PetscDeviceContextID = 1;

/* file-local pool that PetscDeviceContextCreate() draws contexts from and
 * PetscDeviceContextDestroy() returns them to */
static Petsc::ObjectPool<PetscDeviceContext, PetscDeviceContextAllocator> contextPool;
49 
50 /*@C
51   PetscDeviceContextCreate - Creates a PetscDeviceContext
52 
53   Not Collective, Asynchronous
54 
  Output Parameter:
56 . dctx - The PetscDeviceContext
57 
58   Notes:
59   Unlike almost every other PETSc class it is advised that most users use
60   PetscDeviceContextDuplicate() rather than this routine to create new contexts. Contexts
61   of different types are incompatible with one another; using
62   PetscDeviceContextDuplicate() ensures compatible types.
63 
64   Level: beginner
65 
66 .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`,
67           `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`,
68           `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextDestroy()`
69 @*/
70 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx) {
71   PetscFunctionBegin;
72   PetscValidPointer(dctx, 1);
73   PetscCall(PetscDeviceInitializePackage());
74   PetscCall(contextPool.get(*dctx));
75   PetscFunctionReturn(0);
76 }
77 
78 /*@C
79   PetscDeviceContextDestroy - Frees a PetscDeviceContext
80 
81   Not Collective, Asynchronous
82 
83   Input Parameters:
84 . dctx - The PetscDeviceContext
85 
86   Notes:
87   No implicit synchronization occurs due to this routine, all resources are released completely asynchronously
  w.r.t. the host. If one needs to guarantee access to the data produced on this context's stream one should perform the
89   appropriate synchronization before calling this routine.
90 
91   Developer Notes:
92   The context is never actually "destroyed", only returned to an ever growing pool of
93   contexts. There are currently no safeguards on the size of the pool, this should perhaps
94   be implemented.
95 
96   Level: beginner
97 
98 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()`
99 @*/
100 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx) {
101   PetscFunctionBegin;
102   if (!*dctx) PetscFunctionReturn(0);
103   PetscCall(contextPool.reclaim(std::move(*dctx)));
104   *dctx = nullptr;
105   PetscFunctionReturn(0);
106 }
107 
108 /*@C
109   PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a PetscDeviceContext
110 
111   Not Collective, Asynchronous
112 
113   Input Parameters:
114 + dctx - The PetscDeviceContext
115 - type - The PetscStreamType
116 
117   Notes:
118   See PetscStreamType in include/petscdevicetypes.h for more information on the available
119   types and their interactions. If the PetscDeviceContext was previously set up and stream
120   type was changed, you must call PetscDeviceContextSetUp() again after this routine.
121 
122   Level: intermediate
123 
124 .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`, `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()`
125 @*/
126 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type) {
127   PetscFunctionBegin;
128   PetscValidDeviceContext(dctx, 1);
129   PetscValidStreamType(type, 2);
130   /* only need to do complex swapping if the object has already been setup */
131   if (dctx->setup && (dctx->streamType != type)) {
132     PetscUseTypeMethod(dctx, changestreamtype, type);
133     dctx->setup = PETSC_FALSE;
134   }
135   dctx->streamType = type;
136   PetscFunctionReturn(0);
137 }
138 
139 /*@C
140   PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a PetscDeviceContext
141 
142   Not Collective, Asynchronous
143 
144   Input Parameter:
145 . dctx - The PetscDeviceContext
146 
147   Output Parameter:
148 . type - The PetscStreamType
149 
150   Notes:
151   See PetscStreamType in include/petscdevicetypes.h for more information on the available types and their interactions
152 
153   Level: intermediate
154 
155 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`, `PetscDeviceContextSetFromOptions()`
156 @*/
157 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type) {
158   PetscFunctionBegin;
159   PetscValidDeviceContext(dctx, 1);
160   PetscValidIntPointer(type, 2);
161   *type = dctx->streamType;
162   PetscFunctionReturn(0);
163 }
164 
165 /*@C
166   PetscDeviceContextSetDevice - Set the underlying device for the PetscDeviceContext
167 
168   Not Collective, Possibly Synchronous
169 
170   Input Parameters:
171 + dctx   - The PetscDeviceContext
172 - device - The PetscDevice
173 
174   Notes:
175   This routine is effectively PetscDeviceContext's "set-type" (so every PetscDeviceContext
176   must also have an attached PetscDevice). Unlike the usual set-type semantics, it is
  not strictly necessary to set a context's device to enable usage; any created device
178   contexts will always come equipped with the "default" device.
179 
180   This routine is a no-op if dctx is already attached to device.
181 
182   This routine may initialize the backend device and incur synchronization.
183 
184   Level: intermediate
185 
186 .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()`
187 @*/
188 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device) {
189   PetscFunctionBegin;
190   PetscValidDeviceContext(dctx, 1);
191   PetscValidDevice(device, 2);
192   if (dctx->device) {
193     /* can't do a strict pointer equality check since PetscDevice's are reused */
194     if (dctx->device->ops->createcontext == device->ops->createcontext) PetscFunctionReturn(0);
195   }
196   PetscCall(PetscDeviceDestroy(&dctx->device));
197   PetscTryTypeMethod(dctx, destroy);
198   PetscCall(PetscMemzero(dctx->ops, sizeof(*dctx->ops)));
199   PetscCall((*device->ops->createcontext)(dctx));
200   PetscCall(PetscDeviceReference_Internal(device));
201   dctx->device = device;
202   dctx->setup  = PETSC_FALSE;
203   PetscFunctionReturn(0);
204 }
205 
206 /*@C
207   PetscDeviceContextGetDevice - Get the underlying PetscDevice for a PetscDeviceContext
208 
209   Not Collective, Asynchronous
210 
211   Input Parameter:
212 . dctx - the PetscDeviceContext
213 
214   Output Parameter:
215 . device - The PetscDevice
216 
217   Notes:
218   This is a borrowed reference, the user should not destroy the device.
219 
220   Level: intermediate
221 
222 .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice`
223 @*/
224 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device) {
225   PetscFunctionBegin;
226   PetscValidDeviceContext(dctx, 1);
227   PetscValidPointer(device, 2);
228   PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt_FMT " has no attached PetscDevice to get", dctx->id);
229   *device = dctx->device;
230   PetscFunctionReturn(0);
231 }
232 
233 /*@C
234   PetscDeviceContextSetUp - Prepares a PetscDeviceContext for use
235 
236   Not Collective, Asynchronous
237 
238   Input Parameter:
239 . dctx - The PetscDeviceContext
240 
241   Developer Notes:
242   This routine is usually the stage where a PetscDeviceContext acquires device-side data structures such as streams,
243   events, and (possibly) handles.
244 
245   Level: beginner
246 
247 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()`
248 @*/
249 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx) {
250   PetscFunctionBegin;
251   PetscValidDeviceContext(dctx, 1);
252   if (!dctx->device) {
253     PetscCall(PetscInfo(nullptr, "PetscDeviceContext %" PetscInt_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", dctx->id, PetscDeviceTypes[PETSC_DEVICE_DEFAULT]));
254     PetscCall(PetscDeviceContextSetDefaultDevice_Internal(dctx));
255   }
256   if (dctx->setup) PetscFunctionReturn(0);
257   PetscUseTypeMethod(dctx, setup);
258   dctx->setup = PETSC_TRUE;
259   PetscFunctionReturn(0);
260 }
261 
262 /*@C
263   PetscDeviceContextDuplicate - Duplicates a PetscDeviceContext object
264 
265   Not Collective, Asynchronous
266 
267   Input Parameter:
268 . dctx - The PetscDeviceContext to duplicate
269 
270   Output Parameter:
271 . dctxdup - The duplicated PetscDeviceContext
272 
273   Notes:
274   This is a shorthand method for creating a PetscDeviceContext with the exact same
275   settings as another. Note however that the duplicated PetscDeviceContext does not "share"
276   any of the underlying data with the original, (including its current stream-state) they
277   are completely separate objects.
278 
279   Level: beginner
280 
281 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, `PetscDeviceContextSetStreamType()`
282 @*/
283 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup) {
284   PetscDeviceContext dup;
285 
286   PetscFunctionBegin;
287   PetscValidDeviceContext(dctx, 1);
288   PetscValidPointer(dctxdup, 2);
289   PetscCall(PetscDeviceContextCreate(&dup));
290   PetscCall(PetscDeviceContextSetStreamType(dup, dctx->streamType));
291   if (dctx->device) PetscCall(PetscDeviceContextSetDevice(dup, dctx->device));
292   PetscCall(PetscDeviceContextSetUp(dup));
293   *dctxdup = dup;
294   PetscFunctionReturn(0);
295 }
296 
297 /*@C
298   PetscDeviceContextQueryIdle - Returns whether or not a PetscDeviceContext is idle
299 
300   Not Collective, Asynchronous
301 
302   Input Parameter:
303 . dctx - The PetscDeviceContext object
304 
305   Output Parameter:
306 . idle - PETSC_TRUE if PetscDeviceContext has NO work, PETSC_FALSE if it has work
307 
308   Notes:
  This routine only refers to a single context and does NOT take any of its children into
310   account. That is, if dctx is idle but has dependents who do have work, this routine still
311   returns PETSC_TRUE.
312 
313   Level: intermediate
314 
315 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()`
316 @*/
317 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle) {
318   PetscFunctionBegin;
319   PetscValidDeviceContext(dctx, 1);
320   PetscValidBoolPointer(idle, 2);
321   PetscUseTypeMethod(dctx, query, idle);
322   PetscCall(PetscInfo(nullptr, "PetscDeviceContext id %" PetscInt_FMT " %s idle\n", dctx->id, *idle ? "was" : "was not"));
323   PetscFunctionReturn(0);
324 }
325 
326 /*@C
327   PetscDeviceContextWaitForContext - Make one context wait for another context to finish
328 
329   Not Collective, Asynchronous
330 
331   Input Parameters:
332 + dctxa - The PetscDeviceContext object that is waiting
333 - dctxb - The PetscDeviceContext object that is being waited on
334 
335   Notes:
336   Serializes two PetscDeviceContexts. This routine uses only the state of dctxb at the moment this routine was
337   called, so any future work queued will not affect dctxa. It is safe to pass the same context to both arguments.
338 
339   Level: beginner
340 
341 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()`
342 @*/
343 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb) {
344   PetscFunctionBegin;
345   PetscCheckCompatibleDeviceContexts(dctxa, 1, dctxb, 2);
346   if (dctxa == dctxb) PetscFunctionReturn(0);
347   PetscUseTypeMethod(dctxa, waitforcontext, dctxb);
348   PetscFunctionReturn(0);
349 }
350 
/* Non-zero only when both USE_DEBUG and USE_INFO are PetscDefined. In that configuration the
 * fork/join routines assemble a std::string list of context IDs for their PetscInfo() output,
 * which is the only reason <string> is needed. */
#define PETSC_USE_DEBUG_AND_INFO (PetscDefined(USE_DEBUG) && PetscDefined(USE_INFO))
#if PETSC_USE_DEBUG_AND_INFO
#include <string>
#endif
355 /*@C
356   PetscDeviceContextFork - Create a set of dependent child contexts from a parent context
357 
358   Not Collective, Asynchronous
359 
360   Input Parameters:
361 + dctx - The parent PetscDeviceContext
362 - n    - The number of children to create
363 
364   Output Parameter:
365 . dsub - The created child context(s)
366 
367   Notes:
368   This routine creates n edges of a DAG from a source node which are causally dependent on the source node, meaning
369   that work queued on child contexts will not start until the parent context finishes its work. This accounts for work
370   queued on the parent up until calling this function, any subsequent work enqueued on the parent has no effect on the children.
371 
372   Any children created with this routine have their lifetimes bounded by the parent. That is, the parent context expects
  to free all of its children (and ONLY its children) before it is itself freed.
374 
375   DAG representation:
376 .vb
377   time ->
378 
379   -> dctx \----> dctx ------>
380            \---> dsub[0] --->
381             \--> ... ------->
382              \-> dsub[n-1] ->
383 .ve
384 
385   Level: intermediate
386 
387 .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()`
388 @*/
389 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub) {
390 #if PETSC_USE_DEBUG_AND_INFO
391   const PetscInt     nBefore = n;
392   static std::string idList;
393 #endif
394   PetscDeviceContext *dsubTmp = nullptr;
395   PetscInt            i       = 0;
396 
397   PetscFunctionBegin;
398   PetscValidDeviceContext(dctx, 1);
399   PetscValidPointer(dsub, 3);
400   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n);
401 #if PETSC_USE_DEBUG_AND_INFO
402   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
403   idList.reserve(4 * n);
404 #endif
405   /* update child totals */
406   dctx->numChildren += n;
407   /* now to find out if we have room */
408   if (dctx->numChildren > dctx->maxNumChildren) {
409     /* no room, either from having too many kids or not having any */
410     if (dctx->childIDs) {
411       /* have existing children, must reallocate them */
412       PetscCall(PetscRealloc(dctx->numChildren * sizeof(*dctx->childIDs), &dctx->childIDs));
413       /* clear the extra memory since realloc doesn't do it for us */
414       PetscCall(PetscArrayzero((dctx->childIDs) + (dctx->maxNumChildren), (dctx->numChildren) - (dctx->maxNumChildren)));
415     } else {
416       /* have no children */
417       PetscCall(PetscCalloc1(dctx->numChildren, &dctx->childIDs));
418     }
419     /* update total number of children */
420     dctx->maxNumChildren = dctx->numChildren;
421   }
422   PetscCall(PetscMalloc1(n, &dsubTmp));
423   while (n) {
424     /* empty child slot */
425     if (!(dctx->childIDs[i])) {
426       /* create the child context in the image of its parent */
427       PetscCall(PetscDeviceContextDuplicate(dctx, dsubTmp + i));
428       PetscCall(PetscDeviceContextWaitForContext(dsubTmp[i], dctx));
429       /* register the child with its parent */
430       dctx->childIDs[i] = dsubTmp[i]->id;
431 #if PETSC_USE_DEBUG_AND_INFO
432       idList += std::to_string(dsubTmp[i]->id);
433       if (n != 1) idList += ", ";
434 #endif
435       --n;
436     }
437     ++i;
438   }
439 #if PETSC_USE_DEBUG_AND_INFO
440   PetscCall(PetscInfo(nullptr, "Forked %" PetscInt_FMT " children from parent %" PetscInt_FMT " with IDs: %s\n", nBefore, dctx->id, idList.c_str()));
441   /* resets the size but doesn't deallocate the memory */
442   idList.clear();
443 #endif
444   /* pass the children back to caller */
445   *dsub = dsubTmp;
446   PetscFunctionReturn(0);
447 }
448 
449 /*@C
450   PetscDeviceContextJoin - Converge a set of child contexts
451 
452   Not Collective, Asynchronous
453 
454   Input Parameters:
455 + dctx         - A PetscDeviceContext to converge on
456 . n            - The number of sub contexts to converge
457 . joinMode     - The type of join to perform
458 - dsub         - The sub contexts to converge
459 
460   Notes:
461   If PetscDeviceContextFork() creates n edges from a source node which all depend on the
462   source node, then this routine is the exact mirror. That is, it creates a node
  (represented in dctx) which receives n edges (and optionally destroys them) which is
464   dependent on the completion of all incoming edges.
465 
466   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY all contexts in dsub will be destroyed
467   by this routine. Thus all sub contexts must have been created with the dctx passed to
468   this routine.
469 
  If joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC dctx waits for all sub contexts but the
471   sub contexts do not wait for one another afterwards.
472 
473   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC all sub contexts will additionally
474   wait on dctx after converging. This has the effect of "synchronizing" the outgoing
475   edges.
476 
477   DAG representations:
478   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY
479 .vb
480   time ->
481 
482   -> dctx ---------/- dctx ->
483   -> dsub[0] -----/
484   ->  ... -------/
485   -> dsub[n-1] -/
486 .ve
487   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC
488 .vb
489   time ->
490 
491   -> dctx ---------/- dctx ->
492   -> dsub[0] -----/--------->
493   ->  ... -------/---------->
494   -> dsub[n-1] -/----------->
495 .ve
496   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC
497 .vb
498   time ->
499 
500   -> dctx ---------/- dctx -\----> dctx ------>
501   -> dsub[0] -----/          \---> dsub[0] --->
502   ->  ... -------/            \--> ... ------->
503   -> dsub[n-1] -/              \-> dsub[n-1] ->
504 .ve
505 
506   Level: intermediate
507 
508 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode`
509 @*/
510 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub) {
511 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
512   static std::string idList;
513 #endif
514 
515   PetscFunctionBegin;
516   /* validity of dctx is checked in the wait-for loop */
517   PetscValidPointer(dsub, 4);
518   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n);
519 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
520   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
521   idList.reserve(4 * n);
522 #endif
523   /* first dctx waits on all the incoming edges */
524   for (PetscInt i = 0; i < n; ++i) {
525     PetscCheckCompatibleDeviceContexts(dctx, 1, (*dsub)[i], 4);
526     PetscCall(PetscDeviceContextWaitForContext(dctx, (*dsub)[i]));
527 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
528     idList += std::to_string((*dsub)[i]->id);
529     if (i + 1 < n) idList += ", ";
530 #endif
531   }
532 
533   /* now we handle the aftermath */
534   switch (joinMode) {
535   case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: {
536     PetscInt j = 0;
537 
538     PetscAssert(n <= dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent", n, dctx->numChildren);
539     /* update child count while it's still fresh in memory */
540     dctx->numChildren -= n;
541     for (PetscInt i = 0; i < dctx->maxNumChildren; ++i) {
542       if (dctx->childIDs[i] && (dctx->childIDs[i] == (*dsub)[j]->id)) {
543         /* child is one of ours, can destroy it */
544         PetscCall(PetscDeviceContextDestroy((*dsub) + j));
545         /* reset the child slot */
546         dctx->childIDs[i] = 0;
547         if (++j == n) break;
548       }
549     }
550     /* gone through the loop but did not find every child, if this triggers (or well, doesn't) on perf-builds we leak the remaining contexts memory */
551     PetscAssert(j == n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in.", n - j);
552     PetscCall(PetscFree(*dsub));
553   } break;
554   case PETSC_DEVICE_CONTEXT_JOIN_SYNC:
555     for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i], dctx));
556   case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC: break;
557   default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given");
558   }
559 
560 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
561   PetscCall(PetscInfo(nullptr, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt_FMT ", mode %s with IDs: %s\n", n, dctx->id, PetscDeviceContextJoinModes[joinMode], idList.c_str()));
562   idList.clear();
563 #endif
564   PetscFunctionReturn(0);
565 }
566 
567 /*@C
568   PetscDeviceContextSynchronize - Block the host until all work queued on or associated with a PetscDeviceContext has finished
569 
570   Not Collective, Synchronous
571 
572   Input Parameters:
573 . dctx - The PetscDeviceContext to synchronize
574 
575   Level: beginner
576 
577 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()`
578 @*/
579 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx) {
580   PetscFunctionBegin;
581   PetscValidDeviceContext(dctx, 1);
582   /* if it isn't setup there is nothing to sync on */
583   if (dctx->setup) PetscUseTypeMethod(dctx, synchronize);
584   PetscFunctionReturn(0);
585 }
586 
/* device type used for the root context unless overridden, also the fallback default shown by
 * PetscDeviceContextSetFromOptions() when a context has no device attached */
#define PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE PETSC_DEVICE_DEFAULT
// REMOVE ME (change)
/* stream type used for the root context */
#define PETSC_DEVICE_CONTEXT_DEFAULT_STREAM PETSC_STREAM_GLOBAL_BLOCKING

/* device and stream type applied to the root context when it is lazily created; both are put
 * back to the defaults above by the finalizer registered in
 * PetscDeviceContextSetupGlobalContext_Private() */
static PetscDeviceType    rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE;
static PetscStreamType    rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM;
/* the current "global" context, nullptr until first requested or explicitly set */
static PetscDeviceContext globalContext  = nullptr;
594 
595 /* when PetsDevice initializes PetscDeviceContext eagerly the type of device created should
596  * match whatever device is eagerly intialized */
597 PetscErrorCode PetscDeviceContextSetRootDeviceType_Internal(PetscDeviceType type) {
598   PetscFunctionBegin;
599   PetscValidDeviceType(type, 1);
600   rootDeviceType = type;
601   PetscFunctionReturn(0);
602 }
603 
#if 0
/* currently unused (compiled out): stream-type counterpart of
 * PetscDeviceContextSetRootDeviceType_Internal(), records the stream type the root context
 * is created with */
PetscErrorCode PetscDeviceContextSetRootStreamType_Internal(PetscStreamType type)
{
  PetscFunctionBegin;
  PetscValidStreamType(type,1);
  rootStreamType = type;
  PetscFunctionReturn(0);
}
#endif
614 
615 static PetscErrorCode PetscDeviceContextSetupGlobalContext_Private(void) {
616   static const auto PetscDeviceContextFinalizer = []() -> PetscErrorCode {
617     PetscFunctionBegin;
618     PetscCall(PetscDeviceContextDestroy(&globalContext));
619     rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE;
620     rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM;
621     PetscFunctionReturn(0);
622   };
623 
624   PetscFunctionBegin;
625   if (globalContext) PetscFunctionReturn(0);
626   /* this exists purely as a valid device check. */
627   PetscCall(PetscDeviceInitializePackage());
628   PetscCall(PetscRegisterFinalize(PetscDeviceContextFinalizer));
629   PetscCall(PetscInfo(nullptr, "Initializing global PetscDeviceContext\n"));
630   /* we call the allocator directly here since the ObjectPool creates a PetscContainer which
631    * eventually tries to call logging functions. However, this routine may be purposefully
632    * called __before__ logging is initialized, so the logging function would PETSCABORT */
633   PetscCall(contextPool.allocator().create(&globalContext));
634   PetscCall(PetscDeviceContextSetStreamType(globalContext, rootStreamType));
635   PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(globalContext, rootDeviceType));
636   PetscCall(PetscDeviceContextSetUp(globalContext));
637   PetscFunctionReturn(0);
638 }
639 
640 /*@C
641   PetscDeviceContextGetCurrentContext - Get the current active PetscDeviceContext
642 
643   Not Collective, Asynchronous
644 
645   Output Parameter:
646 . dctx - The PetscDeviceContext
647 
648   Notes:
649   The user generally should not destroy contexts retrieved with this routine unless they
650   themselves have created them. There exists no protection against destroying the root
651   context.
652 
653   Developer Notes:
654   Unless the user has set their own, this routine creates the "root" context the first time it
655   is called, registering its destructor to PetscFinalize().
656 
657   Level: beginner
658 
659 .seealso: `PetscDeviceContextSetCurrentContext()`, `PetscDeviceContextFork()`,
660           `PetscDeviceContextJoin()`, `PetscDeviceContextCreate()`
661 @*/
662 PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *dctx) {
663   PetscFunctionBegin;
664   PetscValidPointer(dctx, 1);
665   PetscCall(PetscDeviceContextSetupGlobalContext_Private());
666   /* while the static analyzer can find global variables, it will throw a warning about not
667    * being able to connect this back to the function arguments */
668   PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscValidDeviceContext(globalContext, -1));
669   *dctx = globalContext;
670   PetscFunctionReturn(0);
671 }
672 
673 /*@C
674   PetscDeviceContextSetCurrentContext - Set the current active PetscDeviceContext
675 
676   Not Collective, Asynchronous
677 
678   Input Parameter:
679 . dctx - The PetscDeviceContext
680 
681   Notes:
  This routine can be used to set the de facto "root" PetscDeviceContext to a user-defined
683   implementation by calling this routine immediately after PetscInitialize() and ensuring that
  PetscDevice is not greedily initialized. In this case the user is responsible for destroying
685   their PetscDeviceContext before PetscFinalize() returns.
686 
687   The old context is not stored in any way by this routine; if one is overriding a context that
688   they themselves do not control, one should take care to temporarily store it by calling
689   PetscDeviceContextGetCurrentContext() before calling this routine.
690 
691   Level: beginner
692 
693 .seealso: `PetscDeviceContextGetCurrentContext()`, `PetscDeviceContextFork()`,
694           `PetscDeviceContextJoin()`, `PetscDeviceContextCreate()`
695 @*/
696 PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext dctx) {
697   PetscFunctionBegin;
698   PetscValidDeviceContext(dctx, 1);
699   PetscAssert(dctx->setup, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt_FMT " must be set up before being set as global context", dctx->id);
700   globalContext = dctx;
701   PetscCall(PetscInfo(nullptr, "Set global PetscDeviceContext id %" PetscInt_FMT "\n", dctx->id));
702   PetscFunctionReturn(0);
703 }
704 
705 /*@C
706   PetscDeviceContextSetFromOptions - Configure a PetscDeviceContext from the options database
707 
708   Collective on comm, Asynchronous
709 
710   Input Parameters:
711 + comm   - MPI communicator on which to query the options database
712 . prefix - prefix to prepend to all options database queries, NULL if not needed
713 - dctx   - The PetscDeviceContext to configure
714 
715   Output Parameter:
716 . dctx - The PetscDeviceContext
717 
718   Options Database:
719 + -device_context_stream_type - type of stream to create inside the PetscDeviceContext -
720    PetscDeviceContextSetStreamType()
721 - -device_context_device_type - the type of PetscDevice to attach by default - PetscDeviceType
722 
723   Level: beginner
724 
725 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()`
726 @*/
727 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, const char prefix[], PetscDeviceContext dctx) {
728   PetscBool flag;
729   PetscInt  stype, dtype;
730 
731   PetscFunctionBegin;
732   if (prefix) PetscValidCharPointer(prefix, 2);
733   PetscValidDeviceContext(dctx, 3);
734   PetscOptionsBegin(comm, prefix, "PetscDeviceContext Options", "Sys");
735   PetscCall(PetscOptionsEList("-device_context_stream_type", "PetscDeviceContext PetscStreamType", "PetscDeviceContextSetStreamType", PetscStreamTypes, PETSC_STREAM_MAX, PetscStreamTypes[dctx->streamType], &stype, &flag));
736   if (flag) PetscCall(PetscDeviceContextSetStreamType(dctx, static_cast<PetscStreamType>(stype)));
737   PetscCall(PetscOptionsEList("-device_context_device_type", "Underlying PetscDevice", "PetscDeviceContextSetDevice", PetscDeviceTypes + 1, PETSC_DEVICE_MAX - 1, dctx->device ? PetscDeviceTypes[dctx->device->type] : PetscDeviceTypes[PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE], &dtype, &flag));
738   if (flag) PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, static_cast<PetscDeviceType>(dtype + 1)));
739   PetscOptionsEnd();
740   PetscFunctionReturn(0);
741 }
742