xref: /petsc/src/sys/objects/device/interface/dcontext.cxx (revision c9cc58a286c8c88e832fec383b23499de67c4274)
1 #include <petsc/private/deviceimpl.h> /*I "petscdevice.h" I*/
2 #include "objpool.hpp"
3 
/* String names for PetscStreamType; this table is handed to PetscOptionsEList() in
 * PetscDeviceContextSetFromOptions() to parse -device_context_stream_type, and is indexed by
 * a context's streamType to produce the default option value.
 * NOTE(review): the trailing "PetscStreamType", "PETSC_STREAM_" and nullptr entries look like
 * the usual PETSc enum-name/prefix/terminator layout -- confirm before reordering entries */
const char *const PetscStreamTypes[] = {
  "global_blocking",
  "default_blocking",
  "global_nonblocking",
  "max",
  "PetscStreamType",
  "PETSC_STREAM_",
  nullptr
};
13 
/* String names for PetscDeviceContextJoinMode; indexed by the join mode in
 * PetscDeviceContextJoin() when reporting through PetscInfo().
 * NOTE(review): the trailing enum-name, prefix and nullptr entries presumably follow the same
 * layout as PetscStreamTypes -- confirm before reordering entries */
const char *const PetscDeviceContextJoinModes[] = {
  "destroy",
  "sync",
  "no_sync",
  "PetscDeviceContextJoinMode",
  "PETSC_DEVICE_CONTEXT_JOIN_",
  nullptr
};
22 
23 /* Define the allocator */
24 struct PetscDeviceContextAllocator : Petsc::AllocatorBase<PetscDeviceContext>
25 {
26   static PetscInt PetscDeviceContextID;
27 
28   PETSC_NODISCARD static PetscErrorCode create(PetscDeviceContext *dctx) noexcept
29   {
30     PetscDeviceContext dc;
31 
32     PetscFunctionBegin;
33     PetscCall(PetscNew(&dc));
34     dc->id         = PetscDeviceContextID++;
35     dc->streamType = PETSC_STREAM_DEFAULT_BLOCKING;
36     *dctx          = dc;
37     PetscFunctionReturn(0);
38   }
39 
40   PETSC_NODISCARD static PetscErrorCode destroy(PetscDeviceContext dctx) noexcept
41   {
42     PetscFunctionBegin;
43     PetscAssert(!dctx->numChildren,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying",dctx->numChildren);
44     if (dctx->ops->destroy) PetscCall((*dctx->ops->destroy)(dctx));
45     PetscCall(PetscDeviceDestroy(&dctx->device));
46     PetscCall(PetscFree(dctx->childIDs));
47     PetscCall(PetscFree(dctx));
48     PetscFunctionReturn(0);
49   }
50 
51   PETSC_NODISCARD static PetscErrorCode reset(PetscDeviceContext dctx) noexcept
52   {
53     PetscFunctionBegin;
54     /* don't deallocate the child array, rather just zero it out */
55     PetscCall(PetscArrayzero(dctx->childIDs,dctx->maxNumChildren));
56     dctx->setup       = PETSC_FALSE;
57     dctx->numChildren = 0;
58     dctx->streamType  = PETSC_STREAM_DEFAULT_BLOCKING;
59     PetscFunctionReturn(0);
60   }
61 
62   PETSC_NODISCARD static constexpr PetscErrorCode finalize() noexcept { return 0; }
63 };
64 /* an ID = 0 is invalid */
65 PetscInt PetscDeviceContextAllocator::PetscDeviceContextID = 1;
66 
67 static Petsc::ObjectPool<PetscDeviceContext,PetscDeviceContextAllocator> contextPool;
68 
69 /*@C
70   PetscDeviceContextCreate - Creates a PetscDeviceContext
71 
72   Not Collective, Asynchronous
73 
  Output Parameter:
75 . dctx - The PetscDeviceContext
76 
77   Notes:
78   Unlike almost every other PETSc class it is advised that most users use
79   PetscDeviceContextDuplicate() rather than this routine to create new contexts. Contexts
80   of different types are incompatible with one another; using
81   PetscDeviceContextDuplicate() ensures compatible types.
82 
83   Level: beginner
84 
85 .seealso: PetscDeviceContextDuplicate(), PetscDeviceContextSetDevice(),
86 PetscDeviceContextSetStreamType(), PetscDeviceContextSetUp(),
87 PetscDeviceContextSetFromOptions(), PetscDeviceContextDestroy()
88 @*/
89 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx)
90 {
91   PetscFunctionBegin;
92   PetscValidPointer(dctx,1);
93   PetscCall(PetscDeviceInitializePackage());
94   PetscCall(contextPool.get(*dctx));
95   PetscFunctionReturn(0);
96 }
97 
98 /*@C
99   PetscDeviceContextDestroy - Frees a PetscDeviceContext
100 
101   Not Collective, Asynchronous
102 
103   Input Parameters:
104 . dctx - The PetscDeviceContext
105 
106   Notes:
  No implicit synchronization occurs due to this routine; all resources are released completely asynchronously
  w.r.t. the host. If one needs to guarantee access to the data produced on this context's stream, one should perform
  the appropriate synchronization before calling this routine.
110 
111   Developer Notes:
112   The context is never actually "destroyed", only returned to an ever growing pool of
113   contexts. There are currently no safeguards on the size of the pool, this should perhaps
114   be implemented.
115 
116   Level: beginner
117 
118 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetUp(), PetscDeviceContextSynchronize()
119 @*/
120 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx)
121 {
122   PetscFunctionBegin;
123   if (!*dctx) PetscFunctionReturn(0);
124   PetscCall(contextPool.reclaim(std::move(*dctx)));
125   *dctx = nullptr;
126   PetscFunctionReturn(0);
127 }
128 
129 /*@C
130   PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a PetscDeviceContext
131 
132   Not Collective, Asynchronous
133 
134   Input Parameters:
135 + dctx - The PetscDeviceContext
136 - type - The PetscStreamType
137 
138   Notes:
139   See PetscStreamType in include/petscdevicetypes.h for more information on the available
140   types and their interactions. If the PetscDeviceContext was previously set up and stream
141   type was changed, you must call PetscDeviceContextSetUp() again after this routine.
142 
143   Level: intermediate
144 
145 .seealso: PetscStreamType, PetscDeviceContextGetStreamType(), PetscDeviceContextCreate(), PetscDeviceContextSetUp(), PetscDeviceContextSetFromOptions()
146 @*/
147 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type)
148 {
149   PetscFunctionBegin;
150   PetscValidDeviceContext(dctx,1);
151   PetscValidStreamType(type,2);
152   /* only need to do complex swapping if the object has already been setup */
153   if (dctx->setup && (dctx->streamType != type)) {
154     PetscCall((*dctx->ops->changestreamtype)(dctx,type));
155     dctx->setup = PETSC_FALSE;
156   }
157   dctx->streamType = type;
158   PetscFunctionReturn(0);
159 }
160 
161 /*@C
162   PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a PetscDeviceContext
163 
164   Not Collective, Asynchronous
165 
166   Input Parameter:
167 . dctx - The PetscDeviceContext
168 
169   Output Parameter:
170 . type - The PetscStreamType
171 
172   Notes:
173   See PetscStreamType in include/petscdevicetypes.h for more information on the available types and their interactions
174 
175   Level: intermediate
176 
177 .seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextCreate(), PetscDeviceContextSetFromOptions()
178 @*/
179 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type)
180 {
181   PetscFunctionBegin;
182   PetscValidDeviceContext(dctx,1);
183   PetscValidIntPointer(type,2);
184   *type = dctx->streamType;
185   PetscFunctionReturn(0);
186 }
187 
188 /*@C
189   PetscDeviceContextSetDevice - Set the underlying device for the PetscDeviceContext
190 
191   Not Collective, Possibly Synchronous
192 
193   Input Parameters:
194 + dctx   - The PetscDeviceContext
195 - device - The PetscDevice
196 
197   Notes:
  This routine is effectively PetscDeviceContext's "set-type" (so every PetscDeviceContext
  must also have an attached PetscDevice). Unlike the usual set-type semantics, it is
  not strictly necessary to set a context's device to enable usage; any created device
  context will always come equipped with the "default" device.
202 
203   This routine is a no-op if dctx is already attached to device.
204 
205   This routine may initialize the backend device and incur synchronization.
206 
207   Level: intermediate
208 
209 .seealso: PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceContextGetDevice()
210 @*/
211 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device)
212 {
213   PetscFunctionBegin;
214   PetscValidDeviceContext(dctx,1);
215   PetscValidDevice(device,2);
216   if (dctx->device) {
217     /* can't do a strict pointer equality check since PetscDevice's are reused */
218     if (dctx->device->ops->createcontext == device->ops->createcontext) PetscFunctionReturn(0);
219   }
220   PetscCall(PetscDeviceDestroy(&dctx->device));
221   if (dctx->ops->destroy) PetscCall((*dctx->ops->destroy)(dctx));
222   PetscCall(PetscMemzero(dctx->ops,sizeof(*dctx->ops)));
223   PetscCall((*device->ops->createcontext)(dctx));
224   PetscCall(PetscDeviceReference_Internal(device));
225   dctx->device = device;
226   dctx->setup  = PETSC_FALSE;
227   PetscFunctionReturn(0);
228 }
229 
230 /*@C
231   PetscDeviceContextGetDevice - Get the underlying PetscDevice for a PetscDeviceContext
232 
233   Not Collective, Asynchronous
234 
235   Input Parameter:
236 . dctx - the PetscDeviceContext
237 
238   Output Parameter:
239 . device - The PetscDevice
240 
241   Notes:
242   This is a borrowed reference, the user should not destroy the device.
243 
244   Level: intermediate
245 
246 .seealso: PetscDeviceContextSetDevice(), PetscDevice
247 @*/
248 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device)
249 {
250   PetscFunctionBegin;
251   PetscValidDeviceContext(dctx,1);
252   PetscValidPointer(device,2);
253   PetscAssert(dctx->device,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PetscDeviceContext %" PetscInt_FMT " has no attached PetscDevice to get",dctx->id);
254   *device = dctx->device;
255   PetscFunctionReturn(0);
256 }
257 
258 /*@C
259   PetscDeviceContextSetUp - Prepares a PetscDeviceContext for use
260 
261   Not Collective, Asynchronous
262 
263   Input Parameter:
264 . dctx - The PetscDeviceContext
265 
266   Developer Notes:
267   This routine is usually the stage where a PetscDeviceContext acquires device-side data structures such as streams,
268   events, and (possibly) handles.
269 
270   Level: beginner
271 
272 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextDestroy(), PetscDeviceContextSetFromOptions()
273 @*/
274 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx)
275 {
276   PetscFunctionBegin;
277   PetscValidDeviceContext(dctx,1);
278   if (!dctx->device) {
279     PetscCall(PetscInfo(nullptr,"PetscDeviceContext %" PetscInt_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n",dctx->id,PetscDeviceTypes[PETSC_DEVICE_DEFAULT]));
280     PetscCall(PetscDeviceContextSetDefaultDevice_Internal(dctx));
281   }
282   if (dctx->setup) PetscFunctionReturn(0);
283   PetscCall((*dctx->ops->setup)(dctx));
284   dctx->setup = PETSC_TRUE;
285   PetscFunctionReturn(0);
286 }
287 
288 /*@C
289   PetscDeviceContextDuplicate - Duplicates a PetscDeviceContext object
290 
291   Not Collective, Asynchronous
292 
293   Input Parameter:
294 . dctx - The PetscDeviceContext to duplicate
295 
  Output Parameter:
297 . dctxdup - The duplicated PetscDeviceContext
298 
299   Notes:
300   This is a shorthand method for creating a PetscDeviceContext with the exact same
301   settings as another. Note however that the duplicated PetscDeviceContext does not "share"
302   any of the underlying data with the original, (including its current stream-state) they
303   are completely separate objects.
304 
305   Level: beginner
306 
307 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetStreamType()
308 @*/
309 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup)
310 {
311   PetscDeviceContext dup;
312 
313   PetscFunctionBegin;
314   PetscValidDeviceContext(dctx,1);
315   PetscValidPointer(dctxdup,2);
316   PetscCall(PetscDeviceContextCreate(&dup));
317   PetscCall(PetscDeviceContextSetStreamType(dup,dctx->streamType));
318   if (dctx->device) PetscCall(PetscDeviceContextSetDevice(dup,dctx->device));
319   PetscCall(PetscDeviceContextSetUp(dup));
320   *dctxdup = dup;
321   PetscFunctionReturn(0);
322 }
323 
324 /*@C
325   PetscDeviceContextQueryIdle - Returns whether or not a PetscDeviceContext is idle
326 
327   Not Collective, Asynchronous
328 
329   Input Parameter:
330 . dctx - The PetscDeviceContext object
331 
332   Output Parameter:
333 . idle - PETSC_TRUE if PetscDeviceContext has NO work, PETSC_FALSE if it has work
334 
335   Notes:
  This routine only refers to a single context and does NOT take any of its children into
  account. That is, if dctx is idle but has dependents who do have work, this routine still
  returns PETSC_TRUE.
339 
340   Level: intermediate
341 
342 .seealso: PetscDeviceContextCreate(), PetscDeviceContextWaitForContext(), PetscDeviceContextFork()
343 @*/
344 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle)
345 {
346   PetscFunctionBegin;
347   PetscValidDeviceContext(dctx,1);
348   PetscValidBoolPointer(idle,2);
349   PetscCall((*dctx->ops->query)(dctx,idle));
350   PetscCall(PetscInfo(nullptr,"PetscDeviceContext id %" PetscInt_FMT " %s idle\n",dctx->id,*idle ? "was" : "was not"));
351   PetscFunctionReturn(0);
352 }
353 
354 /*@C
355   PetscDeviceContextWaitForContext - Make one context wait for another context to finish
356 
357   Not Collective, Asynchronous
358 
359   Input Parameters:
360 + dctxa - The PetscDeviceContext object that is waiting
361 - dctxb - The PetscDeviceContext object that is being waited on
362 
363   Notes:
364   Serializes two PetscDeviceContexts. This routine uses only the state of dctxb at the moment this routine was
365   called, so any future work queued will not affect dctxa. It is safe to pass the same context to both arguments.
366 
367   Level: beginner
368 
369 .seealso: PetscDeviceContextCreate(), PetscDeviceContextQueryIdle(), PetscDeviceContextJoin()
370 @*/
371 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb)
372 {
373   PetscFunctionBegin;
374   PetscCheckCompatibleDeviceContexts(dctxa,1,dctxb,2);
375   if (dctxa == dctxb) PetscFunctionReturn(0);
376   PetscCall((*dctxa->ops->waitforcontext)(dctxa,dctxb));
377   PetscFunctionReturn(0);
378 }
379 
/* true only when both debugging and PetscInfo() support are enabled; guards the bookkeeping
 * that collects child ids into a string purely for info logging */
#define PETSC_USE_DEBUG_AND_INFO (PetscDefined(USE_DEBUG) && PetscDefined(USE_INFO))
#if PETSC_USE_DEBUG_AND_INFO
/* std::string is only needed to assemble the id lists passed to PetscInfo() */
#include <string>
#endif
384 /*@C
385   PetscDeviceContextFork - Create a set of dependent child contexts from a parent context
386 
387   Not Collective, Asynchronous
388 
389   Input Parameters:
390 + dctx - The parent PetscDeviceContext
391 - n    - The number of children to create
392 
393   Output Parameter:
394 . dsub - The created child context(s)
395 
396   Notes:
397   This routine creates n edges of a DAG from a source node which are causally dependent on the source node, meaning
398   that work queued on child contexts will not start until the parent context finishes its work. This accounts for work
399   queued on the parent up until calling this function, any subsequent work enqueued on the parent has no effect on the children.
400 
  Any children created with this routine have their lifetimes bounded by the parent. That is, the parent context expects
  to free all of its children (and ONLY its children) before it is itself freed.
403 
404   DAG representation:
405 .vb
406   time ->
407 
408   -> dctx \----> dctx ------>
409            \---> dsub[0] --->
410             \--> ... ------->
411              \-> dsub[n-1] ->
412 .ve
413 
414   Level: intermediate
415 
416 .seealso: PetscDeviceContextJoin(), PetscDeviceContextSynchronize(), PetscDeviceContextQueryIdle()
417 @*/
418 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub)
419 {
420 #if PETSC_USE_DEBUG_AND_INFO
421   const PetscInt      nBefore = n;
422   static std::string  idList;
423 #endif
424   PetscDeviceContext *dsubTmp = nullptr;
425   PetscInt            i = 0;
426 
427   PetscFunctionBegin;
428   PetscValidDeviceContext(dctx,1);
429   PetscValidPointer(dsub,3);
430   PetscAssert(n >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts requested %" PetscInt_FMT " < 0",n);
431 #if PETSC_USE_DEBUG_AND_INFO
432   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
433   idList.reserve(4*n);
434 #endif
435   /* update child totals */
436   dctx->numChildren += n;
437   /* now to find out if we have room */
438   if (dctx->numChildren > dctx->maxNumChildren) {
439     /* no room, either from having too many kids or not having any */
440     if (dctx->childIDs) {
441       /* have existing children, must reallocate them */
442       PetscCall(PetscRealloc(dctx->numChildren*sizeof(*dctx->childIDs),&dctx->childIDs));
443       /* clear the extra memory since realloc doesn't do it for us */
444       PetscCall(PetscArrayzero((dctx->childIDs)+(dctx->maxNumChildren),(dctx->numChildren)-(dctx->maxNumChildren)));
445     } else {
446       /* have no children */
447       PetscCall(PetscCalloc1(dctx->numChildren,&dctx->childIDs));
448     }
449     /* update total number of children */
450     dctx->maxNumChildren = dctx->numChildren;
451   }
452   PetscCall(PetscMalloc1(n,&dsubTmp));
453   while (n) {
454     /* empty child slot */
455     if (!(dctx->childIDs[i])) {
456       /* create the child context in the image of its parent */
457       PetscCall(PetscDeviceContextDuplicate(dctx,dsubTmp+i));
458       PetscCall(PetscDeviceContextWaitForContext(dsubTmp[i],dctx));
459       /* register the child with its parent */
460       dctx->childIDs[i] = dsubTmp[i]->id;
461 #if PETSC_USE_DEBUG_AND_INFO
462       idList += std::to_string(dsubTmp[i]->id);
463       if (n != 1) idList += ", ";
464 #endif
465       --n;
466     }
467     ++i;
468   }
469 #if PETSC_USE_DEBUG_AND_INFO
470   PetscCall(PetscInfo(nullptr,"Forked %" PetscInt_FMT " children from parent %" PetscInt_FMT " with IDs: %s\n",nBefore,dctx->id,idList.c_str()));
471   /* resets the size but doesn't deallocate the memory */
472   idList.clear();
473 #endif
474   /* pass the children back to caller */
475   *dsub = dsubTmp;
476   PetscFunctionReturn(0);
477 }
478 
479 /*@C
480   PetscDeviceContextJoin - Converge a set of child contexts
481 
482   Not Collective, Asynchronous
483 
484   Input Parameters:
485 + dctx         - A PetscDeviceContext to converge on
486 . n            - The number of sub contexts to converge
487 . joinMode     - The type of join to perform
488 - dsub         - The sub contexts to converge
489 
490   Notes:
  If PetscDeviceContextFork() creates n edges from a source node which all depend on the
  source node, then this routine is the exact mirror. That is, it creates a node
  (represented in dctx) which receives n edges (and optionally destroys them) and which is
  dependent on the completion of all incoming edges.
495 
496   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY all contexts in dsub will be destroyed
497   by this routine. Thus all sub contexts must have been created with the dctx passed to
498   this routine.
499 
  If joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC, dctx waits for all sub contexts but the
  sub contexts do not wait for one another afterwards.
502 
503   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC all sub contexts will additionally
504   wait on dctx after converging. This has the effect of "synchronizing" the outgoing
505   edges.
506 
507   DAG representations:
508   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY
509 .vb
510   time ->
511 
512   -> dctx ---------/- dctx ->
513   -> dsub[0] -----/
514   ->  ... -------/
515   -> dsub[n-1] -/
516 .ve
517   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC
518 .vb
519   time ->
520 
521   -> dctx ---------/- dctx ->
522   -> dsub[0] -----/--------->
523   ->  ... -------/---------->
524   -> dsub[n-1] -/----------->
525 .ve
526   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC
527 .vb
528   time ->
529 
530   -> dctx ---------/- dctx -\----> dctx ------>
531   -> dsub[0] -----/          \---> dsub[0] --->
532   ->  ... -------/            \--> ... ------->
533   -> dsub[n-1] -/              \-> dsub[n-1] ->
534 .ve
535 
536   Level: intermediate
537 
538 .seealso: PetscDeviceContextFork(), PetscDeviceContextSynchronize(), PetscDeviceContextJoinMode
539 @*/
540 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub)
541 {
542 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
543   static std::string idList;
544 #endif
545 
546   PetscFunctionBegin;
547   /* validity of dctx is checked in the wait-for loop */
548   PetscValidPointer(dsub,4);
549   PetscAssert(n >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts merged %" PetscInt_FMT " < 0",n);
550 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
551   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
552   idList.reserve(4*n);
553 #endif
554   /* first dctx waits on all the incoming edges */
555   for (PetscInt i = 0; i < n; ++i) {
556     PetscCheckCompatibleDeviceContexts(dctx,1,(*dsub)[i],4);
557     PetscCall(PetscDeviceContextWaitForContext(dctx,(*dsub)[i]));
558 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
559     idList += std::to_string((*dsub)[i]->id);
560     if (i+1 < n) idList += ", ";
561 #endif
562   }
563 
564   /* now we handle the aftermath */
565   switch (joinMode) {
566   case PETSC_DEVICE_CONTEXT_JOIN_DESTROY:
567     {
568       PetscInt j = 0;
569 
570       PetscAssert(n <= dctx->numChildren,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent",n,dctx->numChildren);
571       /* update child count while it's still fresh in memory */
572       dctx->numChildren -= n;
573       for (PetscInt i = 0; i < dctx->maxNumChildren; ++i) {
574         if (dctx->childIDs[i] && (dctx->childIDs[i] == (*dsub)[j]->id)) {
575           /* child is one of ours, can destroy it */
576           PetscCall(PetscDeviceContextDestroy((*dsub)+j));
577           /* reset the child slot */
578           dctx->childIDs[i] = 0;
579           if (++j == n) break;
580         }
581       }
582       /* gone through the loop but did not find every child, if this triggers (or well, doesn't) on perf-builds we leak the remaining contexts memory */
583       PetscAssert(j == n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in.",n-j);
584       PetscCall(PetscFree(*dsub));
585     }
586     break;
587   case PETSC_DEVICE_CONTEXT_JOIN_SYNC:
588     for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i],dctx));
589   case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC:
590     break;
591   default:
592     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown PetscDeviceContextJoinMode given");
593   }
594 
595 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
596   PetscCall(PetscInfo(nullptr,"Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt_FMT ", mode %s with IDs: %s\n",n,dctx->id,PetscDeviceContextJoinModes[joinMode],idList.c_str()));
597   idList.clear();
598 #endif
599   PetscFunctionReturn(0);
600 }
601 
602 /*@C
603   PetscDeviceContextSynchronize - Block the host until all work queued on or associated with a PetscDeviceContext has finished
604 
605   Not Collective, Synchronous
606 
607   Input Parameters:
608 . dctx - The PetscDeviceContext to synchronize
609 
610   Level: beginner
611 
612 .seealso: PetscDeviceContextFork(), PetscDeviceContextJoin(), PetscDeviceContextQueryIdle()
613 @*/
614 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx)
615 {
616   PetscFunctionBegin;
617   PetscValidDeviceContext(dctx,1);
618   /* if it isn't setup there is nothing to sync on */
619   if (dctx->setup) PetscCall((*dctx->ops->synchronize)(dctx));
620   PetscFunctionReturn(0);
621 }
622 
/* device type the root context is created with unless it is overridden through
 * PetscDeviceContextSetRootDeviceType_Internal() */
#define PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE PETSC_DEVICE_DEFAULT
// REMOVE ME (change)
/* stream type the root context is created with */
#define PETSC_DEVICE_CONTEXT_DEFAULT_STREAM PETSC_STREAM_GLOBAL_BLOCKING

/* settings used when the root context is lazily created; the finalizer registered in
 * PetscDeviceContextSetupGlobalContext_Private() restores both to the defaults above */
static PetscDeviceType    rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE;
static PetscStreamType    rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM;
/* the current "active" context, nullptr until it is first requested or explicitly set */
static PetscDeviceContext globalContext  = nullptr;
630 
631 /* when PetsDevice initializes PetscDeviceContext eagerly the type of device created should
632  * match whatever device is eagerly intialized */
633 PetscErrorCode PetscDeviceContextSetRootDeviceType_Internal(PetscDeviceType type)
634 {
635   PetscFunctionBegin;
636   PetscValidDeviceType(type,1);
637   rootDeviceType = type;
638   PetscFunctionReturn(0);
639 }
640 
#if 0
/* currently unused (compiled out): would record the stream type that the root
 * PetscDeviceContext is created with, mirroring
 * PetscDeviceContextSetRootDeviceType_Internal() */
PetscErrorCode PetscDeviceContextSetRootStreamType_Internal(PetscStreamType type)
{
  PetscFunctionBegin;
  PetscValidStreamType(type,1);
  rootStreamType = type;
  PetscFunctionReturn(0);
}
#endif
651 
652 static PetscErrorCode PetscDeviceContextSetupGlobalContext_Private(void)
653 {
654   static const auto PetscDeviceContextFinalizer = []() -> PetscErrorCode {
655 
656     PetscFunctionBegin;
657     PetscCall(PetscDeviceContextDestroy(&globalContext));
658     rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE;
659     rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM;
660     PetscFunctionReturn(0);
661   };
662 
663   PetscFunctionBegin;
664   if (globalContext) PetscFunctionReturn(0);
665   /* this exists purely as a valid device check. */
666   PetscCall(PetscDeviceInitializePackage());
667   PetscCall(PetscRegisterFinalize(PetscDeviceContextFinalizer));
668   PetscCall(PetscInfo(nullptr,"Initializing global PetscDeviceContext\n"));
669   /* we call the allocator directly here since the ObjectPool creates a PetscContainer which
670    * eventually tries to call logging functions. However, this routine may be purposefully
671    * called __before__ logging is initialized, so the logging function would PETSCABORT */
672   PetscCall(contextPool.allocator().create(&globalContext));
673   PetscCall(PetscDeviceContextSetStreamType(globalContext,rootStreamType));
674   PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(globalContext,rootDeviceType));
675   PetscCall(PetscDeviceContextSetUp(globalContext));
676   PetscFunctionReturn(0);
677 }
678 
679 /*@C
680   PetscDeviceContextGetCurrentContext - Get the current active PetscDeviceContext
681 
682   Not Collective, Asynchronous
683 
684   Output Parameter:
685 . dctx - The PetscDeviceContext
686 
687   Notes:
688   The user generally should not destroy contexts retrieved with this routine unless they
689   themselves have created them. There exists no protection against destroying the root
690   context.
691 
692   Developer Notes:
693   Unless the user has set their own, this routine creates the "root" context the first time it
694   is called, registering its destructor to PetscFinalize().
695 
696   Level: beginner
697 
698 .seealso: PetscDeviceContextSetCurrentContext(), PetscDeviceContextFork(),
699 PetscDeviceContextJoin(), PetscDeviceContextCreate()
700 @*/
701 PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *dctx)
702 {
703   PetscFunctionBegin;
704   PetscValidPointer(dctx,1);
705   PetscCall(PetscDeviceContextSetupGlobalContext_Private());
706   /* while the static analyzer can find global variables, it will throw a warning about not
707    * being able to connect this back to the function arguments */
708   PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscValidDeviceContext(globalContext,-1));
709   *dctx = globalContext;
710   PetscFunctionReturn(0);
711 }
712 
713 /*@C
714   PetscDeviceContextSetCurrentContext - Set the current active PetscDeviceContext
715 
716   Not Collective, Asynchronous
717 
718   Input Parameter:
719 . dctx - The PetscDeviceContext
720 
721   Notes:
  This routine can be used to set the de facto "root" PetscDeviceContext to a user-defined
  implementation by calling this routine immediately after PetscInitialize() and ensuring that
  PetscDevice is not greedily initialized. In this case the user is responsible for destroying
  their PetscDeviceContext before PetscFinalize() returns.
726 
727   The old context is not stored in any way by this routine; if one is overriding a context that
728   they themselves do not control, one should take care to temporarily store it by calling
729   PetscDeviceContextGetCurrentContext() before calling this routine.
730 
731   Level: beginner
732 
733 .seealso: PetscDeviceContextGetCurrentContext(), PetscDeviceContextFork(),
734 PetscDeviceContextJoin(), PetscDeviceContextCreate()
735 @*/
736 PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext dctx)
737 {
738   PetscFunctionBegin;
739   PetscValidDeviceContext(dctx,1);
740   PetscAssert(dctx->setup,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PetscDeviceContext %" PetscInt_FMT " must be set up before being set as global context",dctx->id);
741   globalContext = dctx;
742   PetscCall(PetscInfo(nullptr,"Set global PetscDeviceContext id %" PetscInt_FMT "\n",dctx->id));
743   PetscFunctionReturn(0);
744 }
745 
746 /*@C
747   PetscDeviceContextSetFromOptions - Configure a PetscDeviceContext from the options database
748 
749   Collective on comm, Asynchronous
750 
751   Input Parameters:
752 + comm   - MPI communicator on which to query the options database
753 . prefix - prefix to prepend to all options database queries, NULL if not needed
754 - dctx   - The PetscDeviceContext to configure
755 
756   Output Parameter:
757 . dctx - The PetscDeviceContext
758 
759   Options Database:
760 + -device_context_stream_type - type of stream to create inside the PetscDeviceContext -
761    PetscDeviceContextSetStreamType()
762 - -device_context_device_type - the type of PetscDevice to attach by default - PetscDeviceType
763 
764   Level: beginner
765 
766 .seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextSetDevice()
767 @*/
768 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, const char prefix[], PetscDeviceContext dctx)
769 {
770   PetscBool      flag;
771   PetscInt       stype,dtype;
772 
773   PetscFunctionBegin;
774   if (prefix) PetscValidCharPointer(prefix,2);
775   PetscValidDeviceContext(dctx,3);
776   PetscOptionsBegin(comm,prefix,"PetscDeviceContext Options","Sys");
777   PetscCall(PetscOptionsEList("-device_context_stream_type","PetscDeviceContext PetscStreamType","PetscDeviceContextSetStreamType",PetscStreamTypes,PETSC_STREAM_MAX,PetscStreamTypes[dctx->streamType],&stype,&flag));
778   if (flag) PetscCall(PetscDeviceContextSetStreamType(dctx,static_cast<PetscStreamType>(stype)));
779   PetscCall(PetscOptionsEList("-device_context_device_type","Underlying PetscDevice","PetscDeviceContextSetDevice",PetscDeviceTypes+1,PETSC_DEVICE_MAX-1,dctx->device ? PetscDeviceTypes[dctx->device->type] : PetscDeviceTypes[PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE],&dtype,&flag));
780   if (flag) {
781     PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx,static_cast<PetscDeviceType>(dtype+1)));
782   }
783   PetscOptionsEnd();
784   PetscFunctionReturn(0);
785 }
786