xref: /petsc/src/sys/objects/device/interface/dcontext.cxx (revision ed587871e994007e9aeee2261b868fe3c596c9b3)
1 #include <petsc/private/deviceimpl.h> /*I "petscdevice.h" I*/
2 #include "objpool.hpp"
3 
4 /* Define the allocator */
5 struct PetscDeviceContextAllocator : Petsc::Allocator<PetscDeviceContext>
6 {
7   static PetscInt PetscDeviceContextID;
8 
9   PETSC_NODISCARD PetscErrorCode create(PetscDeviceContext *dctx) noexcept
10   {
11     PetscDeviceContext dc;
12     PetscErrorCode     ierr;
13 
14     PetscFunctionBegin;
15     ierr           = PetscNew(&dc);CHKERRQ(ierr);
16     dc->id         = PetscDeviceContextID++;
17     dc->idle       = PETSC_TRUE;
18     dc->streamType = PETSC_STREAM_DEFAULT_BLOCKING;
19     *dctx          = dc;
20     PetscFunctionReturn(0);
21   }
22 
23   PETSC_NODISCARD PetscErrorCode destroy(PetscDeviceContext &dctx) const noexcept
24   {
25     PetscErrorCode ierr;
26 
27     PetscFunctionBegin;
28     if (PetscUnlikelyDebug(dctx->numChildren)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Device context still has %D un-restored children, must call PetscDeviceContextRestore() on all children before destroying",dctx->numChildren);
29     if (dctx->ops->destroy) {ierr = (*dctx->ops->destroy)(dctx);CHKERRQ(ierr);}
30     ierr = PetscDeviceDestroy(&dctx->device);CHKERRQ(ierr);
31     ierr = PetscFree(dctx->childIDs);CHKERRQ(ierr);
32     ierr = PetscFree(dctx);CHKERRQ(ierr);
33     PetscFunctionReturn(0);
34   }
35 
36   PETSC_NODISCARD PetscErrorCode reset(PetscDeviceContext &dctx) const noexcept
37   {
38     PetscErrorCode ierr;
39 
40     PetscFunctionBegin;
41     /* don't deallocate the child array, rather just zero it out */
42     ierr = PetscArrayzero(dctx->childIDs,dctx->maxNumChildren);CHKERRQ(ierr);
43     dctx->setup       = PETSC_FALSE;
44     dctx->numChildren = 0;
45     dctx->idle        = PETSC_TRUE;
46     dctx->streamType  = PETSC_STREAM_DEFAULT_BLOCKING;
47     PetscFunctionReturn(0);
48   }
49 
50   PETSC_NODISCARD PetscErrorCode finalize(void) noexcept
51   {
52     PetscFunctionBegin;
53     PetscDeviceContextID = 0;
54     PetscFunctionReturn(0);
55   }
56 };
57 PetscInt PetscDeviceContextAllocator::PetscDeviceContextID = 0;
58 
59 static Petsc::ObjectPool<PetscDeviceContext,PetscDeviceContextAllocator> contextPool;
60 
61 /*@C
62   PetscDeviceContextCreate - Creates a PetscDeviceContext
63 
64   Not Collective, Asynchronous
65 
  Output Parameter:
67 . dctx - The PetscDeviceContext
68 
69   Notes:
70   Unlike almost every other PETSc class it is advised that most users use
71   PetscDeviceContextDuplicate() rather than this routine to create new contexts. Contexts
72   of different types are incompatible with one another; using
73   PetscDeviceContextDuplicate() ensures compatible types.
74 
75   Level: beginner
76 
77 .seealso: PetscDeviceContextDuplicate(), PetscDeviceContextSetDevice(),
78 PetscDeviceContextSetStreamType(), PetscDeviceContextSetUp(), PetscDeviceContextDestroy(),
79 PetscDeviceContextSetFromOptions()
80 @*/
81 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx)
82 {
83   PetscErrorCode ierr;
84 
85   PetscFunctionBegin;
86   PetscValidPointer(dctx,1);
87   ierr = PetscDeviceInitializePackage();CHKERRQ(ierr);
88   ierr = contextPool.get(*dctx);CHKERRQ(ierr);
89   PetscFunctionReturn(0);
90 }
91 
92 /*@C
93   PetscDeviceContextDestroy - Frees a PetscDeviceContext
94 
95   Not Collective, Asynchronous
96 
97   Input Parameters:
98 . dctx - The PetscDeviceContext
99 
100   Notes:
101   No implicit synchronization occurs due to this routine, all resources are released completely asynchronously
  w.r.t. the host. If one needs to guarantee access to the data produced on this context's stream one should perform the
103   appropriate synchronization before calling this routine.
104 
105   Developer Notes:
106   The context is never actually "destroyed", only returned to an ever growing pool of
107   contexts. There are currently no safeguards on the size of the pool, this should perhaps
108   be implemented.
109 
110   Level: beginner
111 
112 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetUp(), PetscDeviceContextSynchronize()
113 @*/
114 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx)
115 {
116   PetscErrorCode ierr;
117 
118   PetscFunctionBegin;
119   if (!*dctx) PetscFunctionReturn(0);
120   /* use move assignment whenever possible */
121   ierr = contextPool.reclaim(std::move(*dctx));CHKERRQ(ierr);
122   PetscFunctionReturn(0);
123 }
124 
125 /*@C
126   PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a PetscDeviceContext
127 
128   Not Collective, Asynchronous
129 
  Input Parameters:
131 + dctx - The PetscDeviceContext
132 - type - The PetscStreamType
133 
134   Notes:
135   See PetscStreamType in include/petscdevicetypes.h for more information on the available
136   types and their interactions. If the PetscDeviceContext was previously set up and stream
137   type was changed, you must call PetscDeviceContextSetUp() again after this routine.
138 
139   Level: intermediate
140 
141 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(),
142 PetscDeviceContextGetStreamType(), PetscDeviceContextSetUp(), PetscDeviceContextSetFromOptions()
143 @*/
144 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type)
145 {
146   PetscFunctionBegin;
147   PetscValidDeviceContext(dctx,1);
148   PetscValidStreamType(type,2);
149   /* only need to do complex swapping if the object has already been setup */
150   if (dctx->setup && (dctx->streamType != type)) {
151     PetscErrorCode ierr;
152 
153     ierr = (*dctx->ops->changestreamtype)(dctx,type);CHKERRQ(ierr);
154     dctx->setup = PETSC_FALSE;
155   }
156   dctx->streamType = type;
157   PetscFunctionReturn(0);
158 }
159 
160 /*@C
161   PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a PetscDeviceContext
162 
163   Not Collective, Asynchronous
164 
  Input Parameter:
166 . dctx - The PetscDeviceContext
167 
168   Output Parameter:
169 . type - The PetscStreamType
170 
171   Notes:
172   See PetscStreamType in include/petscdevicetypes.h for more information on the available types and their interactions
173 
174   Level: intermediate
175 
176 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(),
177 PetscDeviceContextSetStreamType(), PetscDeviceContextSetFromOptions()
178 @*/
179 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type)
180 {
181   PetscFunctionBegin;
182   PetscValidDeviceContext(dctx,1);
183   PetscValidIntPointer(type,2);
184   *type = dctx->streamType;
185   PetscFunctionReturn(0);
186 }
187 
188 /*@C
189   PetscDeviceContextSetDevice - Set the underlying device for the PetscDeviceContext
190 
191   Not Collective, Possibly Synchronous
192 
  Input Parameters:
194 + dctx   - The PetscDeviceContext
195 - device - The PetscDevice
196 
197   Notes:
198   This routine is effectively PetscDeviceContext's "set-type" (so every PetscDeviceContext
199   must also have an attached PetscDevice). Unlike the usual set-type semantics, it is
  not strictly necessary to set a context's device to enable usage, any created device
201   contexts will always come equipped with the "default" device.
202 
203   Level: intermediate
204 
205 .seealso: PetscDeviceCreate(), PetscDeviceContextGetDevice()
206 @*/
207 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device)
208 {
209   PetscErrorCode ierr;
210 
211   PetscFunctionBegin;
212   PetscValidDeviceContext(dctx,1);
213   PetscValidDevice(device,2);
214   if (dctx->device == device) PetscFunctionReturn(0);
215   ierr = PetscDeviceDestroy(&dctx->device);CHKERRQ(ierr);
216   ierr = PetscMemzero(dctx->ops,sizeof(*dctx->ops));CHKERRQ(ierr);
217   ierr = (*device->ops->createcontext)(dctx);CHKERRQ(ierr);
218   dctx->device = PetscDeviceReference(device);
219   dctx->setup  = PETSC_FALSE;
220   PetscFunctionReturn(0);
221 }
222 
223 /*@C
224   PetscDeviceContextGetDevice - Get the underlying PetscDevice for a PetscDeviceContext
225 
226   Not Collective, Asynchronous
227 
228   Input Parameter:
229 . dctx - the PetscDeviceContext
230 
231   Output Parameter:
232 . device - The PetscDevice
233 
234   Notes:
235   This is a borrowed reference, the user should not destroy the device.
236 
237 .seealso: PetscDeviceContextSetDevice(), PetscDevice
238 @*/
239 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device)
240 {
241   PetscFunctionBegin;
242   PetscValidDeviceContext(dctx,1);
243   PetscValidPointer(device,2);
244   *device = dctx->device;
245   PetscFunctionReturn(0);
246 }
247 
248 /*@C
249   PetscDeviceContextSetUp - Prepares a PetscDeviceContext for use
250 
251   Not Collective, Asynchronous
252 
  Input Parameter:
254 . dctx - The PetscDeviceContext
255 
256   Developer Notes:
257   This routine is usually the stage where a PetscDeviceContext acquires device-side data structures such as streams,
258   events, and (possibly) handles.
259 
260   Level: beginner
261 
262 .seealso: PetscDeviceContextTypes, PetscDeviceContextCreate(),
263 PetscDeviceContextSetDevice(), PetscDeviceContextDestroy(), PetscDeviceContextSetFromOptions()
264 @*/
265 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx)
266 {
267   PetscErrorCode ierr;
268 
269   PetscFunctionBegin;
270   PetscValidDeviceContext(dctx,1);
271   if (!dctx->device) {
272     ierr = PetscInfo2(NULL,"PetscDeviceContext %d did not have an explicitly attached PetscDevice, using default with type %s\n",dctx->id,PetscDeviceKinds[PETSC_DEVICE_DEFAULT]);CHKERRQ(ierr);
273     ierr = PetscDeviceContextSetDevice(dctx,PetscDeviceDefault_Internal());CHKERRQ(ierr);
274   }
275   if (dctx->setup) PetscFunctionReturn(0);
276   ierr = (*dctx->ops->setup)(dctx);CHKERRQ(ierr);
277   dctx->setup = PETSC_TRUE;
278   PetscFunctionReturn(0);
279 }
280 
281 /*@C
282   PetscDeviceContextDuplicate - Duplicates a PetscDeviceContext object
283 
284   Not Collective, Asynchronous
285 
286   Input Parameter:
287 . dctx - The PetscDeviceContext to duplicate
288 
  Output Parameter:
. dctxdup - The duplicated PetscDeviceContext
291 
292   Notes:
293   This is a shorthand method for creating a PetscDeviceContext with the exact same
294   settings as another. Note however that the duplicated PetscDeviceContext does not "share"
295   any of the underlying data with the original, (including its current stream-state) they
296   are completely separate objects.
297 
298   Level: beginner
299 
300 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetStreamType()
301 @*/
302 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup)
303 {
304   PetscErrorCode ierr;
305 
306   PetscFunctionBegin;
307   PetscValidDeviceContext(dctx,1);
308   PetscValidPointer(dctxdup,2);
309   ierr = PetscDeviceContextCreate(dctxdup);CHKERRQ(ierr);
310   ierr = PetscDeviceContextSetDevice(*dctxdup,dctx->device);CHKERRQ(ierr);
311   ierr = PetscDeviceContextSetStreamType(*dctxdup,dctx->streamType);CHKERRQ(ierr);
312   ierr = PetscDeviceContextSetUp(*dctxdup);CHKERRQ(ierr);
313   PetscFunctionReturn(0);
314 }
315 
316 /*@C
317   PetscDeviceContextQueryIdle - Returns whether or not a PetscDeviceContext is idle
318 
319   Not Collective, Asynchronous
320 
321   Input Parameter:
322 . dctx - The PetscDeviceContext object
323 
324   Output Parameter:
325 . idle - PETSC_TRUE if PetscDeviceContext has NO work, PETSC_FALSE if it has work
326 
327   Notes:
328   This routine only refers a singular context and does NOT take any of its children into account. That is, if dctx is
329   idle but has dependents who do have work, this routine still returns PETSC_TRUE.
330 
331   Results of PetscDeviceContextQueryIdle() are cached on return, allowing this function to be called repeatedly in an
332   efficient manner. When debug mode is enabled this cache is verified on every call to
333   this routine, but is blindly believed when debugging is disabled.
334 
335   Level: intermediate
336 
337 .seealso: PetscDeviceContextCreate(), PetscDeviceContextWaitForContext(), PetscDeviceContextFork()
338 @*/
339 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle)
340 {
341   PetscErrorCode ierr;
342 
343   PetscFunctionBegin;
344   PetscValidDeviceContext(dctx,1);
345   PetscValidBoolPointer(idle,2);
346   if (dctx->idle) {
347     *idle = PETSC_TRUE;
348     ierr = PetscDeviceContextValidateIdle_Internal(dctx);CHKERRQ(ierr);
349   } else {
350     ierr = (*dctx->ops->query)(dctx,idle);CHKERRQ(ierr);
351     dctx->idle = *idle;
352   }
353   PetscFunctionReturn(0);
354 }
355 
356 /*@C
357   PetscDeviceContextWaitForContext - Make one context wait for another context to finish
358 
359   Not Collective, Asynchronous
360 
361   Input Parameters:
362 + dctxa - The PetscDeviceContext object that is waiting
363 - dctxb - The PetscDeviceContext object that is being waited on
364 
365   Notes:
366   Serializes two PetscDeviceContexts. This routine uses only the state of dctxb at the moment this routine was
367   called, so any future work queued will not affect dctxa. It is safe to pass the same context to both arguments.
368 
369   Level: beginner
370 
371 .seealso: PetscDeviceContextCreate(), PetscDeviceContextQueryIdle(), PetscDeviceContextJoin()
372 @*/
373 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb)
374 {
375   PetscErrorCode ierr;
376 
377   PetscFunctionBegin;
378   PetscCheckCompatibleDeviceContexts(dctxa,1,dctxb,2);
379   if (dctxa == dctxb) PetscFunctionReturn(0);
380   if (dctxb->idle) {
381     /* No need to do the extra function lookup and event record if the stream were waiting on isn't doing anything */
382     ierr = PetscDeviceContextValidateIdle_Internal(dctxb);CHKERRQ(ierr);
383   } else {
384     ierr = (*dctxa->ops->waitforctx)(dctxa,dctxb);CHKERRQ(ierr);
385   }
386   PetscFunctionReturn(0);
387 }
388 
389 /*@C
390   PetscDeviceContextFork - Create a set of dependent child contexts from a parent context
391 
392   Not Collective, Asynchronous
393 
394   Input Parameters:
395 + dctx - The parent PetscDeviceContext
396 - n    - The number of children to create
397 
398   Output Parameter:
399 . dsub - The created child context(s)
400 
401   Notes:
402   This routine creates n edges of a DAG from a source node which are causally dependent on the source node, meaning
403   that work queued on child contexts will not start until the parent context finishes its work. This accounts for work
404   queued on the parent up until calling this function, any subsequent work enqueued on the parent has no effect on the children.
405 
406   Any children created with this routine have their lifetimes bounded by the parent. That is, the parent context expects
  to free all of its children (and ONLY its children) before it is itself freed.
408 
409   DAG representation:
410 .vb
411   time ->
412 
413   -> dctx \----> dctx ------>
414            \---> dsub[0] --->
415             \--> ... ------->
416              \-> dsub[n-1] ->
417 .ve
418 
419   Level: intermediate
420 
421 .seealso: PetscDeviceContextJoin(), PetscDeviceContextSynchronize(), PetscDeviceContextQueryIdle()
422 @*/
423 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub)
424 {
425 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
426   const PetscInt      nBefore = n;
427   static std::string  idList;
428 #endif
429   PetscDeviceContext *dsubTmp = nullptr;
430   PetscInt            i = 0;
431   PetscErrorCode      ierr;
432 
433   PetscFunctionBegin;
434   PetscValidDeviceContext(dctx,1);
435   PetscValidPointer(dsub,3);
436   if (PetscUnlikelyDebug(n < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts requested %D < 0",n);
437 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
438   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
439   idList.reserve(4*n);
440 #endif
441   /* update child totals */
442   dctx->numChildren += n;
443   /* now to find out if we have room */
444   if (dctx->numChildren > dctx->maxNumChildren) {
445     /* no room, either from having too many kids or not having any */
446     if (dctx->childIDs) {
447       /* have existing children, must reallocate them */
448       ierr = PetscRealloc(dctx->numChildren*sizeof(*dctx->childIDs),&dctx->childIDs);CHKERRQ(ierr);
449       /* clear the extra memory since realloc doesn't do it for us */
450       ierr = PetscArrayzero((dctx->childIDs)+(dctx->maxNumChildren),(dctx->numChildren)-(dctx->maxNumChildren));CHKERRQ(ierr);
451     } else {
452       /* have no children */
453       ierr = PetscCalloc1(dctx->numChildren,&dctx->childIDs);CHKERRQ(ierr);
454     }
455     /* update total number of children */
456     dctx->maxNumChildren = dctx->numChildren;
457   }
458   ierr = PetscMalloc1(n,&dsubTmp);CHKERRQ(ierr);
459   while (n) {
460     /* empty child slot */
461     if (!(dctx->childIDs[i])) {
462       /* create the child context in the image of its parent */
463       ierr = PetscDeviceContextDuplicate(dctx,dsubTmp+i);CHKERRQ(ierr);
464       ierr = PetscDeviceContextWaitForContext(dsubTmp[i],dctx);CHKERRQ(ierr);
465       /* register the child with its parent */
466       dctx->childIDs[i] = dsubTmp[i]->id;
467 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
468       idList += std::to_string(dsubTmp[i]->id);
469       if (n != 1) idList += ", ";
470 #endif
471       --n;
472     }
473     ++i;
474   }
475 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
476   ierr = PetscInfo3(NULL,"Forked %D children from parent %D with IDs: %s\n",nBefore,dctx->id,idList.c_str());CHKERRQ(ierr);
477   /* resets the size but doesn't deallocate the memory */
478   idList.clear();
479 #endif
480   /* pass the children back to caller */
481   *dsub = dsubTmp;
482   PetscFunctionReturn(0);
483 }
484 
485 /*@C
  PetscDeviceContextJoin - Converge a set of child contexts
487 
488   Not Collective, Asynchronous
489 
490   Input Parameters:
491 + dctx         - A PetscDeviceContext to converge on
492 . n            - The number of sub contexts to converge
493 . joinMode     - The type of join to perform
494 - dsub         - The sub contexts to converge
495 
496   Notes:
497   If PetscDeviceContextFork() creates n edges from a source node which all depend on the
498   source node, then this routine is the exact mirror. That is, it creates a node
  (represented in dctx) which receives n edges (and optionally destroys them) which is
500   dependent on the completion of all incoming edges.
501 
502   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY all contexts in dsub will be destroyed
503   by this routine. Thus all sub contexts must have been created with the dctx passed to
504   this routine.
505 
  If joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC dctx waits for all sub contexts but the
507   sub contexts do not wait for one another afterwards.
508 
509   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC all sub contexts will additionally
510   wait on dctx after converging. This has the effect of "synchronizing" the outgoing
511   edges.
512 
513   DAG representations:
514   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY
515 .vb
516   time ->
517 
518   -> dctx ---------/- dctx ->
519   -> dsub[0] -----/
520   ->  ... -------/
521   -> dsub[n-1] -/
522 .ve
523   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC
524 .vb
525   time ->
526 
527   -> dctx ---------/- dctx ->
528   -> dsub[0] -----/--------->
529   ->  ... -------/---------->
530   -> dsub[n-1] -/----------->
531 .ve
532   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC
533 .vb
534   time ->
535 
536   -> dctx ---------/- dctx -\----> dctx ------>
537   -> dsub[0] -----/          \---> dsub[0] --->
538   ->  ... -------/            \--> ... ------->
539   -> dsub[n-1] -/              \-> dsub[n-1] ->
540 .ve
541 
542   Level: intermediate
543 
544 .seealso: PetscDeviceContextFork(), PetscDeviceContextSynchronize(), PetscDeviceContextJoinMode
545 @*/
546 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub)
547 {
548 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
549   static std::string idList;
550 #endif
551   PetscErrorCode     ierr;
552 
553   PetscFunctionBegin;
554   /* validity of dctx is checked in the wait-for loop */
555   PetscValidPointer(dsub,4);
556   if (PetscUnlikelyDebug(n < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts merged %D < 0",n);
557 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
558   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
559   idList.reserve(4*n);
560 #endif
561   /* first dctx waits on all the incoming edges */
562   for (PetscInt i = 0; i < n; ++i) {
563     PetscCheckCompatibleDeviceContexts(dctx,1,(*dsub)[i],4);
564     ierr = PetscDeviceContextWaitForContext(dctx,(*dsub)[i]);CHKERRQ(ierr);
565 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
566     idList += std::to_string((*dsub)[i]->id);
567     if (i+1 < n) idList += ", ";
568 #endif
569   }
570 
571   /* now we handle the aftermath */
572   switch (joinMode) {
573   case PETSC_DEVICE_CONTEXT_JOIN_DESTROY:
574     {
575       PetscInt j = 0;
576 
577       if (PetscUnlikelyDebug(n > dctx->numChildren)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Trying to destroy %D children of a parent context that only has %D children, likely trying to restore to wrong parent",n,dctx->numChildren);
578       /* update child count while it's still fresh in memory */
579       dctx->numChildren -= n;
580       for (PetscInt i = 0; i < dctx->maxNumChildren; ++i) {
581         if (dctx->childIDs[i] && (dctx->childIDs[i] == (*dsub)[j]->id)) {
582           /* child is one of ours, can destroy it */
583           ierr = PetscDeviceContextDestroy((*dsub)+j);CHKERRQ(ierr);
584           /* reset the child slot */
585           dctx->childIDs[i] = 0;
586           if (++j == n) break;
587         }
588       }
589       /* gone through the loop but did not find every child, if this triggers (or well, doesn't) on perf-builds we leak the remaining contexts memory */
590       if (PetscUnlikelyDebug(j != n)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"%D contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in.",n-j);
591       ierr = PetscFree(*dsub);CHKERRQ(ierr);
592     }
593     break;
594   case PETSC_DEVICE_CONTEXT_JOIN_SYNC:
595     for (PetscInt i = 0; i < n; ++i) {
596       ierr = PetscDeviceContextWaitForContext((*dsub)[i],dctx);CHKERRQ(ierr);
597     }
598   case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC:
599     break;
600   default:
601     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown PetscDeviceContextJoinMode given");
602     break;
603   }
604 
605 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
606   ierr = PetscInfo4(NULL,"Joined %D ctxs to ctx %D, mode %s with IDs: %s\n",n,dctx->id,PetscDeviceContextJoinModes[joinMode],idList.c_str());CHKERRQ(ierr);
607   idList.clear();
608 #endif
609   PetscFunctionReturn(0);
610 }
611 
612 /*@C
  PetscDeviceContextSynchronize - Block the host until all work queued on or associated with a PetscDeviceContext has finished
614 
615   Not Collective, Synchronous
616 
617   Input Parameters:
618 . dctx - The PetscDeviceContext to synchronize
619 
620   Level: beginner
621 
622 .seealso: PetscDeviceContextFork(), PetscDeviceContextJoin(), PetscDeviceContextQueryIdle()
623 @*/
624 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx)
625 {
626   PetscErrorCode ierr;
627 
628   PetscFunctionBegin;
629   PetscValidDeviceContext(dctx,1);
630   /* if it isn't setup there is nothing to sync on */
631   if (dctx->setup) {ierr = (*dctx->ops->synchronize)(dctx);CHKERRQ(ierr);}
632   dctx->idle = PETSC_TRUE;
633   PetscFunctionReturn(0);
634 }
635 
636 static PetscDeviceContext globalContext      = nullptr;
637 static PetscBool          globalContextSetup = PETSC_FALSE;
638 static PetscStreamType    defaultStreamType  = PETSC_STREAM_DEFAULT_BLOCKING;
639 
640 /* automatically registered to PetscFinalize() when first context is instantiated, do not
641    call */
642 static PetscErrorCode PetscDeviceContextDestroyGlobalContext_Private(void)
643 {
644   PetscErrorCode ierr;
645 
646   PetscFunctionBegin;
647   ierr = PetscDeviceContextSynchronize(globalContext);CHKERRQ(ierr);
648   ierr = PetscDeviceContextDestroy(&globalContext);CHKERRQ(ierr);
649   /* reset everything to defaults */
650   defaultStreamType  = PETSC_STREAM_DEFAULT_BLOCKING;
651   globalContextSetup = PETSC_FALSE;
652   PetscFunctionReturn(0);
653 }
654 
655 /* creates and initializes the root context in PetscInitialize() but does not call
656    SetUp() as the user may wish to change types after PetscInitialize() */
657 PetscErrorCode PetscDeviceContextInitializeRootContext_Internal(MPI_Comm comm, const char prefix[])
658 {
659   PetscErrorCode ierr;
660 
661   PetscFunctionBegin;
662   ierr = PetscInfo1(NULL,"Initializing root PetscDeviceContext with PetscDeviceKind %s\n",PetscDeviceKinds[PETSC_DEVICE_DEFAULT]);CHKERRQ(ierr);
663   ierr = PetscDeviceContextCreate(&globalContext);CHKERRQ(ierr);
664   if (PetscUnlikelyDebug(globalContext->id != 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The root current PetscDeviceContext should have id = 0, however it has id = %D",globalContext->id);
665   ierr = PetscDeviceContextSetDevice(globalContext,PetscDeviceDefault_Internal());CHKERRQ(ierr);
666   ierr = PetscDeviceContextSetStreamType(globalContext,defaultStreamType);CHKERRQ(ierr);
667   ierr = PetscDeviceContextSetFromOptions(comm,prefix,globalContext);CHKERRQ(ierr);
668   ierr = PetscRegisterFinalize(PetscDeviceContextDestroyGlobalContext_Private);CHKERRQ(ierr);
669   PetscFunctionReturn(0);
670 }
671 
672 /*@C
  PetscDeviceContextGetCurrentContext - Get the current active PetscDeviceContext
674 
675   Not Collective, Asynchronous
676 
677   Output Parameter:
678 . dctx - The PetscDeviceContext
679 
680   Notes:
681   The user generally should not destroy contexts retrieved with this routine unless they themselves have created
682   them. There exists no protection against destroying the root context.
683 
684   Developer Notes:
685   This routine creates the "root" context the first time it is called, registering its
686   destructor to PetscFinalize(). The root context is synchronized before being destroyed.
687 
688   Level: beginner
689 
690 .seealso: PetscDeviceContextSetCurrentContext(), PetscDeviceContextFork(),
691 PetscDeviceContextJoin(), PetscDeviceContextCreate()
692 @*/
693 PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *dctx)
694 {
695   PetscFunctionBegin;
696   PetscValidPointer(dctx,1);
697   if (PetscUnlikely(!globalContextSetup)) {
698     PetscErrorCode ierr;
699 
700     /* if there is no available device backend, PetscDeviceInitializePackage() will fire a
701        PETSC_ERR_SUP_SYS error. */
702     ierr = PetscDeviceInitializePackage();CHKERRQ(ierr);
703     ierr = PetscDeviceContextSetUp(globalContext);CHKERRQ(ierr);
704     globalContextSetup = PETSC_TRUE;
705   }
706   *dctx = globalContext;
707   PetscFunctionReturn(0);
708 }
709 
710 /*@C
  PetscDeviceContextSetCurrentContext - Set the current active PetscDeviceContext
712 
713   Not Collective, Asynchronous
714 
715   Input Parameter:
716 . dctx - The PetscDeviceContext
717 
718   Notes:
719   The old context is not stored in any way by this routine; if one is overriding a context that they themselves do not
720   control, one should take care to temporarily store it by calling PetscDeviceContextGetCurrentContext() before calling
721   this routine.
722 
723   Level: beginner
724 
725 .seealso: PetscDeviceContextGetCurrentContext(), PetscDeviceContextFork(),
726 PetscDeviceContextJoin(), PetscDeviceContextCreate()
727 @*/
728 PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext dctx)
729 {
730   PetscErrorCode ierr;
731 
732   PetscFunctionBegin;
733   PetscValidDeviceContext(dctx,1);
734   globalContext = dctx;
735   ierr = PetscInfo1(NULL,"Set global device context id %D\n",dctx->id);CHKERRQ(ierr);
736   PetscFunctionReturn(0);
737 }
738 
739 /*@C
740   PetscDeviceContextSetFromOptions - Configure a PetscDeviceContext from the options database
741 
742   Collective on comm, Asynchronous
743 
744   Input Parameters:
745 + comm   - MPI communicator on which to query the options database
746 . prefix - prefix to prepend to all options database queries, NULL if not needed
747 - dctx   - The PetscDeviceContext to configure
748 
749   Output Parameter:
750 . dctx - The PetscDeviceContext
751 
752   Options Database:
753 . -device_context_device_kind - the kind of PetscDevice to attach by default
754 . -device_context_stream_type - type of stream to create inside the PetscDeviceContext -
755   PetscDeviceContextSetStreamType()
756 
757   Level: beginner
758 
759 .seealso: PetscDeviceContextSetStreamType()
760 @*/
761 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, const char prefix[], PetscDeviceContext dctx)
762 {
763   PetscBool      flag;
764   PetscInt       stype,dkind;
765   PetscErrorCode ierr;
766 
767   PetscFunctionBegin;
768   if (prefix) {PetscValidCharPointer(prefix,2);}
769   PetscValidDeviceContext(dctx,3);
770   ierr = PetscOptionsBegin(comm,prefix,"PetscDeviceContext Options","Sys");CHKERRQ(ierr);
771   ierr = PetscOptionsEList("-device_context_device_kind","Underlying PetscDevice","PetscDeviceContextSetDevice",PetscDeviceKinds+1,PETSC_DEVICE_MAX-1,dctx->device ? PetscDeviceKinds[dctx->device->kind] : PetscDeviceKinds[PETSC_DEVICE_DEFAULT],&dkind,&flag);CHKERRQ(ierr);
772   if (flag) {
773     ierr = PetscDeviceContextSetDevice(dctx,PetscDeviceDefaultKind_Internal(static_cast<PetscDeviceKind>(dkind+1)));CHKERRQ(ierr);
774   }
775   ierr = PetscOptionsEList("-device_context_stream_type","PetscDeviceContext PetscStreamType","PetscDeviceContextSetStreamType",PetscStreamTypes,3,PetscStreamTypes[dctx->streamType],&stype,&flag);CHKERRQ(ierr);
776   if (flag) {
777     ierr = PetscDeviceContextSetStreamType(dctx,static_cast<PetscStreamType>(stype));CHKERRQ(ierr);
778   }
779   ierr = PetscOptionsEnd();CHKERRQ(ierr);
780   PetscFunctionReturn(0);
781 }
782