xref: /petsc/src/sys/objects/device/interface/dcontext.cxx (revision 60d4fc614ff537ecd43db92193c85cc85809f93f)
1 #include <petsc/private/deviceimpl.h> /*I "petscdevice.h" I*/
2 #include "objpool.hpp"
3 
4 /* Define the allocator */
5 struct PetscDeviceContextAllocator : Petsc::Allocator<PetscDeviceContext>
6 {
7   static PetscInt PetscDeviceContextID;
8 
9   PETSC_NODISCARD PetscErrorCode create(PetscDeviceContext *dctx) noexcept
10   {
11     PetscDeviceContext dc;
12     PetscErrorCode     ierr;
13 
14     PetscFunctionBegin;
15     ierr           = PetscNew(&dc);CHKERRQ(ierr);
16     dc->id         = PetscDeviceContextID++;
17     dc->idle       = PETSC_TRUE;
18     dc->streamType = PETSC_STREAM_DEFAULT_BLOCKING;
19     *dctx          = dc;
20     PetscFunctionReturn(0);
21   }
22 
23   PETSC_NODISCARD PetscErrorCode destroy(PetscDeviceContext &dctx) const noexcept
24   {
25     PetscErrorCode ierr;
26 
27     PetscFunctionBegin;
28     if (PetscUnlikelyDebug(dctx->numChildren)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Device context still has %D un-restored children, must call PetscDeviceContextRestore() on all children before destroying",dctx->numChildren);
29     if (dctx->ops->destroy) {ierr = (*dctx->ops->destroy)(dctx);CHKERRQ(ierr);}
30     ierr = PetscDeviceDestroy(&dctx->device);CHKERRQ(ierr);
31     ierr = PetscFree(dctx->childIDs);CHKERRQ(ierr);
32     ierr = PetscFree(dctx);CHKERRQ(ierr);
33     PetscFunctionReturn(0);
34   }
35 
36   PETSC_NODISCARD PetscErrorCode reset(PetscDeviceContext &dctx) const noexcept
37   {
38     PetscErrorCode ierr;
39 
40     PetscFunctionBegin;
41     /* don't deallocate the child array, rather just zero it out */
42     ierr = PetscArrayzero(dctx->childIDs,dctx->maxNumChildren);CHKERRQ(ierr);
43     dctx->setup       = PETSC_FALSE;
44     dctx->numChildren = 0;
45     dctx->idle        = PETSC_TRUE;
46     dctx->streamType  = PETSC_STREAM_DEFAULT_BLOCKING;
47     PetscFunctionReturn(0);
48   }
49 
50   PETSC_NODISCARD PetscErrorCode finalize(void) noexcept
51   {
52     PetscFunctionBegin;
53     PetscDeviceContextID = 0;
54     PetscFunctionReturn(0);
55   }
56 };
57 PetscInt PetscDeviceContextAllocator::PetscDeviceContextID = 0;
58 
59 static Petsc::ObjectPool<PetscDeviceContext,PetscDeviceContextAllocator> contextPool;
60 
61 /*@C
62   PetscDeviceContextCreate - Creates a PetscDeviceContext
63 
64   Not Collective, Asynchronous
65 
  Output Parameter:
67 . dctx - The PetscDeviceContext
68 
69   Notes:
70   Unlike almost every other PETSc class it is advised that most users use
71   PetscDeviceContextDuplicate() rather than this routine to create new contexts. Contexts
72   of different types are incompatible with one another; using
73   PetscDeviceContextDuplicate() ensures compatible types.
74 
75   Level: beginner
76 
77 .seealso: PetscDeviceContextDuplicate(), PetscDeviceContextSetDevice(),
78 PetscDeviceContextSetStreamType(), PetscDeviceContextSetUp(),
79 PetscDeviceContextSetFromOptions(), PetscDeviceContextDestroy()
80 @*/
81 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx)
82 {
83   PetscErrorCode ierr;
84 
85   PetscFunctionBegin;
86   PetscValidPointer(dctx,1);
87   ierr = PetscDeviceInitializePackage();CHKERRQ(ierr);
88   ierr = contextPool.get(*dctx);CHKERRQ(ierr);
89   PetscFunctionReturn(0);
90 }
91 
92 /*@C
93   PetscDeviceContextDestroy - Frees a PetscDeviceContext
94 
95   Not Collective, Asynchronous
96 
97   Input Parameters:
98 . dctx - The PetscDeviceContext
99 
100   Notes:
101   No implicit synchronization occurs due to this routine, all resources are released completely asynchronously
  w.r.t. the host. If one needs to guarantee access to the data produced on this context's stream one should perform the
103   appropriate synchronization before calling this routine.
104 
105   Developer Notes:
106   The context is never actually "destroyed", only returned to an ever growing pool of
107   contexts. There are currently no safeguards on the size of the pool, this should perhaps
108   be implemented.
109 
110   Level: beginner
111 
112 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetUp(), PetscDeviceContextSynchronize()
113 @*/
114 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx)
115 {
116   PetscErrorCode ierr;
117 
118   PetscFunctionBegin;
119   if (!*dctx) PetscFunctionReturn(0);
120   /* use move assignment whenever possible */
121   ierr = contextPool.reclaim(std::move(*dctx));CHKERRQ(ierr);
122   PetscFunctionReturn(0);
123 }
124 
125 /*@C
126   PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a PetscDeviceContext
127 
128   Not Collective, Asynchronous
129 
130   Input Parameters:
131 + dctx - The PetscDeviceContext
132 - type - The PetscStreamType
133 
134   Notes:
135   See PetscStreamType in include/petscdevicetypes.h for more information on the available
136   types and their interactions. If the PetscDeviceContext was previously set up and stream
137   type was changed, you must call PetscDeviceContextSetUp() again after this routine.
138 
139   Level: intermediate
140 
141 .seealso: PetscDeviceContextGetStreamType(), PetscDeviceContextCreate(), PetscDeviceContextSetUp(), PetscDeviceContextSetFromOptions()
142 @*/
143 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type)
144 {
145   PetscFunctionBegin;
146   PetscValidDeviceContext(dctx,1);
147   PetscValidStreamType(type,2);
148   /* only need to do complex swapping if the object has already been setup */
149   if (dctx->setup && (dctx->streamType != type)) {
150     PetscErrorCode ierr;
151 
152     ierr = (*dctx->ops->changestreamtype)(dctx,type);CHKERRQ(ierr);
153     dctx->setup = PETSC_FALSE;
154   }
155   dctx->streamType = type;
156   PetscFunctionReturn(0);
157 }
158 
159 /*@C
160   PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a PetscDeviceContext
161 
162   Not Collective, Asynchronous
163 
164   Input Parameter:
165 . dctx - The PetscDeviceContext
166 
167   Output Parameter:
168 . type - The PetscStreamType
169 
170   Notes:
171   See PetscStreamType in include/petscdevicetypes.h for more information on the available types and their interactions
172 
173   Level: intermediate
174 
175 .seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextCreate(), PetscDeviceContextSetFromOptions()
176 @*/
177 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type)
178 {
179   PetscFunctionBegin;
180   PetscValidDeviceContext(dctx,1);
181   PetscValidIntPointer(type,2);
182   *type = dctx->streamType;
183   PetscFunctionReturn(0);
184 }
185 
186 /*@C
187   PetscDeviceContextSetDevice - Set the underlying device for the PetscDeviceContext
188 
189   Not Collective, Possibly Synchronous
190 
191   Input Parameters:
192 + dctx   - The PetscDeviceContext
193 - device - The PetscDevice
194 
195   Notes:
196   This routine is effectively PetscDeviceContext's "set-type" (so every PetscDeviceContext
197   must also have an attached PetscDevice). Unlike the usual set-type semantics, it is
  not strictly necessary to set a context's device to enable usage, any created device
199   contexts will always come equipped with the "default" device.
200 
201   This routine may initialize the backend device and incur synchronization.
202 
203   Level: intermediate
204 
205 .seealso: PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceContextGetDevice()
206 @*/
207 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device)
208 {
209   PetscErrorCode ierr;
210 
211   PetscFunctionBegin;
212   PetscValidDeviceContext(dctx,1);
213   PetscValidDevice(device,2);
214   if (dctx->device == device) PetscFunctionReturn(0);
215   ierr = PetscDeviceDestroy(&dctx->device);CHKERRQ(ierr);
216   ierr = PetscMemzero(dctx->ops,sizeof(*dctx->ops));CHKERRQ(ierr);
217   ierr = (*device->ops->createcontext)(dctx);CHKERRQ(ierr);
218   dctx->device = PetscDeviceReference(device);
219   dctx->setup  = PETSC_FALSE;
220   PetscFunctionReturn(0);
221 }
222 
223 /*@C
224   PetscDeviceContextGetDevice - Get the underlying PetscDevice for a PetscDeviceContext
225 
226   Not Collective, Asynchronous
227 
228   Input Parameter:
229 . dctx - the PetscDeviceContext
230 
231   Output Parameter:
232 . device - The PetscDevice
233 
234   Notes:
235   This is a borrowed reference, the user should not destroy the device.
236 
237 .seealso: PetscDeviceContextSetDevice(), PetscDevice
238 @*/
239 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device)
240 {
241   PetscFunctionBegin;
242   PetscValidDeviceContext(dctx,1);
243   PetscValidPointer(device,2);
244   *device = dctx->device;
245   PetscFunctionReturn(0);
246 }
247 
248 /*@C
249   PetscDeviceContextSetUp - Prepares a PetscDeviceContext for use
250 
251   Not Collective, Asynchronous
252 
253   Input Parameter:
254 . dctx - The PetscDeviceContext
255 
256   Developer Notes:
257   This routine is usually the stage where a PetscDeviceContext acquires device-side data structures such as streams,
258   events, and (possibly) handles.
259 
260   Level: beginner
261 
262 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextDestroy(), PetscDeviceContextSetFromOptions()
263 @*/
264 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx)
265 {
266   PetscErrorCode ierr;
267 
268   PetscFunctionBegin;
269   PetscValidDeviceContext(dctx,1);
270   if (!dctx->device) {
271     ierr = PetscInfo2(NULL,"PetscDeviceContext %d did not have an explicitly attached PetscDevice, using default with type %s\n",dctx->id,PetscDeviceKinds[PETSC_DEVICE_DEFAULT]);CHKERRQ(ierr);
272     ierr = PetscDeviceContextSetDevice(dctx,PetscDeviceDefault_Internal());CHKERRQ(ierr);
273   }
274   if (dctx->setup) PetscFunctionReturn(0);
275   ierr = (*dctx->ops->setup)(dctx);CHKERRQ(ierr);
276   dctx->setup = PETSC_TRUE;
277   PetscFunctionReturn(0);
278 }
279 
280 /*@C
281   PetscDeviceContextDuplicate - Duplicates a PetscDeviceContext object
282 
283   Not Collective, Asynchronous
284 
285   Input Parameter:
286 . dctx - The PetscDeviceContext to duplicate
287 
  Output Parameter:
. dctxdup - The duplicated PetscDeviceContext
290 
291   Notes:
292   This is a shorthand method for creating a PetscDeviceContext with the exact same
293   settings as another. Note however that the duplicated PetscDeviceContext does not "share"
294   any of the underlying data with the original, (including its current stream-state) they
295   are completely separate objects.
296 
297   Level: beginner
298 
299 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetStreamType()
300 @*/
301 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup)
302 {
303   PetscErrorCode ierr;
304 
305   PetscFunctionBegin;
306   PetscValidDeviceContext(dctx,1);
307   PetscValidPointer(dctxdup,2);
308   ierr = PetscDeviceContextCreate(dctxdup);CHKERRQ(ierr);
309   ierr = PetscDeviceContextSetDevice(*dctxdup,dctx->device);CHKERRQ(ierr);
310   ierr = PetscDeviceContextSetStreamType(*dctxdup,dctx->streamType);CHKERRQ(ierr);
311   ierr = PetscDeviceContextSetUp(*dctxdup);CHKERRQ(ierr);
312   PetscFunctionReturn(0);
313 }
314 
315 /*@C
316   PetscDeviceContextQueryIdle - Returns whether or not a PetscDeviceContext is idle
317 
318   Not Collective, Asynchronous
319 
320   Input Parameter:
321 . dctx - The PetscDeviceContext object
322 
323   Output Parameter:
324 . idle - PETSC_TRUE if PetscDeviceContext has NO work, PETSC_FALSE if it has work
325 
326   Notes:
  This routine only refers to a single context and does NOT take any of its children into account. That is, if dctx is
328   idle but has dependents who do have work, this routine still returns PETSC_TRUE.
329 
330   Results of PetscDeviceContextQueryIdle() are cached on return, allowing this function to be called repeatedly in an
331   efficient manner. When debug mode is enabled this cache is verified on every call to
332   this routine, but is blindly believed when debugging is disabled.
333 
334   Level: intermediate
335 
336 .seealso: PetscDeviceContextCreate(), PetscDeviceContextWaitForContext(), PetscDeviceContextFork()
337 @*/
338 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle)
339 {
340   PetscErrorCode ierr;
341 
342   PetscFunctionBegin;
343   PetscValidDeviceContext(dctx,1);
344   PetscValidBoolPointer(idle,2);
345   if (dctx->idle) {
346     *idle = PETSC_TRUE;
347     ierr = PetscDeviceContextValidateIdle_Internal(dctx);CHKERRQ(ierr);
348   } else {
349     ierr = (*dctx->ops->query)(dctx,idle);CHKERRQ(ierr);
350     dctx->idle = *idle;
351   }
352   PetscFunctionReturn(0);
353 }
354 
355 /*@C
356   PetscDeviceContextWaitForContext - Make one context wait for another context to finish
357 
358   Not Collective, Asynchronous
359 
360   Input Parameters:
361 + dctxa - The PetscDeviceContext object that is waiting
362 - dctxb - The PetscDeviceContext object that is being waited on
363 
364   Notes:
365   Serializes two PetscDeviceContexts. This routine uses only the state of dctxb at the moment this routine was
366   called, so any future work queued will not affect dctxa. It is safe to pass the same context to both arguments.
367 
368   Level: beginner
369 
370 .seealso: PetscDeviceContextCreate(), PetscDeviceContextQueryIdle(), PetscDeviceContextJoin()
371 @*/
372 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb)
373 {
374   PetscErrorCode ierr;
375 
376   PetscFunctionBegin;
377   PetscCheckCompatibleDeviceContexts(dctxa,1,dctxb,2);
378   if (dctxa == dctxb) PetscFunctionReturn(0);
379   if (dctxb->idle) {
380     /* No need to do the extra function lookup and event record if the stream were waiting on isn't doing anything */
381     ierr = PetscDeviceContextValidateIdle_Internal(dctxb);CHKERRQ(ierr);
382   } else {
383     ierr = (*dctxa->ops->waitforctx)(dctxa,dctxb);CHKERRQ(ierr);
384   }
385   PetscFunctionReturn(0);
386 }
387 
388 /*@C
389   PetscDeviceContextFork - Create a set of dependent child contexts from a parent context
390 
391   Not Collective, Asynchronous
392 
393   Input Parameters:
394 + dctx - The parent PetscDeviceContext
395 - n    - The number of children to create
396 
397   Output Parameter:
398 . dsub - The created child context(s)
399 
400   Notes:
401   This routine creates n edges of a DAG from a source node which are causally dependent on the source node, meaning
402   that work queued on child contexts will not start until the parent context finishes its work. This accounts for work
403   queued on the parent up until calling this function, any subsequent work enqueued on the parent has no effect on the children.
404 
405   Any children created with this routine have their lifetimes bounded by the parent. That is, the parent context expects
  to free all of its children (and ONLY its children) before it is itself freed.
407 
408   DAG representation:
409 .vb
410   time ->
411 
412   -> dctx \----> dctx ------>
413            \---> dsub[0] --->
414             \--> ... ------->
415              \-> dsub[n-1] ->
416 .ve
417 
418   Level: intermediate
419 
420 .seealso: PetscDeviceContextJoin(), PetscDeviceContextSynchronize(), PetscDeviceContextQueryIdle()
421 @*/
422 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub)
423 {
424 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
425   const PetscInt      nBefore = n;
426   static std::string  idList;
427 #endif
428   PetscDeviceContext *dsubTmp = nullptr;
429   PetscInt            i = 0;
430   PetscErrorCode      ierr;
431 
432   PetscFunctionBegin;
433   PetscValidDeviceContext(dctx,1);
434   PetscValidPointer(dsub,3);
435   if (PetscUnlikelyDebug(n < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts requested %D < 0",n);
436 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
437   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
438   idList.reserve(4*n);
439 #endif
440   /* update child totals */
441   dctx->numChildren += n;
442   /* now to find out if we have room */
443   if (dctx->numChildren > dctx->maxNumChildren) {
444     /* no room, either from having too many kids or not having any */
445     if (dctx->childIDs) {
446       /* have existing children, must reallocate them */
447       ierr = PetscRealloc(dctx->numChildren*sizeof(*dctx->childIDs),&dctx->childIDs);CHKERRQ(ierr);
448       /* clear the extra memory since realloc doesn't do it for us */
449       ierr = PetscArrayzero((dctx->childIDs)+(dctx->maxNumChildren),(dctx->numChildren)-(dctx->maxNumChildren));CHKERRQ(ierr);
450     } else {
451       /* have no children */
452       ierr = PetscCalloc1(dctx->numChildren,&dctx->childIDs);CHKERRQ(ierr);
453     }
454     /* update total number of children */
455     dctx->maxNumChildren = dctx->numChildren;
456   }
457   ierr = PetscMalloc1(n,&dsubTmp);CHKERRQ(ierr);
458   while (n) {
459     /* empty child slot */
460     if (!(dctx->childIDs[i])) {
461       /* create the child context in the image of its parent */
462       ierr = PetscDeviceContextDuplicate(dctx,dsubTmp+i);CHKERRQ(ierr);
463       ierr = PetscDeviceContextWaitForContext(dsubTmp[i],dctx);CHKERRQ(ierr);
464       /* register the child with its parent */
465       dctx->childIDs[i] = dsubTmp[i]->id;
466 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
467       idList += std::to_string(dsubTmp[i]->id);
468       if (n != 1) idList += ", ";
469 #endif
470       --n;
471     }
472     ++i;
473   }
474 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
475   ierr = PetscInfo3(NULL,"Forked %D children from parent %D with IDs: %s\n",nBefore,dctx->id,idList.c_str());CHKERRQ(ierr);
476   /* resets the size but doesn't deallocate the memory */
477   idList.clear();
478 #endif
479   /* pass the children back to caller */
480   *dsub = dsubTmp;
481   PetscFunctionReturn(0);
482 }
483 
484 /*@C
485   PetscDeviceContextJoin - Converge a set of child contexts
486 
487   Not Collective, Asynchronous
488 
489   Input Parameters:
490 + dctx         - A PetscDeviceContext to converge on
491 . n            - The number of sub contexts to converge
492 . joinMode     - The type of join to perform
493 - dsub         - The sub contexts to converge
494 
495   Notes:
496   If PetscDeviceContextFork() creates n edges from a source node which all depend on the
497   source node, then this routine is the exact mirror. That is, it creates a node
  (represented in dctx) which receives n edges (and optionally destroys them) which is
499   dependent on the completion of all incoming edges.
500 
501   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY all contexts in dsub will be destroyed
502   by this routine. Thus all sub contexts must have been created with the dctx passed to
503   this routine.
504 
  If joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC dctx waits for all sub contexts but the
506   sub contexts do not wait for one another afterwards.
507 
508   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC all sub contexts will additionally
509   wait on dctx after converging. This has the effect of "synchronizing" the outgoing
510   edges.
511 
512   DAG representations:
513   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY
514 .vb
515   time ->
516 
517   -> dctx ---------/- dctx ->
518   -> dsub[0] -----/
519   ->  ... -------/
520   -> dsub[n-1] -/
521 .ve
522   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC
523 .vb
524   time ->
525 
526   -> dctx ---------/- dctx ->
527   -> dsub[0] -----/--------->
528   ->  ... -------/---------->
529   -> dsub[n-1] -/----------->
530 .ve
531   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC
532 .vb
533   time ->
534 
535   -> dctx ---------/- dctx -\----> dctx ------>
536   -> dsub[0] -----/          \---> dsub[0] --->
537   ->  ... -------/            \--> ... ------->
538   -> dsub[n-1] -/              \-> dsub[n-1] ->
539 .ve
540 
541   Level: intermediate
542 
543 .seealso: PetscDeviceContextFork(), PetscDeviceContextSynchronize(), PetscDeviceContextJoinMode
544 @*/
545 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub)
546 {
547 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
548   static std::string idList;
549 #endif
550   PetscErrorCode     ierr;
551 
552   PetscFunctionBegin;
553   /* validity of dctx is checked in the wait-for loop */
554   PetscValidPointer(dsub,4);
555   if (PetscUnlikelyDebug(n < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts merged %D < 0",n);
556 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
557   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
558   idList.reserve(4*n);
559 #endif
560   /* first dctx waits on all the incoming edges */
561   for (PetscInt i = 0; i < n; ++i) {
562     PetscCheckCompatibleDeviceContexts(dctx,1,(*dsub)[i],4);
563     ierr = PetscDeviceContextWaitForContext(dctx,(*dsub)[i]);CHKERRQ(ierr);
564 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
565     idList += std::to_string((*dsub)[i]->id);
566     if (i+1 < n) idList += ", ";
567 #endif
568   }
569 
570   /* now we handle the aftermath */
571   switch (joinMode) {
572   case PETSC_DEVICE_CONTEXT_JOIN_DESTROY:
573     {
574       PetscInt j = 0;
575 
576       if (PetscUnlikelyDebug(n > dctx->numChildren)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Trying to destroy %D children of a parent context that only has %D children, likely trying to restore to wrong parent",n,dctx->numChildren);
577       /* update child count while it's still fresh in memory */
578       dctx->numChildren -= n;
579       for (PetscInt i = 0; i < dctx->maxNumChildren; ++i) {
580         if (dctx->childIDs[i] && (dctx->childIDs[i] == (*dsub)[j]->id)) {
581           /* child is one of ours, can destroy it */
582           ierr = PetscDeviceContextDestroy((*dsub)+j);CHKERRQ(ierr);
583           /* reset the child slot */
584           dctx->childIDs[i] = 0;
585           if (++j == n) break;
586         }
587       }
588       /* gone through the loop but did not find every child, if this triggers (or well, doesn't) on perf-builds we leak the remaining contexts memory */
589       if (PetscUnlikelyDebug(j != n)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"%D contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in.",n-j);
590       ierr = PetscFree(*dsub);CHKERRQ(ierr);
591     }
592     break;
593   case PETSC_DEVICE_CONTEXT_JOIN_SYNC:
594     for (PetscInt i = 0; i < n; ++i) {
595       ierr = PetscDeviceContextWaitForContext((*dsub)[i],dctx);CHKERRQ(ierr);
596     }
597   case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC:
598     break;
599   default:
600     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown PetscDeviceContextJoinMode given");
601   }
602 
603 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
604   ierr = PetscInfo4(NULL,"Joined %D ctxs to ctx %D, mode %s with IDs: %s\n",n,dctx->id,PetscDeviceContextJoinModes[joinMode],idList.c_str());CHKERRQ(ierr);
605   idList.clear();
606 #endif
607   PetscFunctionReturn(0);
608 }
609 
610 /*@C
611   PetscDeviceContextSynchronize - Block the host until all work queued on or associated with a PetscDeviceContext has finished
612 
613   Not Collective, Synchronous
614 
615   Input Parameters:
616 . dctx - The PetscDeviceContext to synchronize
617 
618   Level: beginner
619 
620 .seealso: PetscDeviceContextFork(), PetscDeviceContextJoin(), PetscDeviceContextQueryIdle()
621 @*/
622 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx)
623 {
624   PetscErrorCode ierr;
625 
626   PetscFunctionBegin;
627   PetscValidDeviceContext(dctx,1);
628   /* if it isn't setup there is nothing to sync on */
629   if (dctx->setup) {ierr = (*dctx->ops->synchronize)(dctx);CHKERRQ(ierr);}
630   dctx->idle = PETSC_TRUE;
631   PetscFunctionReturn(0);
632 }
633 
634 static PetscDeviceContext globalContext      = nullptr;
635 static PetscBool          globalContextSetup = PETSC_FALSE;
636 static PetscStreamType    defaultStreamType  = PETSC_STREAM_DEFAULT_BLOCKING;
637 
638 /* automatically registered to PetscFinalize() when first context is instantiated, do not
639    call */
640 static PetscErrorCode PetscDeviceContextDestroyGlobalContext_Private(void)
641 {
642   PetscErrorCode ierr;
643 
644   PetscFunctionBegin;
645   ierr = PetscDeviceContextSynchronize(globalContext);CHKERRQ(ierr);
646   ierr = PetscDeviceContextDestroy(&globalContext);CHKERRQ(ierr);
647   /* reset everything to defaults */
648   defaultStreamType  = PETSC_STREAM_DEFAULT_BLOCKING;
649   globalContextSetup = PETSC_FALSE;
650   PetscFunctionReturn(0);
651 }
652 
653 /* creates and initializes the root context in PetscInitialize() but does not call
654    SetUp() as the user may wish to change types after PetscInitialize() */
655 PetscErrorCode PetscDeviceContextInitializeRootContext_Internal(MPI_Comm comm, const char prefix[])
656 {
657   PetscErrorCode ierr;
658 
659   PetscFunctionBegin;
660   ierr = PetscInfo1(NULL,"Initializing root PetscDeviceContext with PetscDeviceKind %s\n",PetscDeviceKinds[PETSC_DEVICE_DEFAULT]);CHKERRQ(ierr);
661   ierr = PetscDeviceContextCreate(&globalContext);CHKERRQ(ierr);
662   if (PetscUnlikelyDebug(globalContext->id != 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The root current PetscDeviceContext should have id = 0, however it has id = %D",globalContext->id);
663   ierr = PetscDeviceContextSetDevice(globalContext,PetscDeviceDefault_Internal());CHKERRQ(ierr);
664   ierr = PetscDeviceContextSetStreamType(globalContext,defaultStreamType);CHKERRQ(ierr);
665   ierr = PetscDeviceContextSetFromOptions(comm,prefix,globalContext);CHKERRQ(ierr);
666   ierr = PetscRegisterFinalize(PetscDeviceContextDestroyGlobalContext_Private);CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 /*@C
671   PetscDeviceContextGetCurrentContext - Get the current active PetscDeviceContext
672 
673   Not Collective, Asynchronous
674 
675   Output Parameter:
676 . dctx - The PetscDeviceContext
677 
678   Notes:
679   The user generally should not destroy contexts retrieved with this routine unless they themselves have created
680   them. There exists no protection against destroying the root context.
681 
682   Developer Notes:
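  This routine sets up the current global context (by default the "root" context) the first
  time it is called. The root context itself is created, and its destructor registered to
  PetscFinalize(), by PetscDeviceContextInitializeRootContext_Internal(). The root context
  is synchronized before being destroyed.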
685 
686   Level: beginner
687 
688 .seealso: PetscDeviceContextSetCurrentContext(), PetscDeviceContextFork(),
689 PetscDeviceContextJoin(), PetscDeviceContextCreate()
690 @*/
691 PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *dctx)
692 {
693   PetscFunctionBegin;
694   PetscValidPointer(dctx,1);
695   if (PetscUnlikely(!globalContextSetup)) {
696     PetscErrorCode ierr;
697 
698     /* if there is no available device backend, PetscDeviceInitializePackage() will fire a
699        PETSC_ERR_SUP_SYS error. */
700     ierr = PetscDeviceInitializePackage();CHKERRQ(ierr);
701     ierr = PetscDeviceContextSetUp(globalContext);CHKERRQ(ierr);
702     globalContextSetup = PETSC_TRUE;
703   }
704   *dctx = globalContext;
705   PetscFunctionReturn(0);
706 }
707 
708 /*@C
709   PetscDeviceContextSetCurrentContext - Set the current active PetscDeviceContext
710 
711   Not Collective, Asynchronous
712 
713   Input Parameter:
714 . dctx - The PetscDeviceContext
715 
716   Notes:
717   The old context is not stored in any way by this routine; if one is overriding a context that they themselves do not
718   control, one should take care to temporarily store it by calling PetscDeviceContextGetCurrentContext() before calling
719   this routine.
720 
721   Level: beginner
722 
723 .seealso: PetscDeviceContextGetCurrentContext(), PetscDeviceContextFork(),
724 PetscDeviceContextJoin(), PetscDeviceContextCreate()
725 @*/
726 PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext dctx)
727 {
728   PetscErrorCode ierr;
729 
730   PetscFunctionBegin;
731   PetscValidDeviceContext(dctx,1);
732   globalContext = dctx;
733   ierr = PetscInfo1(NULL,"Set global device context id %D\n",dctx->id);CHKERRQ(ierr);
734   PetscFunctionReturn(0);
735 }
736 
737 /*@C
738   PetscDeviceContextSetFromOptions - Configure a PetscDeviceContext from the options database
739 
740   Collective on comm, Asynchronous
741 
742   Input Parameters:
743 + comm   - MPI communicator on which to query the options database
744 . prefix - prefix to prepend to all options database queries, NULL if not needed
745 - dctx   - The PetscDeviceContext to configure
746 
747   Output Parameter:
748 . dctx - The PetscDeviceContext
749 
750   Options Database:
751 . -device_context_device_kind - the kind of PetscDevice to attach by default - PetscDeviceKind
752 . -device_context_stream_type - type of stream to create inside the PetscDeviceContext -
753   PetscDeviceContextSetStreamType()
754 
755   Level: beginner
756 
757 .seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextSetDevice()
758 @*/
759 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, const char prefix[], PetscDeviceContext dctx)
760 {
761   PetscBool      flag;
762   PetscInt       stype,dkind;
763   PetscErrorCode ierr;
764 
765   PetscFunctionBegin;
766   if (prefix) {PetscValidCharPointer(prefix,2);}
767   PetscValidDeviceContext(dctx,3);
768   ierr = PetscOptionsBegin(comm,prefix,"PetscDeviceContext Options","Sys");CHKERRQ(ierr);
769   ierr = PetscOptionsEList("-device_context_device_kind","Underlying PetscDevice","PetscDeviceContextSetDevice",PetscDeviceKinds+1,PETSC_DEVICE_MAX-1,dctx->device ? PetscDeviceKinds[dctx->device->kind] : PetscDeviceKinds[PETSC_DEVICE_DEFAULT],&dkind,&flag);CHKERRQ(ierr);
770   if (flag) {
771     ierr = PetscDeviceContextSetDevice(dctx,PetscDeviceDefaultKind_Internal(static_cast<PetscDeviceKind>(dkind+1)));CHKERRQ(ierr);
772   }
773   ierr = PetscOptionsEList("-device_context_stream_type","PetscDeviceContext PetscStreamType","PetscDeviceContextSetStreamType",PetscStreamTypes,3,PetscStreamTypes[dctx->streamType],&stype,&flag);CHKERRQ(ierr);
774   if (flag) {
775     ierr = PetscDeviceContextSetStreamType(dctx,static_cast<PetscStreamType>(stype));CHKERRQ(ierr);
776   }
777   ierr = PetscOptionsEnd();CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780