xref: /petsc/src/sys/objects/device/interface/dcontext.cxx (revision f210f596a55b992253e3045dd28ed212964dcd44)
1 #include <petsc/private/deviceimpl.h> /*I "petscdevice.h" I*/
2 #include "objpool.hpp"
3 
/* String table for PetscStreamType. It is indexed with a PetscStreamType value elsewhere in
 * this file (e.g. PetscStreamTypes[dctx->streamType]); the three trailing entries are the
 * enum type name, the common value prefix and a terminating null pointer. */
const char *const PetscStreamTypes[] = {"global_blocking","default_blocking","global_nonblocking","max","PetscStreamType","PETSC_STREAM_",nullptr};
13 
/* String table for PetscDeviceContextJoinMode. It is indexed with the join mode when logging
 * in PetscDeviceContextJoin(); the three trailing entries are the enum type name, the common
 * value prefix and a terminating null pointer. */
const char *const PetscDeviceContextJoinModes[] = {"destroy","sync","no_sync","PetscDeviceContextJoinMode","PETSC_DEVICE_CONTEXT_JOIN_",nullptr};
22 
23 /* Define the allocator */
24 struct PetscDeviceContextAllocator : Petsc::AllocatorBase<PetscDeviceContext>
25 {
26   static PetscInt PetscDeviceContextID;
27 
28   PETSC_NODISCARD static PetscErrorCode create(PetscDeviceContext *dctx) noexcept
29   {
30     PetscDeviceContext dc;
31     PetscErrorCode     ierr;
32 
33     PetscFunctionBegin;
34     ierr           = PetscNew(&dc);CHKERRQ(ierr);
35     dc->id         = PetscDeviceContextID++;
36     dc->streamType = PETSC_STREAM_DEFAULT_BLOCKING;
37     *dctx          = dc;
38     PetscFunctionReturn(0);
39   }
40 
41   PETSC_NODISCARD static PetscErrorCode destroy(PetscDeviceContext dctx) noexcept
42   {
43     PetscErrorCode ierr;
44 
45     PetscFunctionBegin;
46     PetscAssert(!dctx->numChildren,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying",dctx->numChildren);
47     if (dctx->ops->destroy) {ierr = (*dctx->ops->destroy)(dctx);CHKERRQ(ierr);}
48     ierr = PetscDeviceDestroy(&dctx->device);CHKERRQ(ierr);
49     ierr = PetscFree(dctx->childIDs);CHKERRQ(ierr);
50     ierr = PetscFree(dctx);CHKERRQ(ierr);
51     PetscFunctionReturn(0);
52   }
53 
54   PETSC_NODISCARD static PetscErrorCode reset(PetscDeviceContext dctx) noexcept
55   {
56     PetscErrorCode ierr;
57 
58     PetscFunctionBegin;
59     /* don't deallocate the child array, rather just zero it out */
60     ierr = PetscArrayzero(dctx->childIDs,dctx->maxNumChildren);CHKERRQ(ierr);
61     dctx->setup       = PETSC_FALSE;
62     dctx->numChildren = 0;
63     dctx->streamType  = PETSC_STREAM_DEFAULT_BLOCKING;
64     PetscFunctionReturn(0);
65   }
66 
67   PETSC_NODISCARD static constexpr PetscErrorCode finalize() noexcept { return 0; }
68 };
69 /* an ID = 0 is invalid */
70 PetscInt PetscDeviceContextAllocator::PetscDeviceContextID = 1;
71 
72 static Petsc::ObjectPool<PetscDeviceContext,PetscDeviceContextAllocator> contextPool;
73 
74 /*@C
75   PetscDeviceContextCreate - Creates a PetscDeviceContext
76 
77   Not Collective, Asynchronous
78 
  Output Parameter:
80 . dctx - The PetscDeviceContext
81 
82   Notes:
83   Unlike almost every other PETSc class it is advised that most users use
84   PetscDeviceContextDuplicate() rather than this routine to create new contexts. Contexts
85   of different types are incompatible with one another; using
86   PetscDeviceContextDuplicate() ensures compatible types.
87 
88   Level: beginner
89 
90 .seealso: PetscDeviceContextDuplicate(), PetscDeviceContextSetDevice(),
91 PetscDeviceContextSetStreamType(), PetscDeviceContextSetUp(),
92 PetscDeviceContextSetFromOptions(), PetscDeviceContextDestroy()
93 @*/
94 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx)
95 {
96   PetscErrorCode ierr;
97 
98   PetscFunctionBegin;
99   PetscValidPointer(dctx,1);
100   ierr = PetscDeviceInitializePackage();CHKERRQ(ierr);
101   ierr = contextPool.get(*dctx);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 /*@C
106   PetscDeviceContextDestroy - Frees a PetscDeviceContext
107 
108   Not Collective, Asynchronous
109 
110   Input Parameters:
111 . dctx - The PetscDeviceContext
112 
113   Notes:
  No implicit synchronization occurs due to this routine; all resources are released completely asynchronously
  w.r.t. the host. If one needs to guarantee access to the data produced on this context's stream one should perform the
  appropriate synchronization before calling this routine.
117 
118   Developer Notes:
119   The context is never actually "destroyed", only returned to an ever growing pool of
120   contexts. There are currently no safeguards on the size of the pool, this should perhaps
121   be implemented.
122 
123   Level: beginner
124 
125 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetUp(), PetscDeviceContextSynchronize()
126 @*/
127 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx)
128 {
129   PetscErrorCode ierr;
130 
131   PetscFunctionBegin;
132   if (!*dctx) PetscFunctionReturn(0);
133   ierr  = contextPool.reclaim(std::move(*dctx));CHKERRQ(ierr);
134   *dctx = nullptr;
135   PetscFunctionReturn(0);
136 }
137 
138 /*@C
139   PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a PetscDeviceContext
140 
141   Not Collective, Asynchronous
142 
143   Input Parameters:
144 + dctx - The PetscDeviceContext
145 - type - The PetscStreamType
146 
147   Notes:
148   See PetscStreamType in include/petscdevicetypes.h for more information on the available
149   types and their interactions. If the PetscDeviceContext was previously set up and stream
150   type was changed, you must call PetscDeviceContextSetUp() again after this routine.
151 
152   Level: intermediate
153 
154 .seealso: PetscStreamType, PetscDeviceContextGetStreamType(), PetscDeviceContextCreate(), PetscDeviceContextSetUp(), PetscDeviceContextSetFromOptions()
155 @*/
156 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type)
157 {
158   PetscFunctionBegin;
159   PetscValidDeviceContext(dctx,1);
160   PetscValidStreamType(type,2);
161   /* only need to do complex swapping if the object has already been setup */
162   if (dctx->setup && (dctx->streamType != type)) {
163     PetscErrorCode ierr;
164 
165     ierr = (*dctx->ops->changestreamtype)(dctx,type);CHKERRQ(ierr);
166     dctx->setup = PETSC_FALSE;
167   }
168   dctx->streamType = type;
169   PetscFunctionReturn(0);
170 }
171 
172 /*@C
173   PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a PetscDeviceContext
174 
175   Not Collective, Asynchronous
176 
177   Input Parameter:
178 . dctx - The PetscDeviceContext
179 
180   Output Parameter:
181 . type - The PetscStreamType
182 
183   Notes:
184   See PetscStreamType in include/petscdevicetypes.h for more information on the available types and their interactions
185 
186   Level: intermediate
187 
188 .seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextCreate(), PetscDeviceContextSetFromOptions()
189 @*/
190 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type)
191 {
192   PetscFunctionBegin;
193   PetscValidDeviceContext(dctx,1);
194   PetscValidIntPointer(type,2);
195   *type = dctx->streamType;
196   PetscFunctionReturn(0);
197 }
198 
199 /*@C
200   PetscDeviceContextSetDevice - Set the underlying device for the PetscDeviceContext
201 
202   Not Collective, Possibly Synchronous
203 
204   Input Parameters:
205 + dctx   - The PetscDeviceContext
206 - device - The PetscDevice
207 
208   Notes:
  This routine is effectively PetscDeviceContext's "set-type" (so every PetscDeviceContext
  must also have an attached PetscDevice). Unlike the usual set-type semantics, it is
  not strictly necessary to set a context's device to enable usage, any created device
  contexts will always come equipped with the "default" device.
213 
214   This routine is a no-op if dctx is already attached to device.
215 
216   This routine may initialize the backend device and incur synchronization.
217 
218   Level: intermediate
219 
220 .seealso: PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceContextGetDevice()
221 @*/
222 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device)
223 {
224   PetscErrorCode ierr;
225 
226   PetscFunctionBegin;
227   PetscValidDeviceContext(dctx,1);
228   PetscValidDevice(device,2);
229   if (dctx->device) {
230     /* can't do a strict pointer equality check since PetscDevice's are reused */
231     if (dctx->device->ops->createcontext == device->ops->createcontext) PetscFunctionReturn(0);
232   }
233   ierr = PetscDeviceDestroy(&dctx->device);CHKERRQ(ierr);
234   if (dctx->ops->destroy) {ierr = (*dctx->ops->destroy)(dctx);CHKERRQ(ierr);}
235   ierr = PetscMemzero(dctx->ops,sizeof(*dctx->ops));CHKERRQ(ierr);
236   ierr = (*device->ops->createcontext)(dctx);CHKERRQ(ierr);
237   ierr = PetscDeviceReference_Internal(device);CHKERRQ(ierr);
238   dctx->device = device;
239   dctx->setup  = PETSC_FALSE;
240   PetscFunctionReturn(0);
241 }
242 
243 /*@C
244   PetscDeviceContextGetDevice - Get the underlying PetscDevice for a PetscDeviceContext
245 
246   Not Collective, Asynchronous
247 
248   Input Parameter:
249 . dctx - the PetscDeviceContext
250 
251   Output Parameter:
252 . device - The PetscDevice
253 
254   Notes:
255   This is a borrowed reference, the user should not destroy the device.
256 
257   Level: intermediate
258 
259 .seealso: PetscDeviceContextSetDevice(), PetscDevice
260 @*/
261 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device)
262 {
263   PetscFunctionBegin;
264   PetscValidDeviceContext(dctx,1);
265   PetscValidPointer(device,2);
266   PetscAssert(dctx->device,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PetscDeviceContext %" PetscInt_FMT " has no attached PetscDevice to get",dctx->id);
267   *device = dctx->device;
268   PetscFunctionReturn(0);
269 }
270 
271 /*@C
272   PetscDeviceContextSetUp - Prepares a PetscDeviceContext for use
273 
274   Not Collective, Asynchronous
275 
276   Input Parameter:
277 . dctx - The PetscDeviceContext
278 
279   Developer Notes:
280   This routine is usually the stage where a PetscDeviceContext acquires device-side data structures such as streams,
281   events, and (possibly) handles.
282 
283   Level: beginner
284 
285 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextDestroy(), PetscDeviceContextSetFromOptions()
286 @*/
287 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx)
288 {
289   PetscErrorCode ierr;
290 
291   PetscFunctionBegin;
292   PetscValidDeviceContext(dctx,1);
293   if (!dctx->device) {
294     ierr = PetscInfo(nullptr,"PetscDeviceContext %" PetscInt_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n",dctx->id,PetscDeviceTypes[PETSC_DEVICE_DEFAULT]);CHKERRQ(ierr);
295     ierr = PetscDeviceContextSetDefaultDevice_Internal(dctx);CHKERRQ(ierr);
296   }
297   if (dctx->setup) PetscFunctionReturn(0);
298   ierr = (*dctx->ops->setup)(dctx);CHKERRQ(ierr);
299   dctx->setup = PETSC_TRUE;
300   PetscFunctionReturn(0);
301 }
302 
303 /*@C
304   PetscDeviceContextDuplicate - Duplicates a PetscDeviceContext object
305 
306   Not Collective, Asynchronous
307 
308   Input Parameter:
309 . dctx - The PetscDeviceContext to duplicate
310 
  Output Parameter:
312 . dctxdup - The duplicated PetscDeviceContext
313 
314   Notes:
315   This is a shorthand method for creating a PetscDeviceContext with the exact same
316   settings as another. Note however that the duplicated PetscDeviceContext does not "share"
317   any of the underlying data with the original, (including its current stream-state) they
318   are completely separate objects.
319 
320   Level: beginner
321 
322 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetStreamType()
323 @*/
324 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup)
325 {
326   PetscDeviceContext dup;
327   PetscErrorCode     ierr;
328 
329   PetscFunctionBegin;
330   PetscValidDeviceContext(dctx,1);
331   PetscValidPointer(dctxdup,2);
332   ierr = PetscDeviceContextCreate(&dup);CHKERRQ(ierr);
333   ierr = PetscDeviceContextSetStreamType(dup,dctx->streamType);CHKERRQ(ierr);
334   if (dctx->device) {ierr = PetscDeviceContextSetDevice(dup,dctx->device);CHKERRQ(ierr);}
335   ierr = PetscDeviceContextSetUp(dup);CHKERRQ(ierr);
336   *dctxdup = dup;
337   PetscFunctionReturn(0);
338 }
339 
340 /*@C
341   PetscDeviceContextQueryIdle - Returns whether or not a PetscDeviceContext is idle
342 
343   Not Collective, Asynchronous
344 
345   Input Parameter:
346 . dctx - The PetscDeviceContext object
347 
348   Output Parameter:
349 . idle - PETSC_TRUE if PetscDeviceContext has NO work, PETSC_FALSE if it has work
350 
351   Notes:
352   This routine only refers a singular context and does NOT take any of its children into
353   account. That is, if dctx is idle but has dependents who do have work, this routine still
354   returns PETSC_TRUE.
355 
356   Level: intermediate
357 
358 .seealso: PetscDeviceContextCreate(), PetscDeviceContextWaitForContext(), PetscDeviceContextFork()
359 @*/
360 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle)
361 {
362   PetscErrorCode ierr;
363 
364   PetscFunctionBegin;
365   PetscValidDeviceContext(dctx,1);
366   PetscValidBoolPointer(idle,2);
367   ierr = (*dctx->ops->query)(dctx,idle);CHKERRQ(ierr);
368   ierr = PetscInfo(nullptr,"PetscDeviceContext id %" PetscInt_FMT " %s idle\n",dctx->id,*idle ? "was" : "was not");CHKERRQ(ierr);
369   PetscFunctionReturn(0);
370 }
371 
372 /*@C
373   PetscDeviceContextWaitForContext - Make one context wait for another context to finish
374 
375   Not Collective, Asynchronous
376 
377   Input Parameters:
378 + dctxa - The PetscDeviceContext object that is waiting
379 - dctxb - The PetscDeviceContext object that is being waited on
380 
381   Notes:
382   Serializes two PetscDeviceContexts. This routine uses only the state of dctxb at the moment this routine was
383   called, so any future work queued will not affect dctxa. It is safe to pass the same context to both arguments.
384 
385   Level: beginner
386 
387 .seealso: PetscDeviceContextCreate(), PetscDeviceContextQueryIdle(), PetscDeviceContextJoin()
388 @*/
389 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb)
390 {
391   PetscErrorCode ierr;
392 
393   PetscFunctionBegin;
394   PetscCheckCompatibleDeviceContexts(dctxa,1,dctxb,2);
395   if (dctxa == dctxb) PetscFunctionReturn(0);
396   ierr = (*dctxa->ops->waitforcontext)(dctxa,dctxb);CHKERRQ(ierr);
397   PetscFunctionReturn(0);
398 }
399 
400 #define PETSC_USE_DEBUG_AND_INFO (PetscDefined(USE_DEBUG) && PetscDefined(USE_INFO))
401 #if PETSC_USE_DEBUG_AND_INFO
402 #include <string>
403 #endif
404 /*@C
405   PetscDeviceContextFork - Create a set of dependent child contexts from a parent context
406 
407   Not Collective, Asynchronous
408 
409   Input Parameters:
410 + dctx - The parent PetscDeviceContext
411 - n    - The number of children to create
412 
413   Output Parameter:
414 . dsub - The created child context(s)
415 
416   Notes:
417   This routine creates n edges of a DAG from a source node which are causally dependent on the source node, meaning
418   that work queued on child contexts will not start until the parent context finishes its work. This accounts for work
419   queued on the parent up until calling this function, any subsequent work enqueued on the parent has no effect on the children.
420 
  Any children created with this routine have their lifetimes bounded by the parent. That is, the parent context expects
  to free all of its children (and ONLY its children) before it is itself freed.
423 
424   DAG representation:
425 .vb
426   time ->
427 
428   -> dctx \----> dctx ------>
429            \---> dsub[0] --->
430             \--> ... ------->
431              \-> dsub[n-1] ->
432 .ve
433 
434   Level: intermediate
435 
436 .seealso: PetscDeviceContextJoin(), PetscDeviceContextSynchronize(), PetscDeviceContextQueryIdle()
437 @*/
438 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub)
439 {
440 #if PETSC_USE_DEBUG_AND_INFO
441   const PetscInt      nBefore = n;
442   static std::string  idList;
443 #endif
444   PetscDeviceContext *dsubTmp = nullptr;
445   PetscInt            i = 0;
446   PetscErrorCode      ierr;
447 
448   PetscFunctionBegin;
449   PetscValidDeviceContext(dctx,1);
450   PetscValidPointer(dsub,3);
451   PetscAssert(n >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts requested %" PetscInt_FMT " < 0",n);
452 #if PETSC_USE_DEBUG_AND_INFO
453   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
454   idList.reserve(4*n);
455 #endif
456   /* update child totals */
457   dctx->numChildren += n;
458   /* now to find out if we have room */
459   if (dctx->numChildren > dctx->maxNumChildren) {
460     /* no room, either from having too many kids or not having any */
461     if (dctx->childIDs) {
462       /* have existing children, must reallocate them */
463       ierr = PetscRealloc(dctx->numChildren*sizeof(*dctx->childIDs),&dctx->childIDs);CHKERRQ(ierr);
464       /* clear the extra memory since realloc doesn't do it for us */
465       ierr = PetscArrayzero((dctx->childIDs)+(dctx->maxNumChildren),(dctx->numChildren)-(dctx->maxNumChildren));CHKERRQ(ierr);
466     } else {
467       /* have no children */
468       ierr = PetscCalloc1(dctx->numChildren,&dctx->childIDs);CHKERRQ(ierr);
469     }
470     /* update total number of children */
471     dctx->maxNumChildren = dctx->numChildren;
472   }
473   ierr = PetscMalloc1(n,&dsubTmp);CHKERRQ(ierr);
474   while (n) {
475     /* empty child slot */
476     if (!(dctx->childIDs[i])) {
477       /* create the child context in the image of its parent */
478       ierr = PetscDeviceContextDuplicate(dctx,dsubTmp+i);CHKERRQ(ierr);
479       ierr = PetscDeviceContextWaitForContext(dsubTmp[i],dctx);CHKERRQ(ierr);
480       /* register the child with its parent */
481       dctx->childIDs[i] = dsubTmp[i]->id;
482 #if PETSC_USE_DEBUG_AND_INFO
483       idList += std::to_string(dsubTmp[i]->id);
484       if (n != 1) idList += ", ";
485 #endif
486       --n;
487     }
488     ++i;
489   }
490 #if PETSC_USE_DEBUG_AND_INFO
491   ierr = PetscInfo(nullptr,"Forked %" PetscInt_FMT " children from parent %" PetscInt_FMT " with IDs: %s\n",nBefore,dctx->id,idList.c_str());CHKERRQ(ierr);
492   /* resets the size but doesn't deallocate the memory */
493   idList.clear();
494 #endif
495   /* pass the children back to caller */
496   *dsub = dsubTmp;
497   PetscFunctionReturn(0);
498 }
499 
500 /*@C
501   PetscDeviceContextJoin - Converge a set of child contexts
502 
503   Not Collective, Asynchronous
504 
505   Input Parameters:
506 + dctx         - A PetscDeviceContext to converge on
507 . n            - The number of sub contexts to converge
508 . joinMode     - The type of join to perform
509 - dsub         - The sub contexts to converge
510 
511   Notes:
  If PetscDeviceContextFork() creates n edges from a source node which all depend on the
  source node, then this routine is the exact mirror. That is, it creates a node
  (represented in dctx) which receives n edges (and optionally destroys them) which is
  dependent on the completion of all incoming edges.
516 
517   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY all contexts in dsub will be destroyed
518   by this routine. Thus all sub contexts must have been created with the dctx passed to
519   this routine.
520 
  If joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC dctx waits for all sub contexts but the
  sub contexts do not wait for one another afterwards.
523 
524   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC all sub contexts will additionally
525   wait on dctx after converging. This has the effect of "synchronizing" the outgoing
526   edges.
527 
528   DAG representations:
529   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY
530 .vb
531   time ->
532 
533   -> dctx ---------/- dctx ->
534   -> dsub[0] -----/
535   ->  ... -------/
536   -> dsub[n-1] -/
537 .ve
538   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC
539 .vb
540   time ->
541 
542   -> dctx ---------/- dctx ->
543   -> dsub[0] -----/--------->
544   ->  ... -------/---------->
545   -> dsub[n-1] -/----------->
546 .ve
547   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC
548 .vb
549   time ->
550 
551   -> dctx ---------/- dctx -\----> dctx ------>
552   -> dsub[0] -----/          \---> dsub[0] --->
553   ->  ... -------/            \--> ... ------->
554   -> dsub[n-1] -/              \-> dsub[n-1] ->
555 .ve
556 
557   Level: intermediate
558 
559 .seealso: PetscDeviceContextFork(), PetscDeviceContextSynchronize(), PetscDeviceContextJoinMode
560 @*/
561 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub)
562 {
563 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
564   static std::string idList;
565 #endif
566   PetscErrorCode     ierr;
567 
568   PetscFunctionBegin;
569   /* validity of dctx is checked in the wait-for loop */
570   PetscValidPointer(dsub,4);
571   PetscAssert(n >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts merged %" PetscInt_FMT " < 0",n);
572 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
573   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
574   idList.reserve(4*n);
575 #endif
576   /* first dctx waits on all the incoming edges */
577   for (PetscInt i = 0; i < n; ++i) {
578     PetscCheckCompatibleDeviceContexts(dctx,1,(*dsub)[i],4);
579     ierr = PetscDeviceContextWaitForContext(dctx,(*dsub)[i]);CHKERRQ(ierr);
580 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
581     idList += std::to_string((*dsub)[i]->id);
582     if (i+1 < n) idList += ", ";
583 #endif
584   }
585 
586   /* now we handle the aftermath */
587   switch (joinMode) {
588   case PETSC_DEVICE_CONTEXT_JOIN_DESTROY:
589     {
590       PetscInt j = 0;
591 
592       PetscAssert(n <= dctx->numChildren,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent",n,dctx->numChildren);
593       /* update child count while it's still fresh in memory */
594       dctx->numChildren -= n;
595       for (PetscInt i = 0; i < dctx->maxNumChildren; ++i) {
596         if (dctx->childIDs[i] && (dctx->childIDs[i] == (*dsub)[j]->id)) {
597           /* child is one of ours, can destroy it */
598           ierr = PetscDeviceContextDestroy((*dsub)+j);CHKERRQ(ierr);
599           /* reset the child slot */
600           dctx->childIDs[i] = 0;
601           if (++j == n) break;
602         }
603       }
604       /* gone through the loop but did not find every child, if this triggers (or well, doesn't) on perf-builds we leak the remaining contexts memory */
605       PetscAssert(j == n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in.",n-j);
606       ierr = PetscFree(*dsub);CHKERRQ(ierr);
607     }
608     break;
609   case PETSC_DEVICE_CONTEXT_JOIN_SYNC:
610     for (PetscInt i = 0; i < n; ++i) {
611       ierr = PetscDeviceContextWaitForContext((*dsub)[i],dctx);CHKERRQ(ierr);
612     }
613   case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC:
614     break;
615   default:
616     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown PetscDeviceContextJoinMode given");
617   }
618 
619 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
620   ierr = PetscInfo(nullptr,"Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt_FMT ", mode %s with IDs: %s\n",n,dctx->id,PetscDeviceContextJoinModes[joinMode],idList.c_str());CHKERRQ(ierr);
621   idList.clear();
622 #endif
623   PetscFunctionReturn(0);
624 }
625 
626 /*@C
627   PetscDeviceContextSynchronize - Block the host until all work queued on or associated with a PetscDeviceContext has finished
628 
629   Not Collective, Synchronous
630 
631   Input Parameters:
632 . dctx - The PetscDeviceContext to synchronize
633 
634   Level: beginner
635 
636 .seealso: PetscDeviceContextFork(), PetscDeviceContextJoin(), PetscDeviceContextQueryIdle()
637 @*/
638 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx)
639 {
640   PetscFunctionBegin;
641   PetscValidDeviceContext(dctx,1);
642   /* if it isn't setup there is nothing to sync on */
643   if (dctx->setup) {auto ierr = (*dctx->ops->synchronize)(dctx);CHKERRQ(ierr);}
644   PetscFunctionReturn(0);
645 }
646 
/* device type the root context falls back to; also used as the default shown for
 * -device_context_device_type when a context has no device attached */
#define PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE PETSC_DEVICE_DEFAULT
// REMOVE ME (change)
/* NOTE(review): the marker above presumably flags the default stream type below as
 * provisional - confirm with the author before relying on it */
#define PETSC_DEVICE_CONTEXT_DEFAULT_STREAM PETSC_STREAM_GLOBAL_BLOCKING

/* device type used when the global context is created, overridable through
 * PetscDeviceContextSetRootDeviceType_Internal() */
static PetscDeviceType    rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE;
/* stream type used when the global context is created */
static PetscStreamType    rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM;
/* the current "global" context: created on demand by PetscDeviceContextGetCurrentContext(),
 * replaced by PetscDeviceContextSetCurrentContext() */
static PetscDeviceContext globalContext  = nullptr;
654 
655 /* when PetsDevice initializes PetscDeviceContext eagerly the type of device created should
656  * match whatever device is eagerly intialized */
657 PetscErrorCode PetscDeviceContextSetRootDeviceType_Internal(PetscDeviceType type)
658 {
659   PetscFunctionBegin;
660   PetscValidDeviceType(type,1);
661   rootDeviceType = type;
662   PetscFunctionReturn(0);
663 }
664 
#if 0
/* currently unused (and compiled out): counterpart of
 * PetscDeviceContextSetRootDeviceType_Internal() that would record the stream type used
 * when the global context is created. While this is disabled nothing else in this file
 * assigns rootStreamType a non-default value. */
PetscErrorCode PetscDeviceContextSetRootStreamType_Internal(PetscStreamType type)
{
  PetscFunctionBegin;
  PetscValidStreamType(type,1);
  rootStreamType = type;
  PetscFunctionReturn(0);
}
#endif
675 
676 static PetscErrorCode PetscDeviceContextSetupGlobalContext_Private(void)
677 {
678   PetscErrorCode    ierr;
679   static const auto PetscDeviceContextFinalizer = []() -> PetscErrorCode {
680     PetscErrorCode ierr;
681 
682     PetscFunctionBegin;
683     ierr = PetscDeviceContextDestroy(&globalContext);CHKERRQ(ierr);
684     rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE;
685     rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM;
686     PetscFunctionReturn(0);
687   };
688 
689   PetscFunctionBegin;
690   if (globalContext) PetscFunctionReturn(0);
691   /* this exists purely as a valid device check. */
692   ierr = PetscDeviceInitializePackage();CHKERRQ(ierr);
693   ierr = PetscRegisterFinalize(PetscDeviceContextFinalizer);CHKERRQ(ierr);
694   ierr = PetscInfo(nullptr,"Initializing global PetscDeviceContext\n");CHKERRQ(ierr);
695   /* we call the allocator directly here since the ObjectPool creates a PetscContainer which
696    * eventually tries to call logging functions. However, this routine may be purposefully
697    * called __before__ logging is initialized, so the logging function would PETSCABORT */
698   ierr = contextPool.allocator().create(&globalContext);CHKERRQ(ierr);
699   ierr = PetscDeviceContextSetStreamType(globalContext,rootStreamType);CHKERRQ(ierr);
700   ierr = PetscDeviceContextSetDefaultDeviceForType_Internal(globalContext,rootDeviceType);CHKERRQ(ierr);
701   ierr = PetscDeviceContextSetUp(globalContext);CHKERRQ(ierr);
702   PetscFunctionReturn(0);
703 }
704 
705 /*@C
706   PetscDeviceContextGetCurrentContext - Get the current active PetscDeviceContext
707 
708   Not Collective, Asynchronous
709 
710   Output Parameter:
711 . dctx - The PetscDeviceContext
712 
713   Notes:
714   The user generally should not destroy contexts retrieved with this routine unless they
715   themselves have created them. There exists no protection against destroying the root
716   context.
717 
718   Developer Notes:
719   Unless the user has set their own, this routine creates the "root" context the first time it
720   is called, registering its destructor to PetscFinalize().
721 
722   Level: beginner
723 
724 .seealso: PetscDeviceContextSetCurrentContext(), PetscDeviceContextFork(),
725 PetscDeviceContextJoin(), PetscDeviceContextCreate()
726 @*/
727 PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *dctx)
728 {
729   PetscErrorCode ierr;
730 
731   PetscFunctionBegin;
732   PetscValidPointer(dctx,1);
733   ierr = PetscDeviceContextSetupGlobalContext_Private();CHKERRQ(ierr);
734   /* while the static analyzer can find global variables, it will throw a warning about not
735    * being able to connect this back to the function arguments */
736   PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscValidDeviceContext(globalContext,-1));
737   *dctx = globalContext;
738   PetscFunctionReturn(0);
739 }
740 
741 /*@C
742   PetscDeviceContextSetCurrentContext - Set the current active PetscDeviceContext
743 
744   Not Collective, Asynchronous
745 
746   Input Parameter:
747 . dctx - The PetscDeviceContext
748 
749   Notes:
  This routine can be used to set the de facto "root" PetscDeviceContext to a user-defined
  implementation by calling this routine immediately after PetscInitialize() and ensuring that
  PetscDevice is not greedily initialized. In this case the user is responsible for destroying
  their PetscDeviceContext before PetscFinalize() returns.
754 
755   The old context is not stored in any way by this routine; if one is overriding a context that
756   they themselves do not control, one should take care to temporarily store it by calling
757   PetscDeviceContextGetCurrentContext() before calling this routine.
758 
759   Level: beginner
760 
761 .seealso: PetscDeviceContextGetCurrentContext(), PetscDeviceContextFork(),
762 PetscDeviceContextJoin(), PetscDeviceContextCreate()
763 @*/
764 PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext dctx)
765 {
766   PetscErrorCode ierr;
767 
768   PetscFunctionBegin;
769   PetscValidDeviceContext(dctx,1);
770   PetscAssert(dctx->setup,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PetscDeviceContext %" PetscInt_FMT " must be set up before being set as global context",dctx->id);
771   globalContext = dctx;
772   ierr = PetscInfo(nullptr,"Set global PetscDeviceContext id %" PetscInt_FMT "\n",dctx->id);CHKERRQ(ierr);
773   PetscFunctionReturn(0);
774 }
775 
776 /*@C
777   PetscDeviceContextSetFromOptions - Configure a PetscDeviceContext from the options database
778 
779   Collective on comm, Asynchronous
780 
781   Input Parameters:
782 + comm   - MPI communicator on which to query the options database
783 . prefix - prefix to prepend to all options database queries, NULL if not needed
784 - dctx   - The PetscDeviceContext to configure
785 
786   Output Parameter:
787 . dctx - The PetscDeviceContext
788 
789   Options Database:
790 + -device_context_stream_type - type of stream to create inside the PetscDeviceContext -
791    PetscDeviceContextSetStreamType()
792 - -device_context_device_type - the type of PetscDevice to attach by default - PetscDeviceType
793 
794   Level: beginner
795 
796 .seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextSetDevice()
797 @*/
798 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, const char prefix[], PetscDeviceContext dctx)
799 {
800   PetscBool      flag;
801   PetscInt       stype,dtype;
802   PetscErrorCode ierr;
803 
804   PetscFunctionBegin;
805   if (prefix) PetscValidCharPointer(prefix,2);
806   PetscValidDeviceContext(dctx,3);
807   ierr = PetscOptionsBegin(comm,prefix,"PetscDeviceContext Options","Sys");CHKERRQ(ierr);
808   ierr = PetscOptionsEList("-device_context_stream_type","PetscDeviceContext PetscStreamType","PetscDeviceContextSetStreamType",PetscStreamTypes,PETSC_STREAM_MAX,PetscStreamTypes[dctx->streamType],&stype,&flag);CHKERRQ(ierr);
809   if (flag) {
810     ierr = PetscDeviceContextSetStreamType(dctx,static_cast<PetscStreamType>(stype));CHKERRQ(ierr);
811   }
812   ierr = PetscOptionsEList("-device_context_device_type","Underlying PetscDevice","PetscDeviceContextSetDevice",PetscDeviceTypes+1,PETSC_DEVICE_MAX-1,dctx->device ? PetscDeviceTypes[dctx->device->type] : PetscDeviceTypes[PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE],&dtype,&flag);CHKERRQ(ierr);
813   if (flag) {
814     ierr = PetscDeviceContextSetDefaultDeviceForType_Internal(dctx,static_cast<PetscDeviceType>(dtype+1));CHKERRQ(ierr);
815   }
816   ierr = PetscOptionsEnd();CHKERRQ(ierr);
817   PetscFunctionReturn(0);
818 }
819