xref: /petsc/src/sys/objects/device/interface/dcontext.cxx (revision 3ca90d2d9fe4d5ec7086bd4aee14f89370d16392)
1 #include <petsc/private/deviceimpl.h> /*I "petscdevice.h" I*/
2 #include "objpool.hpp"
3 
/* Names of the PetscStreamType values. The table is indexed by dctx->streamType and its
 * first PETSC_STREAM_MAX entries are offered as choices for -device_context_stream_type in
 * PetscDeviceContextSetFromOptions().
 * NOTE(review): the entries after the value names (type name, enum prefix, null terminator)
 * presumably follow the usual PETSc enum string-list layout -- confirm before reordering. */
4 const char *const PetscStreamTypes[] = {
5   "global_blocking",
6   "default_blocking",
7   "global_nonblocking",
8   "max",
9   "PetscStreamType",
10   "PETSC_STREAM_",
11   PETSC_NULLPTR
12 };
13 
/* Names of the PetscDeviceContextJoinMode values. PetscDeviceContextJoin() indexes this
 * table with joinMode when it logs the join.
 * NOTE(review): the trailing type name, enum prefix and null terminator presumably follow
 * the usual PETSc enum string-list layout -- confirm before reordering. */
14 const char *const PetscDeviceContextJoinModes[] = {
15   "destroy",
16   "sync",
17   "no_sync",
18   "PetscDeviceContextJoinMode",
19   "PETSC_DEVICE_CONTEXT_JOIN_",
20   PETSC_NULLPTR
21 };
22 
23 /* Define the allocator */
24 struct PetscDeviceContextAllocator : Petsc::Allocator<PetscDeviceContext>
25 {
26   static PetscInt PetscDeviceContextID;
27 
28   PETSC_NODISCARD static PetscErrorCode create(PetscDeviceContext *dctx) PETSC_NOEXCEPT
29   {
30     PetscDeviceContext dc;
31     PetscErrorCode     ierr;
32 
33     PetscFunctionBegin;
34     ierr           = PetscNew(&dc);CHKERRQ(ierr);
35     dc->id         = PetscDeviceContextID++;
36     dc->idle       = PETSC_TRUE;
37     dc->streamType = PETSC_STREAM_DEFAULT_BLOCKING;
38     *dctx          = dc;
39     PetscFunctionReturn(0);
40   }
41 
42   PETSC_NODISCARD static PetscErrorCode destroy(PetscDeviceContext dctx) PETSC_NOEXCEPT
43   {
44     PetscErrorCode ierr;
45 
46     PetscFunctionBegin;
47     if (PetscUnlikelyDebug(dctx->numChildren)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying",dctx->numChildren);
48     if (dctx->ops->destroy) {ierr = (*dctx->ops->destroy)(dctx);CHKERRQ(ierr);}
49     ierr = PetscDeviceDestroy(&dctx->device);CHKERRQ(ierr);
50     ierr = PetscFree(dctx->childIDs);CHKERRQ(ierr);
51     ierr = PetscFree(dctx);CHKERRQ(ierr);
52     PetscFunctionReturn(0);
53   }
54 
55   PETSC_NODISCARD static PetscErrorCode reset(PetscDeviceContext dctx) PETSC_NOEXCEPT
56   {
57     PetscErrorCode ierr;
58 
59     PetscFunctionBegin;
60     /* don't deallocate the child array, rather just zero it out */
61     ierr = PetscArrayzero(dctx->childIDs,dctx->maxNumChildren);CHKERRQ(ierr);
62     dctx->setup       = PETSC_FALSE;
63     dctx->numChildren = 0;
64     dctx->idle        = PETSC_TRUE;
65     dctx->streamType  = PETSC_STREAM_DEFAULT_BLOCKING;
66     PetscFunctionReturn(0);
67   }
68 
69   PETSC_NODISCARD static constexpr PetscErrorCode finalize() PETSC_NOEXCEPT { return 0; }
70 };
71 /* an ID = 0 is invalid */
72 PetscInt PetscDeviceContextAllocator::PetscDeviceContextID = 1;
73 
74 static Petsc::ObjectPool<PetscDeviceContext,PetscDeviceContextAllocator> contextPool;
75 
76 /*@C
77   PetscDeviceContextCreate - Creates a PetscDeviceContext
78 
79   Not Collective, Asynchronous
80 
81   Output Paramemter:
82 . dctx - The PetscDeviceContext
83 
84   Notes:
85   Unlike almost every other PETSc class it is advised that most users use
86   PetscDeviceContextDuplicate() rather than this routine to create new contexts. Contexts
87   of different types are incompatible with one another; using
88   PetscDeviceContextDuplicate() ensures compatible types.
89 
90   Level: beginner
91 
92 .seealso: PetscDeviceContextDuplicate(), PetscDeviceContextSetDevice(),
93 PetscDeviceContextSetStreamType(), PetscDeviceContextSetUp(),
94 PetscDeviceContextSetFromOptions(), PetscDeviceContextDestroy()
95 @*/
96 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx)
97 {
98   PetscErrorCode ierr;
99 
100   PetscFunctionBegin;
101   PetscValidPointer(dctx,1);
102   ierr = PetscDeviceInitializePackage();CHKERRQ(ierr);
103   ierr = contextPool.get(*dctx);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 /*@C
108   PetscDeviceContextDestroy - Frees a PetscDeviceContext
109 
110   Not Collective, Asynchronous
111 
112   Input Parameters:
113 . dctx - The PetscDeviceContext
114 
115   Notes:
116   No implicit synchronization occurs due to this routine, all resources are released completely asynchronously
117   w.r.t. the host. If one needs to guarantee access to the data produced on this contexts stream one should perform the
118   appropriate synchronization before calling this routine.
119 
120   Developer Notes:
121   The context is never actually "destroyed", only returned to an ever growing pool of
122   contexts. There are currently no safeguards on the size of the pool, this should perhaps
123   be implemented.
124 
125   Level: beginner
126 
127 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetUp(), PetscDeviceContextSynchronize()
128 @*/
129 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx)
130 {
131   PetscErrorCode ierr;
132 
133   PetscFunctionBegin;
134   if (!*dctx) PetscFunctionReturn(0);
135   ierr  = contextPool.reclaim(std::move(*dctx));CHKERRQ(ierr);
136   *dctx = PETSC_NULLPTR;
137   PetscFunctionReturn(0);
138 }
139 
140 /*@C
141   PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a PetscDeviceContext
142 
143   Not Collective, Asynchronous
144 
145   Input Parameters:
146 + dctx - The PetscDeviceContext
147 - type - The PetscStreamType
148 
149   Notes:
150   See PetscStreamType in include/petscdevicetypes.h for more information on the available
151   types and their interactions. If the PetscDeviceContext was previously set up and stream
152   type was changed, you must call PetscDeviceContextSetUp() again after this routine.
153 
154   Level: intermediate
155 
156 .seealso: PetscDeviceContextGetStreamType(), PetscDeviceContextCreate(), PetscDeviceContextSetUp(), PetscDeviceContextSetFromOptions()
157 @*/
158 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type)
159 {
160   PetscFunctionBegin;
161   PetscValidDeviceContext(dctx,1);
162   PetscValidStreamType(type,2);
163   /* only need to do complex swapping if the object has already been setup */
164   if (dctx->setup && (dctx->streamType != type)) {
165     PetscErrorCode ierr;
166 
167     ierr = (*dctx->ops->changestreamtype)(dctx,type);CHKERRQ(ierr);
168     dctx->setup = PETSC_FALSE;
169   }
170   dctx->streamType = type;
171   PetscFunctionReturn(0);
172 }
173 
174 /*@C
175   PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a PetscDeviceContext
176 
177   Not Collective, Asynchronous
178 
179   Input Parameter:
180 . dctx - The PetscDeviceContext
181 
182   Output Parameter:
183 . type - The PetscStreamType
184 
185   Notes:
186   See PetscStreamType in include/petscdevicetypes.h for more information on the available types and their interactions
187 
188   Level: intermediate
189 
190 .seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextCreate(), PetscDeviceContextSetFromOptions()
191 @*/
192 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type)
193 {
194   PetscFunctionBegin;
195   PetscValidDeviceContext(dctx,1);
196   PetscValidIntPointer(type,2);
197   *type = dctx->streamType;
198   PetscFunctionReturn(0);
199 }
200 
201 /*@C
202   PetscDeviceContextSetDevice - Set the underlying device for the PetscDeviceContext
203 
204   Not Collective, Possibly Synchronous
205 
206   Input Parameters:
207 + dctx   - The PetscDeviceContext
208 - device - The PetscDevice
209 
210   Notes:
211   This routine is effectively PetscDeviceContext's "set-type" (so every PetscDeviceContext
212   must also have an attached PetscDevice). Unlike the usual set-type semantics, it is
213   not stricly necessary to set a contexts device to enable usage, any created device
214   contexts will always come equipped with the "default" device.
215 
216   This routine is a no-op if dctx is already attached to device.
217 
218   This routine may initialize the backend device and incur synchronization.
219 
220   Level: intermediate
221 
222 .seealso: PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceContextGetDevice()
223 @*/
224 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device)
225 {
226   PetscErrorCode ierr;
227 
228   PetscFunctionBegin;
229   PetscValidDeviceContext(dctx,1);
230   PetscValidDevice(device,2);
231   if (dctx->device) {
232     /* can't do a strict pointer equality check since PetscDevice's are reused */
233     if (dctx->device->ops->createcontext == device->ops->createcontext) PetscFunctionReturn(0);
234   }
235   ierr = PetscDeviceDestroy(&dctx->device);CHKERRQ(ierr);
236   if (dctx->ops->destroy) {ierr = (*dctx->ops->destroy)(dctx);CHKERRQ(ierr);}
237   ierr = PetscMemzero(dctx->ops,sizeof(*dctx->ops));CHKERRQ(ierr);
238   ierr = (*device->ops->createcontext)(dctx);CHKERRQ(ierr);
239   ierr = PetscDeviceReference_Internal(device);CHKERRQ(ierr);
240   dctx->device = device;
241   dctx->setup  = PETSC_FALSE;
242   PetscFunctionReturn(0);
243 }
244 
245 /*@C
246   PetscDeviceContextGetDevice - Get the underlying PetscDevice for a PetscDeviceContext
247 
248   Not Collective, Asynchronous
249 
250   Input Parameter:
251 . dctx - the PetscDeviceContext
252 
253   Output Parameter:
254 . device - The PetscDevice
255 
256   Notes:
257   This is a borrowed reference, the user should not destroy the device.
258 
259   Level: intermediate
260 
261 .seealso: PetscDeviceContextSetDevice(), PetscDevice
262 @*/
263 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device)
264 {
265   PetscFunctionBegin;
266   PetscValidDeviceContext(dctx,1);
267   PetscValidPointer(device,2);
268   if (PetscUnlikelyDebug(!dctx->device)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PetscDeviceContext %" PetscInt_FMT " has no attached PetscDevice to get",dctx->id);
269   *device = dctx->device;
270   PetscFunctionReturn(0);
271 }
272 
273 /*@C
274   PetscDeviceContextSetUp - Prepares a PetscDeviceContext for use
275 
276   Not Collective, Asynchronous
277 
278   Input Parameter:
279 . dctx - The PetscDeviceContext
280 
281   Developer Notes:
282   This routine is usually the stage where a PetscDeviceContext acquires device-side data structures such as streams,
283   events, and (possibly) handles.
284 
285   Level: beginner
286 
287 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextDestroy(), PetscDeviceContextSetFromOptions()
288 @*/
289 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx)
290 {
291   PetscErrorCode ierr;
292 
293   PetscFunctionBegin;
294   PetscValidDeviceContext(dctx,1);
295   if (!dctx->device) {
296     ierr = PetscInfo2(PETSC_NULLPTR,"PetscDeviceContext %" PetscInt_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n",dctx->id,PetscDeviceTypes[PETSC_DEVICE_DEFAULT]);CHKERRQ(ierr);
297     ierr = PetscDeviceContextSetDefaultDevice_Internal(dctx);CHKERRQ(ierr);
298   }
299   if (dctx->setup) PetscFunctionReturn(0);
300   ierr = (*dctx->ops->setup)(dctx);CHKERRQ(ierr);
301   dctx->setup = PETSC_TRUE;
302   PetscFunctionReturn(0);
303 }
304 
305 /*@C
306   PetscDeviceContextDuplicate - Duplicates a PetscDeviceContext object
307 
308   Not Collective, Asynchronous
309 
310   Input Parameter:
311 . dctx - The PetscDeviceContext to duplicate
312 
313   Output Paramter:
314 . strmdup - The duplicated PetscDeviceContext
315 
316   Notes:
317   This is a shorthand method for creating a PetscDeviceContext with the exact same
318   settings as another. Note however that the duplicated PetscDeviceContext does not "share"
319   any of the underlying data with the original, (including its current stream-state) they
320   are completely separate objects.
321 
322   Level: beginner
323 
324 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetStreamType()
325 @*/
326 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup)
327 {
328   PetscDeviceContext dup;
329   PetscErrorCode     ierr;
330 
331   PetscFunctionBegin;
332   PetscValidDeviceContext(dctx,1);
333   PetscValidPointer(dctxdup,2);
334   ierr = PetscDeviceContextCreate(&dup);CHKERRQ(ierr);
335   ierr = PetscDeviceContextSetStreamType(dup,dctx->streamType);CHKERRQ(ierr);
336   if (dctx->device) {ierr = PetscDeviceContextSetDevice(dup,dctx->device);CHKERRQ(ierr);}
337   ierr = PetscDeviceContextSetUp(dup);CHKERRQ(ierr);
338   *dctxdup = dup;
339   PetscFunctionReturn(0);
340 }
341 
342 /*@C
343   PetscDeviceContextQueryIdle - Returns whether or not a PetscDeviceContext is idle
344 
345   Not Collective, Asynchronous
346 
347   Input Parameter:
348 . dctx - The PetscDeviceContext object
349 
350   Output Parameter:
351 . idle - PETSC_TRUE if PetscDeviceContext has NO work, PETSC_FALSE if it has work
352 
353   Notes:
354   This routine only refers a singular context and does NOT take any of its children into account. That is, if dctx is
355   idle but has dependents who do have work, this routine still returns PETSC_TRUE.
356 
357   Results of PetscDeviceContextQueryIdle() are cached on return, allowing this function to be called repeatedly in an
358   efficient manner. When debug mode is enabled this cache is verified on every call to
359   this routine, but is blindly believed when debugging is disabled.
360 
361   Level: intermediate
362 
363 .seealso: PetscDeviceContextCreate(), PetscDeviceContextWaitForContext(), PetscDeviceContextFork()
364 @*/
365 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle)
366 {
367   PetscErrorCode ierr;
368 
369   PetscFunctionBegin;
370   PetscValidDeviceContext(dctx,1);
371   PetscValidBoolPointer(idle,2);
372   if (dctx->idle) {
373     *idle = PETSC_TRUE;
374     ierr = PetscDeviceContextValidateIdle_Internal(dctx);CHKERRQ(ierr);
375   } else {
376     ierr = (*dctx->ops->query)(dctx,idle);CHKERRQ(ierr);
377     dctx->idle = *idle;
378   }
379   PetscFunctionReturn(0);
380 }
381 
382 /*@C
383   PetscDeviceContextWaitForContext - Make one context wait for another context to finish
384 
385   Not Collective, Asynchronous
386 
387   Input Parameters:
388 + dctxa - The PetscDeviceContext object that is waiting
389 - dctxb - The PetscDeviceContext object that is being waited on
390 
391   Notes:
392   Serializes two PetscDeviceContexts. This routine uses only the state of dctxb at the moment this routine was
393   called, so any future work queued will not affect dctxa. It is safe to pass the same context to both arguments.
394 
395   Level: beginner
396 
397 .seealso: PetscDeviceContextCreate(), PetscDeviceContextQueryIdle(), PetscDeviceContextJoin()
398 @*/
399 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb)
400 {
401   PetscErrorCode ierr;
402 
403   PetscFunctionBegin;
404   PetscCheckCompatibleDeviceContexts(dctxa,1,dctxb,2);
405   if (dctxa == dctxb) PetscFunctionReturn(0);
406   if (dctxb->idle) {
407     /* No need to do the extra function lookup and event record if the stream were waiting on isn't doing anything */
408     ierr = PetscDeviceContextValidateIdle_Internal(dctxb);CHKERRQ(ierr);
409   } else {
410     ierr = (*dctxa->ops->waitforctx)(dctxa,dctxb);CHKERRQ(ierr);
411   }
412   PetscFunctionReturn(0);
413 }
414 
415 /*@C
416   PetscDeviceContextFork - Create a set of dependent child contexts from a parent context
417 
418   Not Collective, Asynchronous
419 
420   Input Parameters:
421 + dctx - The parent PetscDeviceContext
422 - n    - The number of children to create
423 
424   Output Parameter:
425 . dsub - The created child context(s)
426 
427   Notes:
428   This routine creates n edges of a DAG from a source node which are causally dependent on the source node, meaning
429   that work queued on child contexts will not start until the parent context finishes its work. This accounts for work
430   queued on the parent up until calling this function, any subsequent work enqueued on the parent has no effect on the children.
431 
432   Any children created with this routine have their lifetimes bounded by the parent. That is, the parent context expects
433   to free all of it's children (and ONLY its children) before itself is freed.
434 
435   DAG representation:
436 .vb
437   time ->
438 
439   -> dctx \----> dctx ------>
440            \---> dsub[0] --->
441             \--> ... ------->
442              \-> dsub[n-1] ->
443 .ve
444 
445   Level: intermediate
446 
447 .seealso: PetscDeviceContextJoin(), PetscDeviceContextSynchronize(), PetscDeviceContextQueryIdle()
448 @*/
449 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub)
450 {
451 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
452   const PetscInt      nBefore = n;
453   static std::string  idList;
454 #endif
455   PetscDeviceContext *dsubTmp = PETSC_NULLPTR;
456   PetscInt            i = 0;
457   PetscErrorCode      ierr;
458 
459   PetscFunctionBegin;
460   PetscValidDeviceContext(dctx,1);
461   PetscValidPointer(dsub,3);
462   if (PetscUnlikelyDebug(n < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts requested %" PetscInt_FMT " < 0",n);
463 #if PETSC_USE_DEBUG_AND_INFO
464   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
465   idList.reserve(4*n);
466 #endif
467   /* update child totals */
468   dctx->numChildren += n;
469   /* now to find out if we have room */
470   if (dctx->numChildren > dctx->maxNumChildren) {
471     /* no room, either from having too many kids or not having any */
472     if (dctx->childIDs) {
473       /* have existing children, must reallocate them */
474       ierr = PetscRealloc(dctx->numChildren*sizeof(*dctx->childIDs),&dctx->childIDs);CHKERRQ(ierr);
475       /* clear the extra memory since realloc doesn't do it for us */
476       ierr = PetscArrayzero((dctx->childIDs)+(dctx->maxNumChildren),(dctx->numChildren)-(dctx->maxNumChildren));CHKERRQ(ierr);
477     } else {
478       /* have no children */
479       ierr = PetscCalloc1(dctx->numChildren,&dctx->childIDs);CHKERRQ(ierr);
480     }
481     /* update total number of children */
482     dctx->maxNumChildren = dctx->numChildren;
483   }
484   ierr = PetscMalloc1(n,&dsubTmp);CHKERRQ(ierr);
485   while (n) {
486     /* empty child slot */
487     if (!(dctx->childIDs[i])) {
488       /* create the child context in the image of its parent */
489       ierr = PetscDeviceContextDuplicate(dctx,dsubTmp+i);CHKERRQ(ierr);
490       ierr = PetscDeviceContextWaitForContext(dsubTmp[i],dctx);CHKERRQ(ierr);
491       /* register the child with its parent */
492       dctx->childIDs[i] = dsubTmp[i]->id;
493 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
494       idList += std::to_string(dsubTmp[i]->id);
495       if (n != 1) idList += ", ";
496 #endif
497       --n;
498     }
499     ++i;
500   }
501 #if PETSC_USE_DEBUG_AND_INFO
502   ierr = PetscInfo3(PETSC_NULLPTR,"Forked %" PetscInt_FMT " children from parent %" PetscInt_FMT " with IDs: %s\n",nBefore,dctx->id,idList.c_str());CHKERRQ(ierr);
503   /* resets the size but doesn't deallocate the memory */
504   idList.clear();
505 #endif
506   /* pass the children back to caller */
507   *dsub = dsubTmp;
508   PetscFunctionReturn(0);
509 }
510 
511 /*@C
512   PetscDeviceContextJoin - Converge a set of child contexts
513 
514   Not Collective, Asynchronous
515 
516   Input Parameters:
517 + dctx         - A PetscDeviceContext to converge on
518 . n            - The number of sub contexts to converge
519 . joinMode     - The type of join to perform
520 - dsub         - The sub contexts to converge
521 
522   Notes:
523   If PetscDeviceContextFork() creates n edges from a source node which all depend on the
524   source node, then this routine is the exact mirror. That is, it creates a node
525   (represented in dctx) which recieves n edges (and optionally destroys them) which is
526   dependent on the completion of all incoming edges.
527 
528   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY all contexts in dsub will be destroyed
529   by this routine. Thus all sub contexts must have been created with the dctx passed to
530   this routine.
531 
532   if joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC dctx waits for all sub contexts but the
533   sub contexts do not wait for one another afterwards.
534 
535   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC all sub contexts will additionally
536   wait on dctx after converging. This has the effect of "synchronizing" the outgoing
537   edges.
538 
539   DAG representations:
540   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY
541 .vb
542   time ->
543 
544   -> dctx ---------/- dctx ->
545   -> dsub[0] -----/
546   ->  ... -------/
547   -> dsub[n-1] -/
548 .ve
549   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC
550 .vb
551   time ->
552 
553   -> dctx ---------/- dctx ->
554   -> dsub[0] -----/--------->
555   ->  ... -------/---------->
556   -> dsub[n-1] -/----------->
557 .ve
558   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC
559 .vb
560   time ->
561 
562   -> dctx ---------/- dctx -\----> dctx ------>
563   -> dsub[0] -----/          \---> dsub[0] --->
564   ->  ... -------/            \--> ... ------->
565   -> dsub[n-1] -/              \-> dsub[n-1] ->
566 .ve
567 
568   Level: intermediate
569 
570 .seealso: PetscDeviceContextFork(), PetscDeviceContextSynchronize(), PetscDeviceContextJoinMode
571 @*/
572 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub)
573 {
574 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
575   static std::string idList;
576 #endif
577   PetscErrorCode     ierr;
578 
579   PetscFunctionBegin;
580   /* validity of dctx is checked in the wait-for loop */
581   PetscValidPointer(dsub,4);
582   if (PetscUnlikelyDebug(n < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts merged %" PetscInt_FMT " < 0",n);
583 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
584   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
585   idList.reserve(4*n);
586 #endif
587   /* first dctx waits on all the incoming edges */
588   for (PetscInt i = 0; i < n; ++i) {
589     PetscCheckCompatibleDeviceContexts(dctx,1,(*dsub)[i],4);
590     ierr = PetscDeviceContextWaitForContext(dctx,(*dsub)[i]);CHKERRQ(ierr);
591 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
592     idList += std::to_string((*dsub)[i]->id);
593     if (i+1 < n) idList += ", ";
594 #endif
595   }
596 
597   /* now we handle the aftermath */
598   switch (joinMode) {
599   case PETSC_DEVICE_CONTEXT_JOIN_DESTROY:
600     {
601       PetscInt j = 0;
602 
603       if (PetscUnlikelyDebug(n > dctx->numChildren)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent",n,dctx->numChildren);
604       /* update child count while it's still fresh in memory */
605       dctx->numChildren -= n;
606       for (PetscInt i = 0; i < dctx->maxNumChildren; ++i) {
607         if (dctx->childIDs[i] && (dctx->childIDs[i] == (*dsub)[j]->id)) {
608           /* child is one of ours, can destroy it */
609           ierr = PetscDeviceContextDestroy((*dsub)+j);CHKERRQ(ierr);
610           /* reset the child slot */
611           dctx->childIDs[i] = 0;
612           if (++j == n) break;
613         }
614       }
615       /* gone through the loop but did not find every child, if this triggers (or well, doesn't) on perf-builds we leak the remaining contexts memory */
616       if (PetscUnlikelyDebug(j != n)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in.",n-j);
617       ierr = PetscFree(*dsub);CHKERRQ(ierr);
618     }
619     break;
620   case PETSC_DEVICE_CONTEXT_JOIN_SYNC:
621     for (PetscInt i = 0; i < n; ++i) {
622       ierr = PetscDeviceContextWaitForContext((*dsub)[i],dctx);CHKERRQ(ierr);
623     }
624   case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC:
625     break;
626   default:
627     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown PetscDeviceContextJoinMode given");
628   }
629 
630 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
631   ierr = PetscInfo4(PETSC_NULLPTR,"Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt_FMT ", mode %s with IDs: %s\n",n,dctx->id,PetscDeviceContextJoinModes[joinMode],idList.c_str());CHKERRQ(ierr);
632   idList.clear();
633 #endif
634   PetscFunctionReturn(0);
635 }
636 
637 /*@C
638   PetscDeviceContextSynchronize - Block the host until all work queued on or associated with a PetscDeviceContext has finished
639 
640   Not Collective, Synchronous
641 
642   Input Parameters:
643 . dctx - The PetscDeviceContext to synchronize
644 
645   Level: beginner
646 
647 .seealso: PetscDeviceContextFork(), PetscDeviceContextJoin(), PetscDeviceContextQueryIdle()
648 @*/
649 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx)
650 {
651   PetscErrorCode ierr;
652 
653   PetscFunctionBegin;
654   PetscValidDeviceContext(dctx,1);
655   /* if it isn't setup there is nothing to sync on */
656   if (dctx->setup) {ierr = (*dctx->ops->synchronize)(dctx);CHKERRQ(ierr);}
657   dctx->idle = PETSC_TRUE;
658   PetscFunctionReturn(0);
659 }
660 
/* Defaults for the root (global) context: these initialize rootDeviceType/rootStreamType
 * and are what the finalizer restores them to. The device default is also the fallback
 * shown by PetscDeviceContextSetFromOptions() when a context has no device attached. */
661 #define PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE PETSC_DEVICE_DEFAULT
/* NOTE(review): the marker below suggests the stream default is provisional -- confirm the
 * intended value before relying on it */
662 // REMOVE ME (change)
663 #define PETSC_DEVICE_CONTEXT_DEFAULT_STREAM PETSC_STREAM_GLOBAL_BLOCKING
664 
/* File-local state of the root context. rootDeviceType and rootStreamType are the settings
 * applied when the global context is created lazily; globalContext is the context returned
 * by PetscDeviceContextGetCurrentContext() and is null until it is created or set. */
665 static PetscDeviceType    rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE;
666 static PetscStreamType    rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM;
667 static PetscDeviceContext globalContext  = PETSC_NULLPTR;
668 
669 /* when PetsDevice initializes PetscDeviceContext eagerly the type of device created should
670  * match whatever device is eagerly intialized */
671 PetscErrorCode PetscDeviceContextSetRootDeviceType_Internal(PetscDeviceType type)
672 {
673   PetscFunctionBegin;
674   PetscValidDeviceType(type,1);
675   rootDeviceType = type;
676   PetscFunctionReturn(0);
677 }
678 
/* Stream-type counterpart of PetscDeviceContextSetRootDeviceType_Internal(). It is excluded
 * from compilation by the #if 0 guard, so rootStreamType currently only ever holds its
 * default value. */
679 #if 0
680 /* currently unused */
681 PetscErrorCode PetscDeviceContextSetRootStreamType_Internal(PetscStreamType type)
682 {
683   PetscFunctionBegin;
684   PetscValidStreamType(type,1);
      /* record the stream type to use for the root context */
685   rootStreamType = type;
686   PetscFunctionReturn(0);
687 }
688 #endif
689 
690 static PetscErrorCode PetscDeviceContextSetupGlobalContext_Private(void)
691 {
692   PetscErrorCode    ierr;
693   static const auto PetscDeviceContextFinalizer = []() -> PetscErrorCode {
694     PetscErrorCode ierr;
695 
696     PetscFunctionBegin;
697     ierr = PetscDeviceContextDestroy(&globalContext);CHKERRQ(ierr);
698     rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE;
699     rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM;
700     PetscFunctionReturn(0);
701   };
702 
703   PetscFunctionBegin;
704   if (globalContext) PetscFunctionReturn(0);
705   /* this exists purely as a valid device check. */
706   ierr = PetscDeviceInitializePackage();CHKERRQ(ierr);
707   ierr = PetscRegisterFinalize(PetscDeviceContextFinalizer);CHKERRQ(ierr);
708   ierr = PetscInfo(PETSC_NULLPTR,"Initializing global PetscDeviceContext\n");CHKERRQ(ierr);
709   /* we call the allocator directly here since the ObjectPool creates a PetscContainer which
710    * eventually tries to call logging functions. However, this routine may be purposefully
711    * called __before__ logging is initialized, so the logging function would PETSCABORT */
712   ierr = PetscDeviceContextAllocator::create(&globalContext);CHKERRQ(ierr);
713   ierr = PetscDeviceContextSetStreamType(globalContext,rootStreamType);CHKERRQ(ierr);
714   ierr = PetscDeviceContextSetDefaultDeviceForType_Internal(globalContext,rootDeviceType);CHKERRQ(ierr);
715   ierr = PetscDeviceContextSetUp(globalContext);CHKERRQ(ierr);
716   PetscFunctionReturn(0);
717 }
718 
719 /*@C
720   PetscDeviceContextGetCurrentContext - Get the current active PetscDeviceContext
721 
722   Not Collective, Asynchronous
723 
724   Output Parameter:
725 . dctx - The PetscDeviceContext
726 
727   Notes:
728   The user generally should not destroy contexts retrieved with this routine unless they
729   themselves have created them. There exists no protection against destroying the root
730   context.
731 
732   Developer Notes:
733   Unless the user has set their own, this routine creates the "root" context the first time it
734   is called, registering its destructor to PetscFinalize().
735 
736   Level: beginner
737 
738 .seealso: PetscDeviceContextSetCurrentContext(), PetscDeviceContextFork(),
739 PetscDeviceContextJoin(), PetscDeviceContextCreate()
740 @*/
741 PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *dctx)
742 {
743   PetscErrorCode ierr;
744 
745   PetscFunctionBegin;
746   PetscValidPointer(dctx,1);
747   ierr = PetscDeviceContextSetupGlobalContext_Private();CHKERRQ(ierr);
748   /* while the static analyzer can find global variables, it will throw a warning about not
749    * being able to connect this back to the function arguments */
750   PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscValidDeviceContext(globalContext,-1));
751   *dctx = globalContext;
752   PetscFunctionReturn(0);
753 }
754 
755 /*@C
756   PetscDeviceContextSetCurrentContext - Set the current active PetscDeviceContext
757 
758   Not Collective, Asynchronous
759 
760   Input Parameter:
761 . dctx - The PetscDeviceContext
762 
763   Notes:
764   This routine can be used to set the defacto "root" PetscDeviceContext to a user-defined
765   implementation by calling this routine immediately after PetscInitialize() and ensuring that
766   PetscDevice is not greedily intialized. In this case the user is responsible for destroying
767   their PetscDeviceContext before PetscFinalize() returns.
768 
769   The old context is not stored in any way by this routine; if one is overriding a context that
770   they themselves do not control, one should take care to temporarily store it by calling
771   PetscDeviceContextGetCurrentContext() before calling this routine.
772 
773   Level: beginner
774 
775 .seealso: PetscDeviceContextGetCurrentContext(), PetscDeviceContextFork(),
776 PetscDeviceContextJoin(), PetscDeviceContextCreate()
777 @*/
778 PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext dctx)
779 {
780   PetscErrorCode ierr;
781 
782   PetscFunctionBegin;
783   PetscValidDeviceContext(dctx,1);
784   if (PetscUnlikelyDebug(!dctx->setup)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PetscDeviceContext %" PetscInt_FMT " must be set up before being set as global context",dctx->id);
785   globalContext = dctx;
786   ierr = PetscInfo1(PETSC_NULLPTR,"Set global PetscDeviceContext id %" PetscInt_FMT "\n",dctx->id);CHKERRQ(ierr);
787   PetscFunctionReturn(0);
788 }
789 
790 /*@C
791   PetscDeviceContextSetFromOptions - Configure a PetscDeviceContext from the options database
792 
793   Collective on comm, Asynchronous
794 
795   Input Parameters:
796 + comm   - MPI communicator on which to query the options database
797 . prefix - prefix to prepend to all options database queries, NULL if not needed
798 - dctx   - The PetscDeviceContext to configure
799 
800   Output Parameter:
801 . dctx - The PetscDeviceContext
802 
803   Options Database:
804 + -device_context_stream_type - type of stream to create inside the PetscDeviceContext -
805    PetscDeviceContextSetStreamType()
806 - -device_context_device_type - the type of PetscDevice to attach by default - PetscDeviceType
807 
808   Level: beginner
809 
810 .seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextSetDevice()
811 @*/
812 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, const char prefix[], PetscDeviceContext dctx)
813 {
814   PetscBool      flag;
815   PetscInt       stype,dtype;
816   PetscErrorCode ierr;
817 
818   PetscFunctionBegin;
819   if (prefix) PetscValidCharPointer(prefix,2);
820   PetscValidDeviceContext(dctx,3);
821   ierr = PetscOptionsBegin(comm,prefix,"PetscDeviceContext Options","Sys");CHKERRQ(ierr);
822   ierr = PetscOptionsEList("-device_context_stream_type","PetscDeviceContext PetscStreamType","PetscDeviceContextSetStreamType",PetscStreamTypes,PETSC_STREAM_MAX,PetscStreamTypes[dctx->streamType],&stype,&flag);CHKERRQ(ierr);
823   if (flag) {
824     ierr = PetscDeviceContextSetStreamType(dctx,static_cast<PetscStreamType>(stype));CHKERRQ(ierr);
825   }
826   ierr = PetscOptionsEList("-device_context_device_type","Underlying PetscDevice","PetscDeviceContextSetDevice",PetscDeviceTypes+1,PETSC_DEVICE_MAX-1,dctx->device ? PetscDeviceTypes[dctx->device->type] : PetscDeviceTypes[PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE],&dtype,&flag);CHKERRQ(ierr);
827   if (flag) {
828     ierr = PetscDeviceContextSetDefaultDeviceForType_Internal(dctx,static_cast<PetscDeviceType>(dtype+1));CHKERRQ(ierr);
829   }
830   ierr = PetscOptionsEnd();CHKERRQ(ierr);
831   PetscFunctionReturn(0);
832 }
833