xref: /petsc/src/sys/objects/device/interface/dcontext.cxx (revision 075dfc9bb94b69f96d7d99357fc870a699005ff3)
1 #include <petsc/private/deviceimpl.h> /*I "petscdevice.h" I*/
2 #include "objpool.hpp"
3 
4 /* Define the allocator */
5 struct PetscDeviceContextAllocator : Petsc::Allocator<PetscDeviceContext>
6 {
7   static PetscInt PetscDeviceContextID;
8 
9   PETSC_NODISCARD PetscErrorCode create(PetscDeviceContext *dctx) noexcept
10   {
11     PetscDeviceContext dc;
12     PetscErrorCode     ierr;
13 
14     PetscFunctionBegin;
15     ierr           = PetscNew(&dc);CHKERRQ(ierr);
16     dc->id         = PetscDeviceContextID++;
17     dc->idle       = PETSC_TRUE;
18     dc->streamType = PETSC_STREAM_DEFAULT_BLOCKING;
19     *dctx          = dc;
20     PetscFunctionReturn(0);
21   }
22 
23   PETSC_NODISCARD PetscErrorCode destroy(PetscDeviceContext &dctx) const noexcept
24   {
25     PetscErrorCode ierr;
26 
27     PetscFunctionBegin;
28     if (PetscUnlikelyDebug(dctx->numChildren)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Device context still has %D un-restored children, must call PetscDeviceContextRestore() on all children before destroying",dctx->numChildren);
29     if (dctx->ops->destroy) {ierr = (*dctx->ops->destroy)(dctx);CHKERRQ(ierr);}
30     ierr = PetscDeviceDestroy(&dctx->device);CHKERRQ(ierr);
31     ierr = PetscFree(dctx->childIDs);CHKERRQ(ierr);
32     ierr = PetscFree(dctx);CHKERRQ(ierr);
33     PetscFunctionReturn(0);
34   }
35 
36   PETSC_NODISCARD PetscErrorCode reset(PetscDeviceContext &dctx) const noexcept
37   {
38     PetscErrorCode ierr;
39 
40     PetscFunctionBegin;
41     /* don't deallocate the child array, rather just zero it out */
42     ierr = PetscArrayzero(dctx->childIDs,dctx->maxNumChildren);CHKERRQ(ierr);
43     dctx->setup       = PETSC_FALSE;
44     dctx->numChildren = 0;
45     dctx->idle        = PETSC_TRUE;
46     dctx->streamType  = PETSC_STREAM_DEFAULT_BLOCKING;
47     PetscFunctionReturn(0);
48   }
49 
50   PETSC_NODISCARD PetscErrorCode finalize(void) noexcept
51   {
52     PetscFunctionBegin;
53     PetscDeviceContextID = 0;
54     PetscFunctionReturn(0);
55   }
56 };
57 PetscInt PetscDeviceContextAllocator::PetscDeviceContextID = 0;
58 
/* file-local pool of device contexts: PetscDeviceContextCreate() draws from it via get()
   and PetscDeviceContextDestroy() hands contexts back via reclaim() */
static Petsc::ObjectPool<PetscDeviceContext,PetscDeviceContextAllocator> contextPool;
60 
61 /*@C
62   PetscDeviceContextCreate - Creates a PetscDeviceContext
63 
64   Not Collective, Asynchronous
65 
  Output Parameter:
67 . dctx - The PetscDeviceContext
68 
69   Notes:
70   Unlike almost every other PETSc class it is advised that most users use
71   PetscDeviceContextDuplicate() rather than this routine to create new contexts. Contexts
72   of different types are incompatible with one another; using
73   PetscDeviceContextDuplicate() ensures compatible types.
74 
75   Level: beginner
76 
77 .seealso: PetscDeviceContextDuplicate(), PetscDeviceContextSetDevice(),
78 PetscDeviceContextSetStreamType(), PetscDeviceContextSetUp(),
79 PetscDeviceContextSetFromOptions(), PetscDeviceContextDestroy()
80 @*/
81 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx)
82 {
83   PetscErrorCode ierr;
84 
85   PetscFunctionBegin;
86   PetscValidPointer(dctx,1);
87   ierr = PetscDeviceInitializePackage();CHKERRQ(ierr);
88   ierr = contextPool.get(*dctx);CHKERRQ(ierr);
89   PetscFunctionReturn(0);
90 }
91 
92 /*@C
93   PetscDeviceContextDestroy - Frees a PetscDeviceContext
94 
95   Not Collective, Asynchronous
96 
97   Input Parameters:
98 . dctx - The PetscDeviceContext
99 
100   Notes:
  No implicit synchronization occurs due to this routine, all resources are released completely asynchronously
  w.r.t. the host. If one needs to guarantee access to the data produced on this context's stream one should perform the
  appropriate synchronization before calling this routine.
104 
105   Developer Notes:
106   The context is never actually "destroyed", only returned to an ever growing pool of
107   contexts. There are currently no safeguards on the size of the pool, this should perhaps
108   be implemented.
109 
110   Level: beginner
111 
112 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetUp(), PetscDeviceContextSynchronize()
113 @*/
114 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx)
115 {
116   PetscErrorCode ierr;
117 
118   PetscFunctionBegin;
119   if (!*dctx) PetscFunctionReturn(0);
120   /* use move assignment whenever possible */
121   ierr = contextPool.reclaim(std::move(*dctx));CHKERRQ(ierr);
122   PetscFunctionReturn(0);
123 }
124 
125 /*@C
126   PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a PetscDeviceContext
127 
128   Not Collective, Asynchronous
129 
130   Input Parameters:
131 + dctx - The PetscDeviceContext
132 - type - The PetscStreamType
133 
134   Notes:
135   See PetscStreamType in include/petscdevicetypes.h for more information on the available
136   types and their interactions. If the PetscDeviceContext was previously set up and stream
137   type was changed, you must call PetscDeviceContextSetUp() again after this routine.
138 
139   Level: intermediate
140 
141 .seealso: PetscDeviceContextGetStreamType(), PetscDeviceContextCreate(), PetscDeviceContextSetUp(), PetscDeviceContextSetFromOptions()
142 @*/
143 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type)
144 {
145   PetscFunctionBegin;
146   PetscValidDeviceContext(dctx,1);
147   PetscValidStreamType(type,2);
148   /* only need to do complex swapping if the object has already been setup */
149   if (dctx->setup && (dctx->streamType != type)) {
150     PetscErrorCode ierr;
151 
152     ierr = (*dctx->ops->changestreamtype)(dctx,type);CHKERRQ(ierr);
153     dctx->setup = PETSC_FALSE;
154   }
155   dctx->streamType = type;
156   PetscFunctionReturn(0);
157 }
158 
159 /*@C
160   PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a PetscDeviceContext
161 
162   Not Collective, Asynchronous
163 
164   Input Parameter:
165 . dctx - The PetscDeviceContext
166 
167   Output Parameter:
168 . type - The PetscStreamType
169 
170   Notes:
171   See PetscStreamType in include/petscdevicetypes.h for more information on the available types and their interactions
172 
173   Level: intermediate
174 
175 .seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextCreate(), PetscDeviceContextSetFromOptions()
176 @*/
177 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type)
178 {
179   PetscFunctionBegin;
180   PetscValidDeviceContext(dctx,1);
181   PetscValidIntPointer(type,2);
182   *type = dctx->streamType;
183   PetscFunctionReturn(0);
184 }
185 
186 /*@C
187   PetscDeviceContextSetDevice - Set the underlying device for the PetscDeviceContext
188 
189   Not Collective, Possibly Synchronous
190 
191   Input Parameters:
192 + dctx   - The PetscDeviceContext
193 - device - The PetscDevice
194 
195   Notes:
  This routine is effectively PetscDeviceContext's "set-type" (so every PetscDeviceContext
  must also have an attached PetscDevice). Unlike the usual set-type semantics, it is
  not strictly necessary to set a context's device to enable usage, any created device
  contexts will always come equipped with the "default" device.
200 
201   This routine may initialize the backend device and incur synchronization.
202 
203   Level: intermediate
204 
205 .seealso: PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceContextGetDevice()
206 @*/
207 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device)
208 {
209   PetscErrorCode ierr;
210 
211   PetscFunctionBegin;
212   PetscValidDeviceContext(dctx,1);
213   PetscValidDevice(device,2);
214   if (dctx->device == device) PetscFunctionReturn(0);
215   ierr = PetscDeviceDestroy(&dctx->device);CHKERRQ(ierr);
216   ierr = PetscMemzero(dctx->ops,sizeof(*dctx->ops));CHKERRQ(ierr);
217   ierr = (*device->ops->createcontext)(dctx);CHKERRQ(ierr);
218   dctx->device = PetscDeviceReference(device);
219   dctx->setup  = PETSC_FALSE;
220   PetscFunctionReturn(0);
221 }
222 
223 /*@C
224   PetscDeviceContextGetDevice - Get the underlying PetscDevice for a PetscDeviceContext
225 
226   Not Collective, Asynchronous
227 
228   Input Parameter:
229 . dctx - the PetscDeviceContext
230 
231   Output Parameter:
232 . device - The PetscDevice
233 
234   Notes:
235   This is a borrowed reference, the user should not destroy the device.
236 
237   Level: intermediate
238 
239 .seealso: PetscDeviceContextSetDevice(), PetscDevice
240 @*/
241 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device)
242 {
243   PetscFunctionBegin;
244   PetscValidDeviceContext(dctx,1);
245   PetscValidPointer(device,2);
246   *device = dctx->device;
247   PetscFunctionReturn(0);
248 }
249 
250 /*@C
251   PetscDeviceContextSetUp - Prepares a PetscDeviceContext for use
252 
253   Not Collective, Asynchronous
254 
255   Input Parameter:
256 . dctx - The PetscDeviceContext
257 
258   Developer Notes:
259   This routine is usually the stage where a PetscDeviceContext acquires device-side data structures such as streams,
260   events, and (possibly) handles.
261 
262   Level: beginner
263 
264 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextDestroy(), PetscDeviceContextSetFromOptions()
265 @*/
266 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx)
267 {
268   PetscErrorCode ierr;
269 
270   PetscFunctionBegin;
271   PetscValidDeviceContext(dctx,1);
272   if (!dctx->device) {
273     ierr = PetscInfo2(NULL,"PetscDeviceContext %d did not have an explicitly attached PetscDevice, using default with type %s\n",dctx->id,PetscDeviceKinds[PETSC_DEVICE_DEFAULT]);CHKERRQ(ierr);
274     ierr = PetscDeviceContextSetDevice(dctx,PetscDeviceDefault_Internal());CHKERRQ(ierr);
275   }
276   if (dctx->setup) PetscFunctionReturn(0);
277   ierr = (*dctx->ops->setup)(dctx);CHKERRQ(ierr);
278   dctx->setup = PETSC_TRUE;
279   PetscFunctionReturn(0);
280 }
281 
282 /*@C
283   PetscDeviceContextDuplicate - Duplicates a PetscDeviceContext object
284 
285   Not Collective, Asynchronous
286 
287   Input Parameter:
288 . dctx - The PetscDeviceContext to duplicate
289 
  Output Parameter:
. dctxdup - The duplicated PetscDeviceContext
292 
293   Notes:
294   This is a shorthand method for creating a PetscDeviceContext with the exact same
295   settings as another. Note however that the duplicated PetscDeviceContext does not "share"
296   any of the underlying data with the original, (including its current stream-state) they
297   are completely separate objects.
298 
299   Level: beginner
300 
301 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetStreamType()
302 @*/
303 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup)
304 {
305   PetscErrorCode ierr;
306 
307   PetscFunctionBegin;
308   PetscValidDeviceContext(dctx,1);
309   PetscValidPointer(dctxdup,2);
310   ierr = PetscDeviceContextCreate(dctxdup);CHKERRQ(ierr);
311   ierr = PetscDeviceContextSetDevice(*dctxdup,dctx->device);CHKERRQ(ierr);
312   ierr = PetscDeviceContextSetStreamType(*dctxdup,dctx->streamType);CHKERRQ(ierr);
313   ierr = PetscDeviceContextSetUp(*dctxdup);CHKERRQ(ierr);
314   PetscFunctionReturn(0);
315 }
316 
317 /*@C
318   PetscDeviceContextQueryIdle - Returns whether or not a PetscDeviceContext is idle
319 
320   Not Collective, Asynchronous
321 
322   Input Parameter:
323 . dctx - The PetscDeviceContext object
324 
325   Output Parameter:
326 . idle - PETSC_TRUE if PetscDeviceContext has NO work, PETSC_FALSE if it has work
327 
328   Notes:
  This routine only considers a single context and does NOT take any of its children into account. That is, if dctx is
  idle but has dependents who do have work, this routine still returns PETSC_TRUE.
331 
332   Results of PetscDeviceContextQueryIdle() are cached on return, allowing this function to be called repeatedly in an
333   efficient manner. When debug mode is enabled this cache is verified on every call to
334   this routine, but is blindly believed when debugging is disabled.
335 
336   Level: intermediate
337 
338 .seealso: PetscDeviceContextCreate(), PetscDeviceContextWaitForContext(), PetscDeviceContextFork()
339 @*/
340 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle)
341 {
342   PetscErrorCode ierr;
343 
344   PetscFunctionBegin;
345   PetscValidDeviceContext(dctx,1);
346   PetscValidBoolPointer(idle,2);
347   if (dctx->idle) {
348     *idle = PETSC_TRUE;
349     ierr = PetscDeviceContextValidateIdle_Internal(dctx);CHKERRQ(ierr);
350   } else {
351     ierr = (*dctx->ops->query)(dctx,idle);CHKERRQ(ierr);
352     dctx->idle = *idle;
353   }
354   PetscFunctionReturn(0);
355 }
356 
357 /*@C
358   PetscDeviceContextWaitForContext - Make one context wait for another context to finish
359 
360   Not Collective, Asynchronous
361 
362   Input Parameters:
363 + dctxa - The PetscDeviceContext object that is waiting
364 - dctxb - The PetscDeviceContext object that is being waited on
365 
366   Notes:
367   Serializes two PetscDeviceContexts. This routine uses only the state of dctxb at the moment this routine was
368   called, so any future work queued will not affect dctxa. It is safe to pass the same context to both arguments.
369 
370   Level: beginner
371 
372 .seealso: PetscDeviceContextCreate(), PetscDeviceContextQueryIdle(), PetscDeviceContextJoin()
373 @*/
374 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb)
375 {
376   PetscErrorCode ierr;
377 
378   PetscFunctionBegin;
379   PetscCheckCompatibleDeviceContexts(dctxa,1,dctxb,2);
380   if (dctxa == dctxb) PetscFunctionReturn(0);
381   if (dctxb->idle) {
382     /* No need to do the extra function lookup and event record if the stream were waiting on isn't doing anything */
383     ierr = PetscDeviceContextValidateIdle_Internal(dctxb);CHKERRQ(ierr);
384   } else {
385     ierr = (*dctxa->ops->waitforctx)(dctxa,dctxb);CHKERRQ(ierr);
386   }
387   PetscFunctionReturn(0);
388 }
389 
390 /*@C
391   PetscDeviceContextFork - Create a set of dependent child contexts from a parent context
392 
393   Not Collective, Asynchronous
394 
395   Input Parameters:
396 + dctx - The parent PetscDeviceContext
397 - n    - The number of children to create
398 
399   Output Parameter:
400 . dsub - The created child context(s)
401 
402   Notes:
403   This routine creates n edges of a DAG from a source node which are causally dependent on the source node, meaning
404   that work queued on child contexts will not start until the parent context finishes its work. This accounts for work
405   queued on the parent up until calling this function, any subsequent work enqueued on the parent has no effect on the children.
406 
  Any children created with this routine have their lifetimes bounded by the parent. That is, the parent context expects
  to free all of its children (and ONLY its children) before it is itself freed.
409 
410   DAG representation:
411 .vb
412   time ->
413 
414   -> dctx \----> dctx ------>
415            \---> dsub[0] --->
416             \--> ... ------->
417              \-> dsub[n-1] ->
418 .ve
419 
420   Level: intermediate
421 
422 .seealso: PetscDeviceContextJoin(), PetscDeviceContextSynchronize(), PetscDeviceContextQueryIdle()
423 @*/
424 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub)
425 {
426 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
427   const PetscInt      nBefore = n;
428   static std::string  idList;
429 #endif
430   PetscDeviceContext *dsubTmp = nullptr;
431   PetscInt            i = 0;
432   PetscErrorCode      ierr;
433 
434   PetscFunctionBegin;
435   PetscValidDeviceContext(dctx,1);
436   PetscValidPointer(dsub,3);
437   if (PetscUnlikelyDebug(n < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts requested %D < 0",n);
438 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
439   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
440   idList.reserve(4*n);
441 #endif
442   /* update child totals */
443   dctx->numChildren += n;
444   /* now to find out if we have room */
445   if (dctx->numChildren > dctx->maxNumChildren) {
446     /* no room, either from having too many kids or not having any */
447     if (dctx->childIDs) {
448       /* have existing children, must reallocate them */
449       ierr = PetscRealloc(dctx->numChildren*sizeof(*dctx->childIDs),&dctx->childIDs);CHKERRQ(ierr);
450       /* clear the extra memory since realloc doesn't do it for us */
451       ierr = PetscArrayzero((dctx->childIDs)+(dctx->maxNumChildren),(dctx->numChildren)-(dctx->maxNumChildren));CHKERRQ(ierr);
452     } else {
453       /* have no children */
454       ierr = PetscCalloc1(dctx->numChildren,&dctx->childIDs);CHKERRQ(ierr);
455     }
456     /* update total number of children */
457     dctx->maxNumChildren = dctx->numChildren;
458   }
459   ierr = PetscMalloc1(n,&dsubTmp);CHKERRQ(ierr);
460   while (n) {
461     /* empty child slot */
462     if (!(dctx->childIDs[i])) {
463       /* create the child context in the image of its parent */
464       ierr = PetscDeviceContextDuplicate(dctx,dsubTmp+i);CHKERRQ(ierr);
465       ierr = PetscDeviceContextWaitForContext(dsubTmp[i],dctx);CHKERRQ(ierr);
466       /* register the child with its parent */
467       dctx->childIDs[i] = dsubTmp[i]->id;
468 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
469       idList += std::to_string(dsubTmp[i]->id);
470       if (n != 1) idList += ", ";
471 #endif
472       --n;
473     }
474     ++i;
475   }
476 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
477   ierr = PetscInfo3(NULL,"Forked %D children from parent %D with IDs: %s\n",nBefore,dctx->id,idList.c_str());CHKERRQ(ierr);
478   /* resets the size but doesn't deallocate the memory */
479   idList.clear();
480 #endif
481   /* pass the children back to caller */
482   *dsub = dsubTmp;
483   PetscFunctionReturn(0);
484 }
485 
486 /*@C
487   PetscDeviceContextJoin - Converge a set of child contexts
488 
489   Not Collective, Asynchronous
490 
491   Input Parameters:
492 + dctx         - A PetscDeviceContext to converge on
493 . n            - The number of sub contexts to converge
494 . joinMode     - The type of join to perform
495 - dsub         - The sub contexts to converge
496 
497   Notes:
  If PetscDeviceContextFork() creates n edges from a source node which all depend on the
  source node, then this routine is the exact mirror. That is, it creates a node
  (represented in dctx) which receives n edges (and optionally destroys them) which is
  dependent on the completion of all incoming edges.
502 
503   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY all contexts in dsub will be destroyed
504   by this routine. Thus all sub contexts must have been created with the dctx passed to
505   this routine.
506 
  If joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC dctx waits for all sub contexts but the
  sub contexts do not wait for one another afterwards.
509 
510   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC all sub contexts will additionally
511   wait on dctx after converging. This has the effect of "synchronizing" the outgoing
512   edges.
513 
514   DAG representations:
515   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY
516 .vb
517   time ->
518 
519   -> dctx ---------/- dctx ->
520   -> dsub[0] -----/
521   ->  ... -------/
522   -> dsub[n-1] -/
523 .ve
524   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC
525 .vb
526   time ->
527 
528   -> dctx ---------/- dctx ->
529   -> dsub[0] -----/--------->
530   ->  ... -------/---------->
531   -> dsub[n-1] -/----------->
532 .ve
533   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC
534 .vb
535   time ->
536 
537   -> dctx ---------/- dctx -\----> dctx ------>
538   -> dsub[0] -----/          \---> dsub[0] --->
539   ->  ... -------/            \--> ... ------->
540   -> dsub[n-1] -/              \-> dsub[n-1] ->
541 .ve
542 
543   Level: intermediate
544 
545 .seealso: PetscDeviceContextFork(), PetscDeviceContextSynchronize(), PetscDeviceContextJoinMode
546 @*/
547 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub)
548 {
549 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
550   static std::string idList;
551 #endif
552   PetscErrorCode     ierr;
553 
554   PetscFunctionBegin;
555   /* validity of dctx is checked in the wait-for loop */
556   PetscValidPointer(dsub,4);
557   if (PetscUnlikelyDebug(n < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts merged %D < 0",n);
558 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
559   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
560   idList.reserve(4*n);
561 #endif
562   /* first dctx waits on all the incoming edges */
563   for (PetscInt i = 0; i < n; ++i) {
564     PetscCheckCompatibleDeviceContexts(dctx,1,(*dsub)[i],4);
565     ierr = PetscDeviceContextWaitForContext(dctx,(*dsub)[i]);CHKERRQ(ierr);
566 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
567     idList += std::to_string((*dsub)[i]->id);
568     if (i+1 < n) idList += ", ";
569 #endif
570   }
571 
572   /* now we handle the aftermath */
573   switch (joinMode) {
574   case PETSC_DEVICE_CONTEXT_JOIN_DESTROY:
575     {
576       PetscInt j = 0;
577 
578       if (PetscUnlikelyDebug(n > dctx->numChildren)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Trying to destroy %D children of a parent context that only has %D children, likely trying to restore to wrong parent",n,dctx->numChildren);
579       /* update child count while it's still fresh in memory */
580       dctx->numChildren -= n;
581       for (PetscInt i = 0; i < dctx->maxNumChildren; ++i) {
582         if (dctx->childIDs[i] && (dctx->childIDs[i] == (*dsub)[j]->id)) {
583           /* child is one of ours, can destroy it */
584           ierr = PetscDeviceContextDestroy((*dsub)+j);CHKERRQ(ierr);
585           /* reset the child slot */
586           dctx->childIDs[i] = 0;
587           if (++j == n) break;
588         }
589       }
590       /* gone through the loop but did not find every child, if this triggers (or well, doesn't) on perf-builds we leak the remaining contexts memory */
591       if (PetscUnlikelyDebug(j != n)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"%D contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in.",n-j);
592       ierr = PetscFree(*dsub);CHKERRQ(ierr);
593     }
594     break;
595   case PETSC_DEVICE_CONTEXT_JOIN_SYNC:
596     for (PetscInt i = 0; i < n; ++i) {
597       ierr = PetscDeviceContextWaitForContext((*dsub)[i],dctx);CHKERRQ(ierr);
598     }
599   case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC:
600     break;
601   default:
602     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown PetscDeviceContextJoinMode given");
603   }
604 
605 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
606   ierr = PetscInfo4(NULL,"Joined %D ctxs to ctx %D, mode %s with IDs: %s\n",n,dctx->id,PetscDeviceContextJoinModes[joinMode],idList.c_str());CHKERRQ(ierr);
607   idList.clear();
608 #endif
609   PetscFunctionReturn(0);
610 }
611 
612 /*@C
613   PetscDeviceContextSynchronize - Block the host until all work queued on or associated with a PetscDeviceContext has finished
614 
615   Not Collective, Synchronous
616 
617   Input Parameters:
618 . dctx - The PetscDeviceContext to synchronize
619 
620   Level: beginner
621 
622 .seealso: PetscDeviceContextFork(), PetscDeviceContextJoin(), PetscDeviceContextQueryIdle()
623 @*/
624 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx)
625 {
626   PetscErrorCode ierr;
627 
628   PetscFunctionBegin;
629   PetscValidDeviceContext(dctx,1);
630   /* if it isn't setup there is nothing to sync on */
631   if (dctx->setup) {ierr = (*dctx->ops->synchronize)(dctx);CHKERRQ(ierr);}
632   dctx->idle = PETSC_TRUE;
633   PetscFunctionReturn(0);
634 }
635 
/* the "current" context: returned by PetscDeviceContextGetCurrentContext() and replaced by
   PetscDeviceContextSetCurrentContext() */
static PetscDeviceContext globalContext      = nullptr;
/* whether PetscDeviceContextGetCurrentContext() has already run its one-time set up of
   globalContext */
static PetscBool          globalContextSetup = PETSC_FALSE;
/* stream type given to the root context when it is initialized */
static PetscStreamType    defaultStreamType  = PETSC_STREAM_DEFAULT_BLOCKING;
639 
640 /* automatically registered to PetscFinalize() when first context is instantiated, do not
641    call */
642 static PetscErrorCode PetscDeviceContextDestroyGlobalContext_Private(void)
643 {
644   PetscErrorCode ierr;
645 
646   PetscFunctionBegin;
647   ierr = PetscDeviceContextSynchronize(globalContext);CHKERRQ(ierr);
648   ierr = PetscDeviceContextDestroy(&globalContext);CHKERRQ(ierr);
649   /* reset everything to defaults */
650   defaultStreamType  = PETSC_STREAM_DEFAULT_BLOCKING;
651   globalContextSetup = PETSC_FALSE;
652   PetscFunctionReturn(0);
653 }
654 
655 /* creates and initializes the root context in PetscInitialize() but does not call
656    SetUp() as the user may wish to change types after PetscInitialize() */
657 PetscErrorCode PetscDeviceContextInitializeRootContext_Internal(MPI_Comm comm, const char prefix[])
658 {
659   PetscErrorCode ierr;
660 
661   PetscFunctionBegin;
662   ierr = PetscInfo1(NULL,"Initializing root PetscDeviceContext with PetscDeviceKind %s\n",PetscDeviceKinds[PETSC_DEVICE_DEFAULT]);CHKERRQ(ierr);
663   ierr = PetscDeviceContextCreate(&globalContext);CHKERRQ(ierr);
664   if (PetscUnlikelyDebug(globalContext->id != 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The root current PetscDeviceContext should have id = 0, however it has id = %D",globalContext->id);
665   ierr = PetscDeviceContextSetDevice(globalContext,PetscDeviceDefault_Internal());CHKERRQ(ierr);
666   ierr = PetscDeviceContextSetStreamType(globalContext,defaultStreamType);CHKERRQ(ierr);
667   ierr = PetscDeviceContextSetFromOptions(comm,prefix,globalContext);CHKERRQ(ierr);
668   ierr = PetscRegisterFinalize(PetscDeviceContextDestroyGlobalContext_Private);CHKERRQ(ierr);
669   PetscFunctionReturn(0);
670 }
671 
672 /*@C
673   PetscDeviceContextGetCurrentContext - Get the current active PetscDeviceContext
674 
675   Not Collective, Asynchronous
676 
677   Output Parameter:
678 . dctx - The PetscDeviceContext
679 
680   Notes:
681   The user generally should not destroy contexts retrieved with this routine unless they themselves have created
682   them. There exists no protection against destroying the root context.
683 
684   Developer Notes:
  This routine sets up the "root" context the first time it is called. The root context itself is created, and its
  destructor registered to PetscFinalize(), by PetscDeviceContextInitializeRootContext_Internal(). The root context is
  synchronized before being destroyed.
687 
688   Level: beginner
689 
690 .seealso: PetscDeviceContextSetCurrentContext(), PetscDeviceContextFork(),
691 PetscDeviceContextJoin(), PetscDeviceContextCreate()
692 @*/
693 PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *dctx)
694 {
695   PetscFunctionBegin;
696   PetscValidPointer(dctx,1);
697   if (PetscUnlikely(!globalContextSetup)) {
698     PetscErrorCode ierr;
699 
700     /* if there is no available device backend, PetscDeviceInitializePackage() will fire a
701        PETSC_ERR_SUP_SYS error. */
702     ierr = PetscDeviceInitializePackage();CHKERRQ(ierr);
703     ierr = PetscDeviceContextSetUp(globalContext);CHKERRQ(ierr);
704     globalContextSetup = PETSC_TRUE;
705   }
706   *dctx = globalContext;
707   PetscFunctionReturn(0);
708 }
709 
710 /*@C
711   PetscDeviceContextSetCurrentContext - Set the current active PetscDeviceContext
712 
713   Not Collective, Asynchronous
714 
715   Input Parameter:
716 . dctx - The PetscDeviceContext
717 
718   Notes:
719   The old context is not stored in any way by this routine; if one is overriding a context that they themselves do not
720   control, one should take care to temporarily store it by calling PetscDeviceContextGetCurrentContext() before calling
721   this routine.
722 
723   Level: beginner
724 
725 .seealso: PetscDeviceContextGetCurrentContext(), PetscDeviceContextFork(),
726 PetscDeviceContextJoin(), PetscDeviceContextCreate()
727 @*/
728 PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext dctx)
729 {
730   PetscErrorCode ierr;
731 
732   PetscFunctionBegin;
733   PetscValidDeviceContext(dctx,1);
734   globalContext = dctx;
735   ierr = PetscInfo1(NULL,"Set global device context id %D\n",dctx->id);CHKERRQ(ierr);
736   PetscFunctionReturn(0);
737 }
738 
739 /*@C
740   PetscDeviceContextSetFromOptions - Configure a PetscDeviceContext from the options database
741 
742   Collective on comm, Asynchronous
743 
744   Input Parameters:
745 + comm   - MPI communicator on which to query the options database
746 . prefix - prefix to prepend to all options database queries, NULL if not needed
747 - dctx   - The PetscDeviceContext to configure
748 
749   Output Parameter:
750 . dctx - The PetscDeviceContext
751 
752   Options Database:
753 + -device_context_device_kind - the kind of PetscDevice to attach by default - PetscDeviceKind
754 - -device_context_stream_type - type of stream to create inside the PetscDeviceContext -
755   PetscDeviceContextSetStreamType()
756 
757   Level: beginner
758 
759 .seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextSetDevice()
760 @*/
761 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, const char prefix[], PetscDeviceContext dctx)
762 {
763   PetscBool      flag;
764   PetscInt       stype,dkind;
765   PetscErrorCode ierr;
766 
767   PetscFunctionBegin;
768   if (prefix) {PetscValidCharPointer(prefix,2);}
769   PetscValidDeviceContext(dctx,3);
770   ierr = PetscOptionsBegin(comm,prefix,"PetscDeviceContext Options","Sys");CHKERRQ(ierr);
771   ierr = PetscOptionsEList("-device_context_device_kind","Underlying PetscDevice","PetscDeviceContextSetDevice",PetscDeviceKinds+1,PETSC_DEVICE_MAX-1,dctx->device ? PetscDeviceKinds[dctx->device->kind] : PetscDeviceKinds[PETSC_DEVICE_DEFAULT],&dkind,&flag);CHKERRQ(ierr);
772   if (flag) {
773     ierr = PetscDeviceContextSetDevice(dctx,PetscDeviceDefaultKind_Internal(static_cast<PetscDeviceKind>(dkind+1)));CHKERRQ(ierr);
774   }
775   ierr = PetscOptionsEList("-device_context_stream_type","PetscDeviceContext PetscStreamType","PetscDeviceContextSetStreamType",PetscStreamTypes,3,PetscStreamTypes[dctx->streamType],&stype,&flag);CHKERRQ(ierr);
776   if (flag) {
777     ierr = PetscDeviceContextSetStreamType(dctx,static_cast<PetscStreamType>(stype));CHKERRQ(ierr);
778   }
779   ierr = PetscOptionsEnd();CHKERRQ(ierr);
780   PetscFunctionReturn(0);
781 }
782