xref: /petsc/src/sys/objects/device/interface/dcontext.cxx (revision 79982354c9518193af7b73b747d5eed8eda58f60)
1 #include <petsc/private/deviceimpl.h> /*I "petscdevice.h" I*/
2 #include "objpool.hpp"
3 
4 const char *const PetscStreamTypes[] = {
5   "global_blocking",
6   "default_blocking",
7   "global_nonblocking",
8   "max",
9   "PetscStreamType",
10   "PETSC_STREAM_",
11   PETSC_NULLPTR
12 };
13 
14 const char *const PetscDeviceContextJoinModes[] = {
15   "destroy",
16   "sync",
17   "no_sync",
18   "PetscDeviceContextJoinMode",
19   "PETSC_DEVICE_CONTEXT_JOIN_",
20   PETSC_NULLPTR
21 };
22 
23 /* Define the allocator */
24 struct PetscDeviceContextAllocator : Petsc::Allocator<PetscDeviceContext>
25 {
26   static PetscInt PetscDeviceContextID;
27 
28   PETSC_NODISCARD static PetscErrorCode create(PetscDeviceContext *dctx) PETSC_NOEXCEPT
29   {
30     PetscDeviceContext dc;
31     PetscErrorCode     ierr;
32 
33     PetscFunctionBegin;
34     ierr           = PetscNew(&dc);CHKERRQ(ierr);
35     dc->id         = PetscDeviceContextID++;
36     dc->idle       = PETSC_TRUE;
37     dc->streamType = PETSC_STREAM_DEFAULT_BLOCKING;
38     *dctx          = dc;
39     PetscFunctionReturn(0);
40   }
41 
42   PETSC_NODISCARD static PetscErrorCode destroy(PetscDeviceContext dctx) PETSC_NOEXCEPT
43   {
44     PetscErrorCode ierr;
45 
46     PetscFunctionBegin;
47     if (PetscUnlikelyDebug(dctx->numChildren)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying",dctx->numChildren);
48     if (dctx->ops->destroy) {ierr = (*dctx->ops->destroy)(dctx);CHKERRQ(ierr);}
49     ierr = PetscDeviceDestroy(&dctx->device);CHKERRQ(ierr);
50     ierr = PetscFree(dctx->childIDs);CHKERRQ(ierr);
51     ierr = PetscFree(dctx);CHKERRQ(ierr);
52     PetscFunctionReturn(0);
53   }
54 
55   PETSC_NODISCARD static PetscErrorCode reset(PetscDeviceContext dctx) PETSC_NOEXCEPT
56   {
57     PetscErrorCode ierr;
58 
59     PetscFunctionBegin;
60     /* don't deallocate the child array, rather just zero it out */
61     ierr = PetscArrayzero(dctx->childIDs,dctx->maxNumChildren);CHKERRQ(ierr);
62     dctx->setup       = PETSC_FALSE;
63     dctx->numChildren = 0;
64     dctx->idle        = PETSC_TRUE;
65     dctx->streamType  = PETSC_STREAM_DEFAULT_BLOCKING;
66     PetscFunctionReturn(0);
67   }
68 
69   PETSC_NODISCARD static constexpr PetscErrorCode finalize() PETSC_NOEXCEPT { return 0; }
70 };
71 /* an ID = 0 is invalid */
72 PetscInt PetscDeviceContextAllocator::PetscDeviceContextID = 1;
73 
74 static Petsc::ObjectPool<PetscDeviceContext,PetscDeviceContextAllocator> contextPool;
75 
76 /*@C
77   PetscDeviceContextCreate - Creates a PetscDeviceContext
78 
79   Not Collective, Asynchronous
80 
81   Output Paramemter:
82 . dctx - The PetscDeviceContext
83 
84   Notes:
85   Unlike almost every other PETSc class it is advised that most users use
86   PetscDeviceContextDuplicate() rather than this routine to create new contexts. Contexts
87   of different types are incompatible with one another; using
88   PetscDeviceContextDuplicate() ensures compatible types.
89 
90   Level: beginner
91 
92 .seealso: PetscDeviceContextDuplicate(), PetscDeviceContextSetDevice(),
93 PetscDeviceContextSetStreamType(), PetscDeviceContextSetUp(),
94 PetscDeviceContextSetFromOptions(), PetscDeviceContextDestroy()
95 @*/
96 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx)
97 {
98   PetscErrorCode ierr;
99 
100   PetscFunctionBegin;
101   PetscValidPointer(dctx,1);
102   ierr = PetscDeviceInitializePackage();CHKERRQ(ierr);
103   ierr = contextPool.get(*dctx);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 /*@C
108   PetscDeviceContextDestroy - Frees a PetscDeviceContext
109 
110   Not Collective, Asynchronous
111 
112   Input Parameters:
113 . dctx - The PetscDeviceContext
114 
115   Notes:
116   No implicit synchronization occurs due to this routine, all resources are released completely asynchronously
117   w.r.t. the host. If one needs to guarantee access to the data produced on this contexts stream one should perform the
118   appropriate synchronization before calling this routine.
119 
120   Developer Notes:
121   The context is never actually "destroyed", only returned to an ever growing pool of
122   contexts. There are currently no safeguards on the size of the pool, this should perhaps
123   be implemented.
124 
125   Level: beginner
126 
127 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetUp(), PetscDeviceContextSynchronize()
128 @*/
129 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx)
130 {
131   PetscErrorCode ierr;
132 
133   PetscFunctionBegin;
134   if (!*dctx) PetscFunctionReturn(0);
135   ierr  = contextPool.reclaim(std::move(*dctx));CHKERRQ(ierr);
136   *dctx = PETSC_NULLPTR;
137   PetscFunctionReturn(0);
138 }
139 
140 /*@C
141   PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a PetscDeviceContext
142 
143   Not Collective, Asynchronous
144 
145   Input Parameters:
146 + dctx - The PetscDeviceContext
147 - type - The PetscStreamType
148 
149   Notes:
150   See PetscStreamType in include/petscdevicetypes.h for more information on the available
151   types and their interactions. If the PetscDeviceContext was previously set up and stream
152   type was changed, you must call PetscDeviceContextSetUp() again after this routine.
153 
154   Level: intermediate
155 
156 .seealso: PetscDeviceContextGetStreamType(), PetscDeviceContextCreate(), PetscDeviceContextSetUp(), PetscDeviceContextSetFromOptions()
157 @*/
158 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type)
159 {
160   PetscFunctionBegin;
161   PetscValidDeviceContext(dctx,1);
162   PetscValidStreamType(type,2);
163   /* only need to do complex swapping if the object has already been setup */
164   if (dctx->setup && (dctx->streamType != type)) {
165     PetscErrorCode ierr;
166 
167     ierr = (*dctx->ops->changestreamtype)(dctx,type);CHKERRQ(ierr);
168     dctx->setup = PETSC_FALSE;
169   }
170   dctx->streamType = type;
171   PetscFunctionReturn(0);
172 }
173 
174 /*@C
175   PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a PetscDeviceContext
176 
177   Not Collective, Asynchronous
178 
179   Input Parameter:
180 . dctx - The PetscDeviceContext
181 
182   Output Parameter:
183 . type - The PetscStreamType
184 
185   Notes:
186   See PetscStreamType in include/petscdevicetypes.h for more information on the available types and their interactions
187 
188   Level: intermediate
189 
190 .seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextCreate(), PetscDeviceContextSetFromOptions()
191 @*/
192 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type)
193 {
194   PetscFunctionBegin;
195   PetscValidDeviceContext(dctx,1);
196   PetscValidIntPointer(type,2);
197   *type = dctx->streamType;
198   PetscFunctionReturn(0);
199 }
200 
201 /*@C
202   PetscDeviceContextSetDevice - Set the underlying device for the PetscDeviceContext
203 
204   Not Collective, Possibly Synchronous
205 
206   Input Parameters:
207 + dctx   - The PetscDeviceContext
208 - device - The PetscDevice
209 
210   Notes:
211   This routine is effectively PetscDeviceContext's "set-type" (so every PetscDeviceContext
212   must also have an attached PetscDevice). Unlike the usual set-type semantics, it is
213   not stricly necessary to set a contexts device to enable usage, any created device
214   contexts will always come equipped with the "default" device.
215 
216   This routine is a no-op if dctx is already attached to device.
217 
218   This routine may initialize the backend device and incur synchronization.
219 
220   Level: intermediate
221 
222 .seealso: PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceContextGetDevice()
223 @*/
224 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device)
225 {
226   PetscErrorCode ierr;
227 
228   PetscFunctionBegin;
229   PetscValidDeviceContext(dctx,1);
230   PetscValidDevice(device,2);
231   if (dctx->device) {
232     /* can't do a strict pointer equality check since PetscDevice's are reused */
233     if (dctx->device->ops->createcontext == device->ops->createcontext) PetscFunctionReturn(0);
234   }
235   ierr = PetscDeviceDestroy(&dctx->device);CHKERRQ(ierr);
236   if (dctx->ops->destroy) {ierr = (*dctx->ops->destroy)(dctx);CHKERRQ(ierr);}
237   ierr = PetscMemzero(dctx->ops,sizeof(*dctx->ops));CHKERRQ(ierr);
238   ierr = (*device->ops->createcontext)(dctx);CHKERRQ(ierr);
239   ierr = PetscDeviceReference_Internal(device);CHKERRQ(ierr);
240   dctx->device = device;
241   dctx->setup  = PETSC_FALSE;
242   PetscFunctionReturn(0);
243 }
244 
245 /*@C
246   PetscDeviceContextGetDevice - Get the underlying PetscDevice for a PetscDeviceContext
247 
248   Not Collective, Asynchronous
249 
250   Input Parameter:
251 . dctx - the PetscDeviceContext
252 
253   Output Parameter:
254 . device - The PetscDevice
255 
256   Notes:
257   This is a borrowed reference, the user should not destroy the device.
258 
259   Level: intermediate
260 
261 .seealso: PetscDeviceContextSetDevice(), PetscDevice
262 @*/
263 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device)
264 {
265   PetscFunctionBegin;
266   PetscValidDeviceContext(dctx,1);
267   PetscValidPointer(device,2);
268   if (PetscUnlikelyDebug(!dctx->device)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PetscDeviceContext %" PetscInt_FMT " has no attached PetscDevice to get",dctx->id);
269   *device = dctx->device;
270   PetscFunctionReturn(0);
271 }
272 
273 /*@C
274   PetscDeviceContextSetUp - Prepares a PetscDeviceContext for use
275 
276   Not Collective, Asynchronous
277 
278   Input Parameter:
279 . dctx - The PetscDeviceContext
280 
281   Developer Notes:
282   This routine is usually the stage where a PetscDeviceContext acquires device-side data structures such as streams,
283   events, and (possibly) handles.
284 
285   Level: beginner
286 
287 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextDestroy(), PetscDeviceContextSetFromOptions()
288 @*/
289 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx)
290 {
291   PetscErrorCode ierr;
292 
293   PetscFunctionBegin;
294   PetscValidDeviceContext(dctx,1);
295   if (!dctx->device) {
296     ierr = PetscInfo2(PETSC_NULLPTR,"PetscDeviceContext %" PetscInt_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n",dctx->id,PetscDeviceTypes[PETSC_DEVICE_DEFAULT]);CHKERRQ(ierr);
297     ierr = PetscDeviceContextSetDefaultDevice_Internal(dctx);CHKERRQ(ierr);
298   }
299   if (dctx->setup) PetscFunctionReturn(0);
300   ierr = (*dctx->ops->setup)(dctx);CHKERRQ(ierr);
301   dctx->setup = PETSC_TRUE;
302   PetscFunctionReturn(0);
303 }
304 
305 /*@C
306   PetscDeviceContextDuplicate - Duplicates a PetscDeviceContext object
307 
308   Not Collective, Asynchronous
309 
310   Input Parameter:
311 . dctx - The PetscDeviceContext to duplicate
312 
313   Output Paramter:
314 . dctxdup - The duplicated PetscDeviceContext
315 
316   Notes:
317   This is a shorthand method for creating a PetscDeviceContext with the exact same
318   settings as another. Note however that the duplicated PetscDeviceContext does not "share"
319   any of the underlying data with the original, (including its current stream-state) they
320   are completely separate objects.
321 
322   Level: beginner
323 
324 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetStreamType()
325 @*/
326 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup)
327 {
328   PetscDeviceContext dup;
329   PetscErrorCode     ierr;
330 
331   PetscFunctionBegin;
332   PetscValidDeviceContext(dctx,1);
333   PetscValidPointer(dctxdup,2);
334   ierr = PetscDeviceContextCreate(&dup);CHKERRQ(ierr);
335   ierr = PetscDeviceContextSetStreamType(dup,dctx->streamType);CHKERRQ(ierr);
336   if (dctx->device) {ierr = PetscDeviceContextSetDevice(dup,dctx->device);CHKERRQ(ierr);}
337   ierr = PetscDeviceContextSetUp(dup);CHKERRQ(ierr);
338   *dctxdup = dup;
339   PetscFunctionReturn(0);
340 }
341 
342 /*@C
343   PetscDeviceContextQueryIdle - Returns whether or not a PetscDeviceContext is idle
344 
345   Not Collective, Asynchronous
346 
347   Input Parameter:
348 . dctx - The PetscDeviceContext object
349 
350   Output Parameter:
351 . idle - PETSC_TRUE if PetscDeviceContext has NO work, PETSC_FALSE if it has work
352 
353   Notes:
354   This routine only refers a singular context and does NOT take any of its children into account. That is, if dctx is
355   idle but has dependents who do have work, this routine still returns PETSC_TRUE.
356 
357   Results of PetscDeviceContextQueryIdle() are cached on return, allowing this function to be called repeatedly in an
358   efficient manner. When debug mode is enabled this cache is verified on every call to
359   this routine, but is blindly believed when debugging is disabled.
360 
361   Level: intermediate
362 
363 .seealso: PetscDeviceContextCreate(), PetscDeviceContextWaitForContext(), PetscDeviceContextFork()
364 @*/
365 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle)
366 {
367   PetscErrorCode ierr;
368 
369   PetscFunctionBegin;
370   PetscValidDeviceContext(dctx,1);
371   PetscValidBoolPointer(idle,2);
372   if (dctx->idle) {
373     *idle = PETSC_TRUE;
374     ierr = PetscDeviceContextValidateIdle_Internal(dctx);CHKERRQ(ierr);
375   } else {
376     ierr = (*dctx->ops->query)(dctx,idle);CHKERRQ(ierr);
377     dctx->idle = *idle;
378   }
379   PetscFunctionReturn(0);
380 }
381 
382 /*@C
383   PetscDeviceContextWaitForContext - Make one context wait for another context to finish
384 
385   Not Collective, Asynchronous
386 
387   Input Parameters:
388 + dctxa - The PetscDeviceContext object that is waiting
389 - dctxb - The PetscDeviceContext object that is being waited on
390 
391   Notes:
392   Serializes two PetscDeviceContexts. This routine uses only the state of dctxb at the moment this routine was
393   called, so any future work queued will not affect dctxa. It is safe to pass the same context to both arguments.
394 
395   Level: beginner
396 
397 .seealso: PetscDeviceContextCreate(), PetscDeviceContextQueryIdle(), PetscDeviceContextJoin()
398 @*/
399 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb)
400 {
401   PetscErrorCode ierr;
402 
403   PetscFunctionBegin;
404   PetscCheckCompatibleDeviceContexts(dctxa,1,dctxb,2);
405   if (dctxa == dctxb) PetscFunctionReturn(0);
406   if (dctxb->idle) {
407     /* No need to do the extra function lookup and event record if the stream were waiting on isn't doing anything */
408     ierr = PetscDeviceContextValidateIdle_Internal(dctxb);CHKERRQ(ierr);
409   } else {
410     ierr = (*dctxa->ops->waitforctx)(dctxa,dctxb);CHKERRQ(ierr);
411   }
412   PetscFunctionReturn(0);
413 }
414 
415 #define PETSC_USE_DEBUG_AND_INFO (PetscDefined(USE_DEBUG) && PetscDefined(USE_INFO))
416 #if PETSC_USE_DEBUG_AND_INFO
417 #include <string>
418 #endif
419 /*@C
420   PetscDeviceContextFork - Create a set of dependent child contexts from a parent context
421 
422   Not Collective, Asynchronous
423 
424   Input Parameters:
425 + dctx - The parent PetscDeviceContext
426 - n    - The number of children to create
427 
428   Output Parameter:
429 . dsub - The created child context(s)
430 
431   Notes:
432   This routine creates n edges of a DAG from a source node which are causally dependent on the source node, meaning
433   that work queued on child contexts will not start until the parent context finishes its work. This accounts for work
434   queued on the parent up until calling this function, any subsequent work enqueued on the parent has no effect on the children.
435 
436   Any children created with this routine have their lifetimes bounded by the parent. That is, the parent context expects
437   to free all of it's children (and ONLY its children) before itself is freed.
438 
439   DAG representation:
440 .vb
441   time ->
442 
443   -> dctx \----> dctx ------>
444            \---> dsub[0] --->
445             \--> ... ------->
446              \-> dsub[n-1] ->
447 .ve
448 
449   Level: intermediate
450 
451 .seealso: PetscDeviceContextJoin(), PetscDeviceContextSynchronize(), PetscDeviceContextQueryIdle()
452 @*/
453 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub)
454 {
455 #if PETSC_USE_DEBUG_AND_INFO
456   const PetscInt      nBefore = n;
457   static std::string  idList;
458 #endif
459   PetscDeviceContext *dsubTmp = PETSC_NULLPTR;
460   PetscInt            i = 0;
461   PetscErrorCode      ierr;
462 
463   PetscFunctionBegin;
464   PetscValidDeviceContext(dctx,1);
465   PetscValidPointer(dsub,3);
466   if (PetscUnlikelyDebug(n < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts requested %" PetscInt_FMT " < 0",n);
467 #if PETSC_USE_DEBUG_AND_INFO
468   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
469   idList.reserve(4*n);
470 #endif
471   /* update child totals */
472   dctx->numChildren += n;
473   /* now to find out if we have room */
474   if (dctx->numChildren > dctx->maxNumChildren) {
475     /* no room, either from having too many kids or not having any */
476     if (dctx->childIDs) {
477       /* have existing children, must reallocate them */
478       ierr = PetscRealloc(dctx->numChildren*sizeof(*dctx->childIDs),&dctx->childIDs);CHKERRQ(ierr);
479       /* clear the extra memory since realloc doesn't do it for us */
480       ierr = PetscArrayzero((dctx->childIDs)+(dctx->maxNumChildren),(dctx->numChildren)-(dctx->maxNumChildren));CHKERRQ(ierr);
481     } else {
482       /* have no children */
483       ierr = PetscCalloc1(dctx->numChildren,&dctx->childIDs);CHKERRQ(ierr);
484     }
485     /* update total number of children */
486     dctx->maxNumChildren = dctx->numChildren;
487   }
488   ierr = PetscMalloc1(n,&dsubTmp);CHKERRQ(ierr);
489   while (n) {
490     /* empty child slot */
491     if (!(dctx->childIDs[i])) {
492       /* create the child context in the image of its parent */
493       ierr = PetscDeviceContextDuplicate(dctx,dsubTmp+i);CHKERRQ(ierr);
494       ierr = PetscDeviceContextWaitForContext(dsubTmp[i],dctx);CHKERRQ(ierr);
495       /* register the child with its parent */
496       dctx->childIDs[i] = dsubTmp[i]->id;
497 #if PETSC_USE_DEBUG_AND_INFO
498       idList += std::to_string(dsubTmp[i]->id);
499       if (n != 1) idList += ", ";
500 #endif
501       --n;
502     }
503     ++i;
504   }
505 #if PETSC_USE_DEBUG_AND_INFO
506   ierr = PetscInfo3(PETSC_NULLPTR,"Forked %" PetscInt_FMT " children from parent %" PetscInt_FMT " with IDs: %s\n",nBefore,dctx->id,idList.c_str());CHKERRQ(ierr);
507   /* resets the size but doesn't deallocate the memory */
508   idList.clear();
509 #endif
510   /* pass the children back to caller */
511   *dsub = dsubTmp;
512   PetscFunctionReturn(0);
513 }
514 
515 /*@C
516   PetscDeviceContextJoin - Converge a set of child contexts
517 
518   Not Collective, Asynchronous
519 
520   Input Parameters:
521 + dctx         - A PetscDeviceContext to converge on
522 . n            - The number of sub contexts to converge
523 . joinMode     - The type of join to perform
524 - dsub         - The sub contexts to converge
525 
526   Notes:
527   If PetscDeviceContextFork() creates n edges from a source node which all depend on the
528   source node, then this routine is the exact mirror. That is, it creates a node
529   (represented in dctx) which recieves n edges (and optionally destroys them) which is
530   dependent on the completion of all incoming edges.
531 
532   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY all contexts in dsub will be destroyed
533   by this routine. Thus all sub contexts must have been created with the dctx passed to
534   this routine.
535 
536   if joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC dctx waits for all sub contexts but the
537   sub contexts do not wait for one another afterwards.
538 
539   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC all sub contexts will additionally
540   wait on dctx after converging. This has the effect of "synchronizing" the outgoing
541   edges.
542 
543   DAG representations:
544   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY
545 .vb
546   time ->
547 
548   -> dctx ---------/- dctx ->
549   -> dsub[0] -----/
550   ->  ... -------/
551   -> dsub[n-1] -/
552 .ve
553   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC
554 .vb
555   time ->
556 
557   -> dctx ---------/- dctx ->
558   -> dsub[0] -----/--------->
559   ->  ... -------/---------->
560   -> dsub[n-1] -/----------->
561 .ve
562   If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC
563 .vb
564   time ->
565 
566   -> dctx ---------/- dctx -\----> dctx ------>
567   -> dsub[0] -----/          \---> dsub[0] --->
568   ->  ... -------/            \--> ... ------->
569   -> dsub[n-1] -/              \-> dsub[n-1] ->
570 .ve
571 
572   Level: intermediate
573 
574 .seealso: PetscDeviceContextFork(), PetscDeviceContextSynchronize(), PetscDeviceContextJoinMode
575 @*/
576 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub)
577 {
578 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
579   static std::string idList;
580 #endif
581   PetscErrorCode     ierr;
582 
583   PetscFunctionBegin;
584   /* validity of dctx is checked in the wait-for loop */
585   PetscValidPointer(dsub,4);
586   if (PetscUnlikelyDebug(n < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts merged %" PetscInt_FMT " < 0",n);
587 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
588   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
589   idList.reserve(4*n);
590 #endif
591   /* first dctx waits on all the incoming edges */
592   for (PetscInt i = 0; i < n; ++i) {
593     PetscCheckCompatibleDeviceContexts(dctx,1,(*dsub)[i],4);
594     ierr = PetscDeviceContextWaitForContext(dctx,(*dsub)[i]);CHKERRQ(ierr);
595 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
596     idList += std::to_string((*dsub)[i]->id);
597     if (i+1 < n) idList += ", ";
598 #endif
599   }
600 
601   /* now we handle the aftermath */
602   switch (joinMode) {
603   case PETSC_DEVICE_CONTEXT_JOIN_DESTROY:
604     {
605       PetscInt j = 0;
606 
607       if (PetscUnlikelyDebug(n > dctx->numChildren)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent",n,dctx->numChildren);
608       /* update child count while it's still fresh in memory */
609       dctx->numChildren -= n;
610       for (PetscInt i = 0; i < dctx->maxNumChildren; ++i) {
611         if (dctx->childIDs[i] && (dctx->childIDs[i] == (*dsub)[j]->id)) {
612           /* child is one of ours, can destroy it */
613           ierr = PetscDeviceContextDestroy((*dsub)+j);CHKERRQ(ierr);
614           /* reset the child slot */
615           dctx->childIDs[i] = 0;
616           if (++j == n) break;
617         }
618       }
619       /* gone through the loop but did not find every child, if this triggers (or well, doesn't) on perf-builds we leak the remaining contexts memory */
620       if (PetscUnlikelyDebug(j != n)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in.",n-j);
621       ierr = PetscFree(*dsub);CHKERRQ(ierr);
622     }
623     break;
624   case PETSC_DEVICE_CONTEXT_JOIN_SYNC:
625     for (PetscInt i = 0; i < n; ++i) {
626       ierr = PetscDeviceContextWaitForContext((*dsub)[i],dctx);CHKERRQ(ierr);
627     }
628   case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC:
629     break;
630   default:
631     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown PetscDeviceContextJoinMode given");
632   }
633 
634 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO)
635   ierr = PetscInfo4(PETSC_NULLPTR,"Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt_FMT ", mode %s with IDs: %s\n",n,dctx->id,PetscDeviceContextJoinModes[joinMode],idList.c_str());CHKERRQ(ierr);
636   idList.clear();
637 #endif
638   PetscFunctionReturn(0);
639 }
640 
641 /*@C
642   PetscDeviceContextSynchronize - Block the host until all work queued on or associated with a PetscDeviceContext has finished
643 
644   Not Collective, Synchronous
645 
646   Input Parameters:
647 . dctx - The PetscDeviceContext to synchronize
648 
649   Level: beginner
650 
651 .seealso: PetscDeviceContextFork(), PetscDeviceContextJoin(), PetscDeviceContextQueryIdle()
652 @*/
653 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx)
654 {
655   PetscErrorCode ierr;
656 
657   PetscFunctionBegin;
658   PetscValidDeviceContext(dctx,1);
659   /* if it isn't setup there is nothing to sync on */
660   if (dctx->setup) {ierr = (*dctx->ops->synchronize)(dctx);CHKERRQ(ierr);}
661   dctx->idle = PETSC_TRUE;
662   PetscFunctionReturn(0);
663 }
664 
665 #define PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE PETSC_DEVICE_DEFAULT
666 // REMOVE ME (change)
667 #define PETSC_DEVICE_CONTEXT_DEFAULT_STREAM PETSC_STREAM_GLOBAL_BLOCKING
668 
669 static PetscDeviceType    rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE;
670 static PetscStreamType    rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM;
671 static PetscDeviceContext globalContext  = PETSC_NULLPTR;
672 
673 /* when PetsDevice initializes PetscDeviceContext eagerly the type of device created should
674  * match whatever device is eagerly intialized */
675 PetscErrorCode PetscDeviceContextSetRootDeviceType_Internal(PetscDeviceType type)
676 {
677   PetscFunctionBegin;
678   PetscValidDeviceType(type,1);
679   rootDeviceType = type;
680   PetscFunctionReturn(0);
681 }
682 
#if 0
/* Currently unused (and compiled out): stream-type counterpart of
 * PetscDeviceContextSetRootDeviceType_Internal(), records the stream type to create the
 * global context with */
PetscErrorCode PetscDeviceContextSetRootStreamType_Internal(PetscStreamType type)
{
  PetscFunctionBegin;
  PetscValidStreamType(type,1);
  rootStreamType = type;
  PetscFunctionReturn(0);
}
#endif
693 
694 static PetscErrorCode PetscDeviceContextSetupGlobalContext_Private(void)
695 {
696   PetscErrorCode    ierr;
697   static const auto PetscDeviceContextFinalizer = []() -> PetscErrorCode {
698     PetscErrorCode ierr;
699 
700     PetscFunctionBegin;
701     ierr = PetscDeviceContextDestroy(&globalContext);CHKERRQ(ierr);
702     rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE;
703     rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM;
704     PetscFunctionReturn(0);
705   };
706 
707   PetscFunctionBegin;
708   if (globalContext) PetscFunctionReturn(0);
709   /* this exists purely as a valid device check. */
710   ierr = PetscDeviceInitializePackage();CHKERRQ(ierr);
711   ierr = PetscRegisterFinalize(PetscDeviceContextFinalizer);CHKERRQ(ierr);
712   ierr = PetscInfo(PETSC_NULLPTR,"Initializing global PetscDeviceContext\n");CHKERRQ(ierr);
713   /* we call the allocator directly here since the ObjectPool creates a PetscContainer which
714    * eventually tries to call logging functions. However, this routine may be purposefully
715    * called __before__ logging is initialized, so the logging function would PETSCABORT */
716   ierr = PetscDeviceContextAllocator::create(&globalContext);CHKERRQ(ierr);
717   ierr = PetscDeviceContextSetStreamType(globalContext,rootStreamType);CHKERRQ(ierr);
718   ierr = PetscDeviceContextSetDefaultDeviceForType_Internal(globalContext,rootDeviceType);CHKERRQ(ierr);
719   ierr = PetscDeviceContextSetUp(globalContext);CHKERRQ(ierr);
720   PetscFunctionReturn(0);
721 }
722 
723 /*@C
724   PetscDeviceContextGetCurrentContext - Get the current active PetscDeviceContext
725 
726   Not Collective, Asynchronous
727 
728   Output Parameter:
729 . dctx - The PetscDeviceContext
730 
731   Notes:
732   The user generally should not destroy contexts retrieved with this routine unless they
733   themselves have created them. There exists no protection against destroying the root
734   context.
735 
736   Developer Notes:
737   Unless the user has set their own, this routine creates the "root" context the first time it
738   is called, registering its destructor to PetscFinalize().
739 
740   Level: beginner
741 
742 .seealso: PetscDeviceContextSetCurrentContext(), PetscDeviceContextFork(),
743 PetscDeviceContextJoin(), PetscDeviceContextCreate()
744 @*/
745 PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *dctx)
746 {
747   PetscErrorCode ierr;
748 
749   PetscFunctionBegin;
750   PetscValidPointer(dctx,1);
751   ierr = PetscDeviceContextSetupGlobalContext_Private();CHKERRQ(ierr);
752   /* while the static analyzer can find global variables, it will throw a warning about not
753    * being able to connect this back to the function arguments */
754   PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscValidDeviceContext(globalContext,-1));
755   *dctx = globalContext;
756   PetscFunctionReturn(0);
757 }
758 
759 /*@C
760   PetscDeviceContextSetCurrentContext - Set the current active PetscDeviceContext
761 
762   Not Collective, Asynchronous
763 
764   Input Parameter:
765 . dctx - The PetscDeviceContext
766 
767   Notes:
768   This routine can be used to set the defacto "root" PetscDeviceContext to a user-defined
769   implementation by calling this routine immediately after PetscInitialize() and ensuring that
770   PetscDevice is not greedily intialized. In this case the user is responsible for destroying
771   their PetscDeviceContext before PetscFinalize() returns.
772 
773   The old context is not stored in any way by this routine; if one is overriding a context that
774   they themselves do not control, one should take care to temporarily store it by calling
775   PetscDeviceContextGetCurrentContext() before calling this routine.
776 
777   Level: beginner
778 
779 .seealso: PetscDeviceContextGetCurrentContext(), PetscDeviceContextFork(),
780 PetscDeviceContextJoin(), PetscDeviceContextCreate()
781 @*/
782 PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext dctx)
783 {
784   PetscErrorCode ierr;
785 
786   PetscFunctionBegin;
787   PetscValidDeviceContext(dctx,1);
788   if (PetscUnlikelyDebug(!dctx->setup)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PetscDeviceContext %" PetscInt_FMT " must be set up before being set as global context",dctx->id);
789   globalContext = dctx;
790   ierr = PetscInfo1(PETSC_NULLPTR,"Set global PetscDeviceContext id %" PetscInt_FMT "\n",dctx->id);CHKERRQ(ierr);
791   PetscFunctionReturn(0);
792 }
793 
794 /*@C
795   PetscDeviceContextSetFromOptions - Configure a PetscDeviceContext from the options database
796 
797   Collective on comm, Asynchronous
798 
799   Input Parameters:
800 + comm   - MPI communicator on which to query the options database
801 . prefix - prefix to prepend to all options database queries, NULL if not needed
802 - dctx   - The PetscDeviceContext to configure
803 
804   Output Parameter:
805 . dctx - The PetscDeviceContext
806 
807   Options Database:
808 + -device_context_stream_type - type of stream to create inside the PetscDeviceContext -
809    PetscDeviceContextSetStreamType()
810 - -device_context_device_type - the type of PetscDevice to attach by default - PetscDeviceType
811 
812   Level: beginner
813 
814 .seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextSetDevice()
815 @*/
816 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, const char prefix[], PetscDeviceContext dctx)
817 {
818   PetscBool      flag;
819   PetscInt       stype,dtype;
820   PetscErrorCode ierr;
821 
822   PetscFunctionBegin;
823   if (prefix) PetscValidCharPointer(prefix,2);
824   PetscValidDeviceContext(dctx,3);
825   ierr = PetscOptionsBegin(comm,prefix,"PetscDeviceContext Options","Sys");CHKERRQ(ierr);
826   ierr = PetscOptionsEList("-device_context_stream_type","PetscDeviceContext PetscStreamType","PetscDeviceContextSetStreamType",PetscStreamTypes,PETSC_STREAM_MAX,PetscStreamTypes[dctx->streamType],&stype,&flag);CHKERRQ(ierr);
827   if (flag) {
828     ierr = PetscDeviceContextSetStreamType(dctx,static_cast<PetscStreamType>(stype));CHKERRQ(ierr);
829   }
830   ierr = PetscOptionsEList("-device_context_device_type","Underlying PetscDevice","PetscDeviceContextSetDevice",PetscDeviceTypes+1,PETSC_DEVICE_MAX-1,dctx->device ? PetscDeviceTypes[dctx->device->type] : PetscDeviceTypes[PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE],&dtype,&flag);CHKERRQ(ierr);
831   if (flag) {
832     ierr = PetscDeviceContextSetDefaultDeviceForType_Internal(dctx,static_cast<PetscDeviceType>(dtype+1));CHKERRQ(ierr);
833   }
834   ierr = PetscOptionsEnd();CHKERRQ(ierr);
835   PetscFunctionReturn(0);
836 }
837